Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cmd/mapt/cmd/aws/services/snc.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
disableClusterReadinessDesc = "If this flag is set it will skip the checks for the cluster readiness. In this case the kubeconfig can not be generated"

sncProfile = "profile"
sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization"
sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai. The ai profile automatically includes servicemesh and serverless-serving as prerequisites and raises the minimum instance size to 16 vCPUs"
)

func GetOpenshiftSNCCmd() *cobra.Command {
Expand Down Expand Up @@ -62,6 +62,9 @@ func createSNC() *cobra.Command {
if sncApi.ProfilesRequireNestedVirt(profiles) {
computeReq.NestedVirt = true
}
if minCPUs := sncApi.ProfilesMinCPUs(profiles); minCPUs > computeReq.CPUs {
computeReq.CPUs = minCPUs
}
if _, err := openshiftsnc.Create(
&maptContext.ContextArgs{
Context: cmd.Context(),
Expand Down
11 changes: 8 additions & 3 deletions docs/aws/openshift-snc.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ After the AMI is published and accessible by the account, we can use the followi
--pull-secret-file /home/tester/Downloads/pull-secret
```

After the above command succeeds the `kubeconfig` to access the deployed cluster will be available in `/tmp/snc/kubeconfig`
When `--conn-details-output` is set, the `kubeconfig` is written to disk as soon as the cluster is ready — before any profile deployment begins. This means the kubeconfig is available at `<conn-details-output>/kubeconfig` even if a profile installation fails or times out.

## Profiles

Expand All @@ -62,19 +62,24 @@ mapt aws openshift-snc create \
--profile virtualization
```

Multiple profiles can be specified as a comma-separated list (e.g., `--profile virtualization,serverless`).
Multiple profiles can be specified as a comma-separated list (e.g., `--profile virtualization,ai`).

### Available profiles

| Profile | Description |
|---------|-------------|
| `virtualization` | Installs [OpenShift Virtualization](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) (CNV) on the cluster, enabling virtual machines to run on the single-node cluster. When this profile is selected, nested virtualization is automatically enabled on the cloud instance. Because standard Nitro-based instances do not expose `/dev/kvm`, a bare metal instance is required.|
| `serverless-serving` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates a KnativeServing instance, enabling serverless workloads (Knative Serving) on the cluster.|
| `serverless-eventing` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates a KnativeEventing instance, enabling event-driven workloads (Knative Eventing) on the cluster.|
| `serverless` | Installs [OpenShift Serverless](https://docs.openshift.com/serverless/latest/about/about-serverless.html) and creates both KnativeServing and KnativeEventing instances.|
| `servicemesh` | Installs [OpenShift Service Mesh 3](https://docs.openshift.com/service-mesh/latest/about/about-ossm.html) (Sail/Istio) on the cluster, deploying IstioCNI and an Istio control plane.|
| `ai` | Installs [Red Hat OpenShift AI](https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed) (RHOAI) on the cluster. Automatically installs Service Mesh v2 (Maistra) and Serverless Serving as prerequisites for KServe. The RHOAI operator installation (namespace, OperatorGroup and Subscription) is gated on those prerequisites and only starts once Service Mesh and Serverless Serving are fully ready; the DataScienceCluster CR is then created after the RHOAI operator CSV reports `Succeeded`. The minimum instance size is raised to 16 vCPUs (from the default 8) to accommodate the additional operators. **Cannot be combined with the `servicemesh` profile** (which deploys Service Mesh v3/Sail). |


### Adding new profiles

To add a new profile:

1. Create `profile_<name>.go` under `pkg/target/service/snc/` — Go file with a `deploy<Name>()` function that uses the Pulumi Kubernetes provider to create the required resources (Namespace, OperatorGroup, Subscription, CRs, etc.)
2. Register the profile name in `profiles.go` by adding it to `validProfiles` and the `DeployProfile()` switch
2. Register the profile name in `profiles.go` by adding it to `validProfiles` and the `DeployProfiles()` function

20 changes: 12 additions & 8 deletions pkg/provider/aws/action/snc/snc.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,20 +264,24 @@ func (r *openshiftSNCRequest) deploy(ctx *pulumi.Context) error {
}
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, apiSNC.OutputKubeconfig),
pulumi.ToSecret(kubeconfig))
// Write kubeconfig to disk early so it is available even if profile deployment fails
if outputPath := r.mCtx.GetResultsOutputPath(); len(outputPath) > 0 {
kubeconfig.ApplyT(func(kc string) error {
return os.WriteFile(fmt.Sprintf("%s/kubeconfig", outputPath), []byte(kc), 0600)
})
}
// Deploy profiles using Kubernetes provider
if len(r.profiles) > 0 {
k8sProvider, err := apiSNC.NewK8sProvider(ctx, "k8s-provider", kubeconfig)
if err != nil {
return err
}
for _, profileName := range r.profiles {
if _, err := apiSNC.DeployProfile(ctx, profileName, &apiSNC.ProfileDeployArgs{
K8sProvider: k8sProvider,
Kubeconfig: kubeconfig,
Prefix: *r.prefix,
}); err != nil {
return err
}
if err := apiSNC.DeployProfiles(ctx, r.profiles, &apiSNC.ProfileDeployArgs{
K8sProvider: k8sProvider,
Kubeconfig: kubeconfig,
Prefix: *r.prefix,
}); err != nil {
return err
}
}
return nil
Expand Down
12 changes: 10 additions & 2 deletions pkg/target/service/snc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,22 @@ func waitForCRCondition(ctx context.Context, kubeconfig string, gvr schema.Group
}

// findResource returns a single resource by exact name or by name prefix.
// When namespace is empty the resource is looked up at cluster scope.
func findResource(ctx context.Context, dc dynamic.Interface, gvr schema.GroupVersionResource,
namespace, name string, prefixMatch bool) (*unstructured.Unstructured, error) {

var ri dynamic.ResourceInterface
if namespace != "" {
ri = dc.Resource(gvr).Namespace(namespace)
} else {
ri = dc.Resource(gvr)
}

if !prefixMatch {
return dc.Resource(gvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{})
return ri.Get(ctx, name, metav1.GetOptions{})
}

list, err := dc.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{})
list, err := ri.List(ctx, metav1.ListOptions{})
if err != nil {
return nil, err
}
Expand Down
168 changes: 168 additions & 0 deletions pkg/target/service/snc/profile_openshift_ai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package snc

import (
"fmt"
"time"

"github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions"
corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1"
metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
"k8s.io/apimachinery/pkg/runtime/schema"
)

// rhoaiNamespace is the namespace in which the RHOAI OperatorGroup and
// Subscription are created, and where the operator CSV is awaited.
const rhoaiNamespace = "redhat-ods-operator"

// dscGVR identifies the DataScienceCluster custom resource
// (datasciencecluster.opendatahub.io/v1) that is polled for readiness after
// the profile creates it.
var dscGVR = schema.GroupVersionResource{
	Group:    "datasciencecluster.opendatahub.io",
	Version:  "v1",
	Resource: "datascienceclusters",
}

// deployOpenShiftAI provisions Red Hat OpenShift AI (RHOAI) on the cluster:
// it creates the operator Namespace, OperatorGroup and Subscription, waits
// for the operator CSV to reach "Succeeded", creates the "default-dsc"
// DataScienceCluster and finally waits for it to report Ready=True.
//
// The whole installation is gated on prereqs: the Namespace name is derived
// from every prerequisite output, so nothing is created until all of them
// have resolved. According to the original author's notes these are the
// ServiceMesh v2, Authorino and Serverless readiness outputs, which lets the
// operator find its dependencies in place when it auto-creates the DSCI.
//
// Parameters:
//   - ctx: Pulumi context used to register resources and the export.
//   - args: provider, kubeconfig, name prefix and extra dependencies.
//   - prereqs: outputs that must resolve before the installation starts.
//
// It returns the DataScienceCluster resource, or the first error raised
// while registering a resource. The readiness wait is published as the
// "dscReady" stack export.
func deployOpenShiftAI(ctx *pulumi.Context, args *ProfileDeployArgs, prereqs []pulumi.StringOutput) (pulumi.Resource, error) {
	waitCtx := ctx.Context()
	provider := pulumi.Provider(args.K8sProvider)

	// resourceName builds the Pulumi logical name for each RHOAI resource.
	resourceName := func(suffix string) string {
		return fmt.Sprintf("%s-rhoai-%s", args.Prefix, suffix)
	}
	// component renders the spec entry for a DataScienceCluster component.
	component := func(state string) map[string]interface{} {
		return map[string]interface{}{"managementState": state}
	}

	// Fold every prerequisite into the namespace name. The value is always
	// the constant namespace, but the output only resolves once each
	// prerequisite has resolved, which holds back the operator install.
	gatedNamespace := pulumi.String(rhoaiNamespace).ToStringOutput()
	for _, prereq := range prereqs {
		gatedNamespace = pulumi.All(gatedNamespace, prereq).ApplyT(
			func(resolved []interface{}) string {
				return resolved[0].(string)
			}).(pulumi.StringOutput)
	}

	// Namespace: blocked until all prerequisites resolve.
	namespace, err := corev1.NewNamespace(ctx, resourceName("ns"),
		&corev1.NamespaceArgs{
			Metadata: &metav1.ObjectMetaArgs{
				Name: gatedNamespace,
			},
		},
		provider,
		pulumi.DependsOn(args.Deps))
	if err != nil {
		return nil, err
	}

	// OperatorGroup in AllNamespaces mode (no targetNamespaces in the spec).
	operatorGroupMeta := &metav1.ObjectMetaArgs{
		Name:      pulumi.String("redhat-ods-operator-group"),
		Namespace: pulumi.String(rhoaiNamespace),
	}
	operatorGroup, err := apiextensions.NewCustomResource(ctx, resourceName("og"),
		&apiextensions.CustomResourceArgs{
			ApiVersion: pulumi.String("operators.coreos.com/v1"),
			Kind:       pulumi.String("OperatorGroup"),
			Metadata:   operatorGroupMeta,
		},
		provider,
		pulumi.DependsOn([]pulumi.Resource{namespace}))
	if err != nil {
		return nil, err
	}

	// Subscription to the rhods-operator package on the stable channel.
	subscriptionSpec := map[string]interface{}{
		"source":              "redhat-operators",
		"sourceNamespace":     "openshift-marketplace",
		"name":                "rhods-operator",
		"channel":             "stable",
		"installPlanApproval": "Automatic",
	}
	subscription, err := apiextensions.NewCustomResource(ctx, resourceName("sub"),
		&apiextensions.CustomResourceArgs{
			ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"),
			Kind:       pulumi.String("Subscription"),
			Metadata: &metav1.ObjectMetaArgs{
				Name:      pulumi.String("rhods-operator"),
				Namespace: pulumi.String(rhoaiNamespace),
			},
			OtherFields: map[string]interface{}{
				"spec": subscriptionSpec,
			},
		},
		provider,
		pulumi.DependsOn([]pulumi.Resource{operatorGroup}))
	if err != nil {
		return nil, err
	}

	// Block until the operator CSV reports "Succeeded" (up to 20 minutes).
	// NOTE(review): the trailing `true` presumably enables name-prefix
	// matching, since CSV names carry a version suffix; confirm against
	// waitForCRCondition.
	operatorReady := pulumi.All(subscription.ID(), args.Kubeconfig).ApplyT(
		func(resolved []interface{}) (string, error) {
			kubeconfig := resolved[1].(string)
			waitErr := waitForCRCondition(waitCtx, kubeconfig, csvGVR,
				rhoaiNamespace, "rhods-operator",
				"", "Succeeded", 20*time.Minute, true)
			if waitErr != nil {
				return "", fmt.Errorf("waiting for RHOAI CSV: %w", waitErr)
			}
			return "ready", nil
		}).(pulumi.StringOutput)

	// Deriving the DSC name from the CSV wait keeps the DataScienceCluster
	// from being created before the operator is installed.
	gatedDSCName := operatorReady.ApplyT(func(_ string) string {
		return "default-dsc"
	}).(pulumi.StringOutput)

	components := map[string]interface{}{
		"dashboard":            component("Managed"),
		"workbenches":          component("Managed"),
		"datasciencepipelines": component("Managed"),
		// Kserve depends on ServiceMesh and Serverless which are
		// deployed as implicit dependencies of the AI profile.
		"kserve":           component("Managed"),
		"modelmeshserving": component("Managed"),
		"ray":              component("Managed"),
		// Kueue webhook fails on SNC due to missing endpoints
		"kueue":            component("Removed"),
		"trustyai":         component("Managed"),
		"codeflare":        component("Managed"),
		"trainingoperator": component("Removed"),
		"modelregistry":    component("Removed"),
	}

	dataScienceCluster, err := apiextensions.NewCustomResource(ctx, resourceName("dsc"),
		&apiextensions.CustomResourceArgs{
			ApiVersion: pulumi.String("datasciencecluster.opendatahub.io/v1"),
			Kind:       pulumi.String("DataScienceCluster"),
			Metadata: &metav1.ObjectMetaArgs{
				Name: gatedDSCName,
			},
			OtherFields: map[string]interface{}{
				"spec": map[string]interface{}{
					"components": components,
				},
			},
		},
		provider)
	if err != nil {
		return nil, err
	}

	// Block until the DataScienceCluster reports Ready=True (up to 40
	// minutes). The empty namespace is passed because the resource is looked
	// up by its exact name without a namespace.
	clusterReady := pulumi.All(dataScienceCluster.ID(), args.Kubeconfig).ApplyT(
		func(resolved []interface{}) (string, error) {
			kubeconfig := resolved[1].(string)
			waitErr := waitForCRCondition(waitCtx, kubeconfig, dscGVR,
				"", "default-dsc",
				"Ready", "True", 40*time.Minute, false)
			if waitErr != nil {
				return "", fmt.Errorf("waiting for DataScienceCluster: %w", waitErr)
			}
			return "ready", nil
		}).(pulumi.StringOutput)

	ctx.Export("dscReady", clusterReady)

	return dataScienceCluster, nil
}
Loading