From e206311ac334237a09a53058e88e264c23c872a8 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 16:53:38 -0800 Subject: [PATCH 1/8] fix: un namespace policies --- chart/templates/deploymentpolicy-crd.yaml | 2 +- docs/deployment_policy.md | 4 +-- .../helm-chart-test/deployment-policy.yaml | 1 - .../api/v1alpha1/deployment_policy_types.go | 2 +- .../api/v1alpha1/zz_generated.deepcopy.go | 4 +-- ...skyhook.nvidia.com_deploymentpolicies.yaml | 2 +- ...ymentpolicy_v1alpha1_deploymentpolicy.yaml | 1 - operator/internal/dal/dal.go | 7 +++-- operator/internal/dal/mock/DAL.go | 30 ++++++++----------- 9 files changed, 22 insertions(+), 31 deletions(-) diff --git a/chart/templates/deploymentpolicy-crd.yaml b/chart/templates/deploymentpolicy-crd.yaml index d8e77bb7..e8d762af 100644 --- a/chart/templates/deploymentpolicy-crd.yaml +++ b/chart/templates/deploymentpolicy-crd.yaml @@ -23,7 +23,7 @@ spec: listKind: DeploymentPolicyList plural: deploymentpolicies singular: deploymentpolicy - scope: Namespaced + scope: Cluster versions: - name: v1alpha1 schema: diff --git a/docs/deployment_policy.md b/docs/deployment_policy.md index be5dff22..054499d1 100644 --- a/docs/deployment_policy.md +++ b/docs/deployment_policy.md @@ -22,7 +22,6 @@ apiVersion: skyhook.nvidia.com/v1alpha1 kind: DeploymentPolicy metadata: name: my-policy - namespace: skyhook spec: # Default applies to nodes that don't match any compartment default: @@ -260,7 +259,7 @@ spec: ``` **Behavior**: -- DeploymentPolicy must be in the **same namespace** +- DeploymentPolicy is **cluster-scoped** (not namespaced) - Each node is assigned to a compartment based on selectors - Nodes not matching any compartment use the `default` settings @@ -284,7 +283,6 @@ apiVersion: skyhook.nvidia.com/v1alpha1 kind: DeploymentPolicy metadata: name: legacy-equivalent - namespace: skyhook spec: default: budget: diff --git a/k8s-tests/chainsaw/helm/helm-chart-test/deployment-policy.yaml b/k8s-tests/chainsaw/helm/helm-chart-test/deployment-policy.yaml index 07765806..906d8cc6 100644 --- a/k8s-tests/chainsaw/helm/helm-chart-test/deployment-policy.yaml +++ b/k8s-tests/chainsaw/helm/helm-chart-test/deployment-policy.yaml @@ -21,7 +21,6 @@ apiVersion: skyhook.nvidia.com/v1alpha1 kind: DeploymentPolicy metadata: name: helm-test-policy - namespace: skyhook spec: default: budget: diff --git a/operator/api/v1alpha1/deployment_policy_types.go b/operator/api/v1alpha1/deployment_policy_types.go index 17e59013..740cb8f0 100644 --- a/operator/api/v1alpha1/deployment_policy_types.go +++ b/operator/api/v1alpha1/deployment_policy_types.go @@ -150,7 +150,7 @@ type DeploymentPolicySpec struct { } // +kubebuilder:object:root=true -// +kubebuilder:resource:scope=Namespaced +// +kubebuilder:resource:scope=Cluster // DeploymentPolicy configures safe rollout defaults and compartment overrides type DeploymentPolicy struct { diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index 1dc1be66..a19c6fbe 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1,3 +1,5 @@ +//go:build !ignore_autogenerated + /* * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 @@ -16,8 +18,6 @@ * limitations under the License. */ -//go:build !ignore_autogenerated - // Code generated by controller-gen. DO NOT EDIT. package v1alpha1 diff --git a/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml index 89281cae..c8a7688e 100644 --- a/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml +++ b/operator/config/crd/bases/skyhook.nvidia.com_deploymentpolicies.yaml @@ -28,7 +28,7 @@ spec: listKind: DeploymentPolicyList plural: deploymentpolicies singular: deploymentpolicy - scope: Namespaced + scope: Cluster versions: - name: v1alpha1 schema: diff --git a/operator/config/samples/deploymentpolicy_v1alpha1_deploymentpolicy.yaml b/operator/config/samples/deploymentpolicy_v1alpha1_deploymentpolicy.yaml index 701185d5..d472be34 100644 --- a/operator/config/samples/deploymentpolicy_v1alpha1_deploymentpolicy.yaml +++ b/operator/config/samples/deploymentpolicy_v1alpha1_deploymentpolicy.yaml @@ -24,7 +24,6 @@ apiVersion: skyhook.nvidia.com/v1alpha1 kind: DeploymentPolicy metadata: name: sample-policy - namespace: skyhook spec: # Default applies to nodes that don't match any compartment selector default: diff --git a/operator/internal/dal/dal.go b/operator/internal/dal/dal.go index a44bafe9..90bebb08 100644 --- a/operator/internal/dal/dal.go +++ b/operator/internal/dal/dal.go @@ -44,7 +44,7 @@ type DAL interface { GetPod(ctx context.Context, namespace, name string) (*corev1.Pod, error) GetPods(ctx context.Context, opts ...client.ListOption) (*corev1.PodList, error) GetDeploymentPolicies(ctx context.Context, opts ...client.ListOption) (*skyhookv1alpha1.DeploymentPolicyList, error) - GetDeploymentPolicy(ctx context.Context, namespace, name string) (*skyhookv1alpha1.DeploymentPolicy, error) + GetDeploymentPolicy(ctx context.Context, name string) (*skyhookv1alpha1.DeploymentPolicy, error) } type dal struct { @@ -153,10 +153,11 @@ func (e *dal) GetDeploymentPolicies(ctx context.Context, opts ...client.ListOpti return &policies, nil } -func (e *dal) GetDeploymentPolicy(ctx context.Context, namespace, name string) (*skyhookv1alpha1.DeploymentPolicy, error) { +func (e *dal) GetDeploymentPolicy(ctx context.Context, name string) (*skyhookv1alpha1.DeploymentPolicy, error) { var policy skyhookv1alpha1.DeploymentPolicy - if err := e.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &policy); err != nil { + // DeploymentPolicy is cluster-scoped, so no namespace is needed + if err := e.client.Get(ctx, types.NamespacedName{Name: name}, &policy); err != nil { if apierrors.IsNotFound(err) { return nil, nil } diff --git a/operator/internal/dal/mock/DAL.go b/operator/internal/dal/mock/DAL.go index 96815bfe..6c02c635 100644 --- a/operator/internal/dal/mock/DAL.go +++ b/operator/internal/dal/mock/DAL.go @@ -140,8 +140,8 @@ func (_c *MockDAL_GetDeploymentPolicies_Call) RunAndReturn(run func(ctx context. } // GetDeploymentPolicy provides a mock function for the type MockDAL -func (_mock *MockDAL) GetDeploymentPolicy(ctx context.Context, namespace string, name string) (*v1alpha1.DeploymentPolicy, error) { - ret := _mock.Called(ctx, namespace, name) +func (_mock *MockDAL) GetDeploymentPolicy(ctx context.Context, name string) (*v1alpha1.DeploymentPolicy, error) { + ret := _mock.Called(ctx, name) if len(ret) == 0 { panic("no return value specified for GetDeploymentPolicy") @@ -149,18 +149,18 @@ func (_mock *MockDAL) GetDeploymentPolicy(ctx context.Context, namespace string, var r0 *v1alpha1.DeploymentPolicy var r1 error - if returnFunc, ok := ret.Get(0).(func(context.Context, string, string) (*v1alpha1.DeploymentPolicy, error)); ok { - return returnFunc(ctx, namespace, name) + if returnFunc, ok := ret.Get(0).(func(context.Context, string) (*v1alpha1.DeploymentPolicy, error)); ok { + return returnFunc(ctx, name) } - if returnFunc, ok := ret.Get(0).(func(context.Context, string, string) *v1alpha1.DeploymentPolicy); ok { - r0 = returnFunc(ctx, namespace, name) + if returnFunc, ok := ret.Get(0).(func(context.Context, string) *v1alpha1.DeploymentPolicy); ok { + r0 = returnFunc(ctx, name) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(*v1alpha1.DeploymentPolicy) } } - if returnFunc, ok := ret.Get(1).(func(context.Context, string, string) error); ok { - r1 = returnFunc(ctx, namespace, name) + if returnFunc, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = returnFunc(ctx, name) } else { r1 = ret.Error(1) } @@ -174,13 +174,12 @@ type MockDAL_GetDeploymentPolicy_Call struct { // GetDeploymentPolicy is a helper method to define mock.On call // - ctx context.Context -// - namespace string // - name string -func (_e *MockDAL_Expecter) GetDeploymentPolicy(ctx interface{}, namespace interface{}, name interface{}) *MockDAL_GetDeploymentPolicy_Call { - return &MockDAL_GetDeploymentPolicy_Call{Call: _e.mock.On("GetDeploymentPolicy", ctx, namespace, name)} +func (_e *MockDAL_Expecter) GetDeploymentPolicy(ctx interface{}, name interface{}) *MockDAL_GetDeploymentPolicy_Call { + return &MockDAL_GetDeploymentPolicy_Call{Call: _e.mock.On("GetDeploymentPolicy", ctx, name)} } -func (_c *MockDAL_GetDeploymentPolicy_Call) Run(run func(ctx context.Context, namespace string, name string)) *MockDAL_GetDeploymentPolicy_Call { +func (_c *MockDAL_GetDeploymentPolicy_Call) Run(run func(ctx context.Context, name string)) *MockDAL_GetDeploymentPolicy_Call { _c.Call.Run(func(args mock.Arguments) { var arg0 context.Context if args[0] != nil { @@ -190,14 +189,9 @@ func (_c *MockDAL_GetDeploymentPolicy_Call) Run(run func(ctx context.Context, na if args[1] != nil { arg1 = args[1].(string) } - var arg2 string - if args[2] != nil { - arg2 = args[2].(string) - } run( arg0, arg1, - arg2, ) }) return _c @@ -208,7 +202,7 @@ func (_c *MockDAL_GetDeploymentPolicy_Call) Return(deploymentPolicy *v1alpha1.De return _c } -func (_c *MockDAL_GetDeploymentPolicy_Call) RunAndReturn(run func(ctx context.Context, namespace string, name string) (*v1alpha1.DeploymentPolicy, error)) *MockDAL_GetDeploymentPolicy_Call { +func (_c *MockDAL_GetDeploymentPolicy_Call) RunAndReturn(run func(ctx context.Context, name string) (*v1alpha1.DeploymentPolicy, error)) *MockDAL_GetDeploymentPolicy_Call { _c.Call.Return(run) return _c } From e0caf28f512a7d60541167760a7803f10eaf0a33 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 17:23:43 -0800 Subject: [PATCH 2/8] feat: add webhook support for validation policies exist --- .../api/v1alpha1/deployment_policy_webhook.go | 29 ++- .../deployment_policy_webhook_test.go | 130 ++++++++++++- operator/api/v1alpha1/skyhook_webhook.go | 46 ++++- operator/api/v1alpha1/skyhook_webhook_test.go | 154 ++++++++++++++- .../api/v1alpha1/zz_generated.deepcopy.go | 30 --- operator/config/webhook/manifests.yaml | 1 + .../internal/controller/cluster_state_v2.go | 25 ++- .../controller/cluster_state_v2_test.go | 182 ++++++++++++++++++ .../internal/controller/mock/SkyhookNodes.go | 25 ++- .../internal/controller/skyhook_controller.go | 11 +- .../internal/controller/webhook_controller.go | 49 ++++- .../controller/webhook_controller_test.go | 62 ++++++ 12 files changed, 686 insertions(+), 58 deletions(-) diff --git a/operator/api/v1alpha1/deployment_policy_webhook.go b/operator/api/v1alpha1/deployment_policy_webhook.go index 65ff04e1..cb893452 100644 --- a/operator/api/v1alpha1/deployment_policy_webhook.go +++ b/operator/api/v1alpha1/deployment_policy_webhook.go @@ -26,6 +26,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) @@ -36,7 +37,9 @@ var ( // SetupWebhookWithManager will setup the manager to manage the webhooks func (r *DeploymentPolicy) SetupWebhookWithManager(mgr ctrl.Manager) error { - deploymentPolicyWebhook := &DeploymentPolicyWebhook{} + deploymentPolicyWebhook := &DeploymentPolicyWebhook{ + Client: mgr.GetClient(), + } return ctrl.NewWebhookManagedBy(mgr). For(r). WithDefaulter(deploymentPolicyWebhook). @@ -46,7 +49,11 @@ func (r *DeploymentPolicy) SetupWebhookWithManager(mgr ctrl.Manager) error { //+kubebuilder:webhook:path=/mutate-skyhook-nvidia-com-v1alpha1-deploymentpolicy,mutating=true,failurePolicy=fail,sideEffects=None,groups=skyhook.nvidia.com,resources=deploymentpolicies,verbs=create;update,versions=v1alpha1,name=mdeploymentpolicy.kb.io,admissionReviewVersions=v1 +// DeploymentPolicyWebhook validates DeploymentPolicy resources at admission time. +// Includes a client to check if any Skyhooks reference this policy before allowing deletion. +// +kubebuilder:object:generate=false type DeploymentPolicyWebhook struct { + Client client.Client } var _ admission.CustomDefaulter = &DeploymentPolicyWebhook{} @@ -79,7 +86,7 @@ func (r *DeploymentPolicyWebhook) Default(ctx context.Context, obj runtime.Objec return nil } -//+kubebuilder:webhook:path=/validate-skyhook-nvidia-com-v1alpha1-deploymentpolicy,mutating=false,failurePolicy=fail,sideEffects=None,groups=skyhook.nvidia.com,resources=deploymentpolicies,verbs=create;update,versions=v1alpha1,name=vdeploymentpolicy.kb.io,admissionReviewVersions=v1 +//+kubebuilder:webhook:path=/validate-skyhook-nvidia-com-v1alpha1-deploymentpolicy,mutating=false,failurePolicy=fail,sideEffects=None,groups=skyhook.nvidia.com,resources=deploymentpolicies,verbs=create;update;delete,versions=v1alpha1,name=vdeploymentpolicy.kb.io,admissionReviewVersions=v1 var _ admission.CustomValidator = &DeploymentPolicyWebhook{} @@ -119,6 +126,24 @@ func (r *DeploymentPolicyWebhook) ValidateDelete(ctx context.Context, obj runtim deploymentPolicylog.Info("validate delete", "name", deploymentPolicy.Name) + // Check if any Skyhooks are still referencing this policy + skyhooks := &SkyhookList{} + if err := r.Client.List(ctx, skyhooks); err != nil { + return nil, fmt.Errorf("failed to list skyhooks to check for references: %w", err) + } + + referencingSkyhooks := []string{} + for _, skyhook := range skyhooks.Items { + if skyhook.Spec.DeploymentPolicy == deploymentPolicy.Name { + referencingSkyhooks = append(referencingSkyhooks, skyhook.Name) + } + } + + if len(referencingSkyhooks) > 0 { + return nil, fmt.Errorf("cannot delete DeploymentPolicy %q: still referenced by %d Skyhook(s): %v", + deploymentPolicy.Name, len(referencingSkyhooks), referencingSkyhooks) + } + return nil, nil } diff --git a/operator/api/v1alpha1/deployment_policy_webhook_test.go b/operator/api/v1alpha1/deployment_policy_webhook_test.go index 4b621f77..c1ea7b53 100644 --- a/operator/api/v1alpha1/deployment_policy_webhook_test.go +++ b/operator/api/v1alpha1/deployment_policy_webhook_test.go @@ -26,7 +26,13 @@ import ( ) var _ = Describe("DeploymentPolicy", func() { - deploymentPolicyWebhook := &DeploymentPolicyWebhook{} + var deploymentPolicyWebhook *DeploymentPolicyWebhook + + BeforeEach(func() { + deploymentPolicyWebhook = &DeploymentPolicyWebhook{ + Client: k8sClient, + } + }) Context("When creating DeploymentPolicy under Defaulting Webhook", func() { It("Should fill in the default value if a required field is empty", func() { @@ -255,4 +261,126 @@ var _ = Describe("DeploymentPolicy", func() { Expect(err).ToNot(HaveOccurred()) }) }) + + Context("When deleting DeploymentPolicy under Validating Webhook", func() { + It("should allow deletion when no Skyhooks reference it", func() { + policy := &DeploymentPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "unused-policy", + }, + Spec: DeploymentPolicySpec{ + Default: PolicyDefault{ + Budget: DeploymentBudget{ + Percent: ptr.To(100), + }, + Strategy: &DeploymentStrategy{ + Fixed: &FixedStrategy{}, + }, + }, + }, + } + + _, err := deploymentPolicyWebhook.ValidateDelete(ctx, policy) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should reject deletion when Skyhooks reference it", func() { + // Create a deployment policy + policy := &DeploymentPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "policy-in-use", + }, + Spec: DeploymentPolicySpec{ + Default: PolicyDefault{ + Budget: DeploymentBudget{ + Percent: ptr.To(100), + }, + Strategy: &DeploymentStrategy{ + Fixed: &FixedStrategy{}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, policy)).To(Succeed()) + + // Create a Skyhook that references the policy + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook-using-policy", + }, + Spec: SkyhookSpec{ + DeploymentPolicy: "policy-in-use", + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, skyhook)).To(Succeed()) + + // Try to delete the policy - should be rejected + _, err := deploymentPolicyWebhook.ValidateDelete(ctx, policy) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot delete DeploymentPolicy")) + Expect(err.Error()).To(ContainSubstring("policy-in-use")) + Expect(err.Error()).To(ContainSubstring("test-skyhook-using-policy")) + + // Cleanup + Expect(k8sClient.Delete(ctx, skyhook)).To(Succeed()) + Expect(k8sClient.Delete(ctx, policy)).To(Succeed()) + }) + + It("should allow deletion after Skyhooks no longer reference it", func() { + // Create a deployment policy + policy := &DeploymentPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "policy-to-be-freed", + }, + Spec: DeploymentPolicySpec{ + Default: PolicyDefault{ + Budget: DeploymentBudget{ + Percent: ptr.To(100), + }, + Strategy: &DeploymentStrategy{ + Fixed: &FixedStrategy{}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, policy)).To(Succeed()) + + // Create a Skyhook that references the policy + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook-temp", + }, + Spec: SkyhookSpec{ + DeploymentPolicy: "policy-to-be-freed", + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, skyhook)).To(Succeed()) + + // Delete the Skyhook + Expect(k8sClient.Delete(ctx, skyhook)).To(Succeed()) + + // Now deletion should succeed + _, err := deploymentPolicyWebhook.ValidateDelete(ctx, policy) + Expect(err).NotTo(HaveOccurred()) + + // Cleanup + Expect(k8sClient.Delete(ctx, policy)).To(Succeed()) + }) + }) }) diff --git a/operator/api/v1alpha1/skyhook_webhook.go b/operator/api/v1alpha1/skyhook_webhook.go index 4591077c..96554f6f 100644 --- a/operator/api/v1alpha1/skyhook_webhook.go +++ b/operator/api/v1alpha1/skyhook_webhook.go @@ -27,9 +27,12 @@ import ( "github.com/NVIDIA/skyhook/operator/internal/graph" semver "github.com/NVIDIA/skyhook/operator/internal/version" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" ) @@ -42,7 +45,9 @@ var ( // SetupWebhookWithManager will setup the manager to manage the webhooks func (r *Skyhook) SetupWebhookWithManager(mgr ctrl.Manager) error { - skyhookWebhook := &SkyhookWebhook{} + skyhookWebhook := &SkyhookWebhook{ + Client: mgr.GetClient(), + } return ctrl.NewWebhookManagedBy(mgr). For(r). WithDefaulter(skyhookWebhook). @@ -54,7 +59,11 @@ func (r *Skyhook) SetupWebhookWithManager(mgr ctrl.Manager) error { //+kubebuilder:webhook:path=/mutate-skyhook-nvidia-com-v1alpha1-skyhook,mutating=true,failurePolicy=fail,sideEffects=None,groups=skyhook.nvidia.com,resources=skyhooks,verbs=create;update,versions=v1alpha1,name=mskyhook.kb.io,admissionReviewVersions=v1 +// SkyhookWebhook validates Skyhook resources at admission time. +// Includes a client for validating references to DeploymentPolicies. +// +kubebuilder:object:generate=false type SkyhookWebhook struct { + Client client.Client } var _ admission.CustomDefaulter = &SkyhookWebhook{} @@ -90,7 +99,11 @@ func (r *SkyhookWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) skyhooklog.Info("validate create", "name", skyhook.Name) - return nil, skyhook.Validate() + if err := skyhook.Validate(); err != nil { + return nil, err + } + + return nil, r.validateDeploymentPolicyExists(ctx, skyhook) } // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type @@ -103,7 +116,11 @@ func (r *SkyhookWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runt skyhooklog.Info("validate update", "name", skyhook.Name) - return nil, skyhook.Validate() + if err := skyhook.Validate(); err != nil { + return nil, err + } + + return nil, r.validateDeploymentPolicyExists(ctx, skyhook) } // ValidateDelete implements webhook.Validator so a webhook will be registered for the type @@ -238,3 +255,26 @@ func (r *Skyhook) Validate() error { return nil } + +// validateDeploymentPolicyExists checks if the referenced DeploymentPolicy exists +func (r *SkyhookWebhook) validateDeploymentPolicyExists(ctx context.Context, skyhook *Skyhook) error { + // Skip validation if no deployment policy is specified + if skyhook.Spec.DeploymentPolicy == "" { + return nil + } + + // Check if the DeploymentPolicy exists (cluster-scoped, no namespace) + policy := &DeploymentPolicy{} + err := r.Client.Get(ctx, types.NamespacedName{ + Name: skyhook.Spec.DeploymentPolicy, + }, policy) + + if err != nil { + if apierrors.IsNotFound(err) { + return fmt.Errorf("deploymentPolicy %q not found", skyhook.Spec.DeploymentPolicy) + } + return fmt.Errorf("error checking if deploymentPolicy %q exists: %w", skyhook.Spec.DeploymentPolicy, err) + } + + return nil +} diff --git a/operator/api/v1alpha1/skyhook_webhook_test.go b/operator/api/v1alpha1/skyhook_webhook_test.go index d0ba6769..d46990ae 100644 --- a/operator/api/v1alpha1/skyhook_webhook_test.go +++ b/operator/api/v1alpha1/skyhook_webhook_test.go @@ -29,7 +29,13 @@ import ( ) var _ = Describe("Skyhook Webhook", func() { - skyhookWebhook := &SkyhookWebhook{} + var skyhookWebhook *SkyhookWebhook + + BeforeEach(func() { + skyhookWebhook = &SkyhookWebhook{ + Client: k8sClient, + } + }) Context("When creating Skyhook under Defaulting Webhook", func() { It("Should fill in the default value if a required field is empty", func() { @@ -552,4 +558,150 @@ var _ = Describe("Skyhook Webhook", func() { Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("deploymentPolicy and interruptionBudget are mutually exclusive")) }) + + It("should reject skyhook with non-existent deployment policy", func() { + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook", + }, + Spec: SkyhookSpec{ + DeploymentPolicy: "non-existent-policy", + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + + _, err := skyhookWebhook.ValidateCreate(ctx, skyhook) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("deploymentPolicy \"non-existent-policy\" not found")) + }) + + It("should accept skyhook when deployment policy exists", func() { + // Create a cluster-scoped deployment policy first + policy := &DeploymentPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-policy-webhook", + }, + Spec: DeploymentPolicySpec{ + Default: PolicyDefault{ + Budget: DeploymentBudget{ + Percent: ptr.To(100), + }, + Strategy: &DeploymentStrategy{ + Fixed: &FixedStrategy{}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, policy)).To(Succeed()) + + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook-valid", + }, + Spec: SkyhookSpec{ + DeploymentPolicy: "test-policy-webhook", + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + + _, err := skyhookWebhook.ValidateCreate(ctx, skyhook) + Expect(err).NotTo(HaveOccurred()) + + // Cleanup + Expect(k8sClient.Delete(ctx, policy)).To(Succeed()) + }) + + It("should reject skyhook update to reference non-existent deployment policy", func() { + // Create a Skyhook without deployment policy + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook-update", + }, + Spec: SkyhookSpec{ + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, skyhook)).To(Succeed()) + + // Try to update it to reference a non-existent policy + updatedSkyhook := skyhook.DeepCopy() + updatedSkyhook.Spec.DeploymentPolicy = "does-not-exist" + + _, err := skyhookWebhook.ValidateUpdate(ctx, skyhook, updatedSkyhook) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("deploymentPolicy \"does-not-exist\" not found")) + + // Cleanup + Expect(k8sClient.Delete(ctx, skyhook)).To(Succeed()) + }) + + It("should allow skyhook update to reference valid deployment policy", func() { + // Create a deployment policy + policy := &DeploymentPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-policy-for-update", + }, + Spec: DeploymentPolicySpec{ + Default: PolicyDefault{ + Budget: DeploymentBudget{ + Percent: ptr.To(100), + }, + Strategy: &DeploymentStrategy{ + Fixed: &FixedStrategy{}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, policy)).To(Succeed()) + + // Create a Skyhook without deployment policy + skyhook := &Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook-update-valid", + }, + Spec: SkyhookSpec{ + Packages: Packages{ + "test-pkg": { + PackageRef: PackageRef{ + Name: "test-pkg", + Version: "1.0.0", + }, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, skyhook)).To(Succeed()) + + // Update it to reference the valid policy - should succeed + updatedSkyhook := skyhook.DeepCopy() + updatedSkyhook.Spec.DeploymentPolicy = "valid-policy-for-update" + + _, err := skyhookWebhook.ValidateUpdate(ctx, skyhook, updatedSkyhook) + Expect(err).NotTo(HaveOccurred()) + + // Cleanup + Expect(k8sClient.Delete(ctx, skyhook)).To(Succeed()) + Expect(k8sClient.Delete(ctx, policy)).To(Succeed()) + }) }) diff --git a/operator/api/v1alpha1/zz_generated.deepcopy.go b/operator/api/v1alpha1/zz_generated.deepcopy.go index a19c6fbe..b4bb334c 100644 --- a/operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/operator/api/v1alpha1/zz_generated.deepcopy.go @@ -191,21 +191,6 @@ func (in *DeploymentPolicySpec) DeepCopy() *DeploymentPolicySpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *DeploymentPolicyWebhook) DeepCopyInto(out *DeploymentPolicyWebhook) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentPolicyWebhook. -func (in *DeploymentPolicyWebhook) DeepCopy() *DeploymentPolicyWebhook { - if in == nil { - return nil - } - out := new(DeploymentPolicyWebhook) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeploymentStrategy) DeepCopyInto(out *DeploymentStrategy) { *out = *in @@ -741,18 +726,3 @@ func (in *SkyhookStatus) DeepCopy() *SkyhookStatus { in.DeepCopyInto(out) return out } - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *SkyhookWebhook) DeepCopyInto(out *SkyhookWebhook) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SkyhookWebhook. -func (in *SkyhookWebhook) DeepCopy() *SkyhookWebhook { - if in == nil { - return nil - } - out := new(SkyhookWebhook) - in.DeepCopyInto(out) - return out -} diff --git a/operator/config/webhook/manifests.yaml b/operator/config/webhook/manifests.yaml index 41f330a2..5bb67d66 100644 --- a/operator/config/webhook/manifests.yaml +++ b/operator/config/webhook/manifests.yaml @@ -83,6 +83,7 @@ webhooks: operations: - CREATE - UPDATE + - DELETE resources: - deploymentpolicies sideEffects: None diff --git a/operator/internal/controller/cluster_state_v2.go b/operator/internal/controller/cluster_state_v2.go index 640d631b..e0c18de9 100644 --- a/operator/internal/controller/cluster_state_v2.go +++ b/operator/internal/controller/cluster_state_v2.go @@ -121,7 +121,9 @@ func BuildState(skyhooks *v1alpha1.SkyhookList, nodes *corev1.NodeList, deployme // Assign all nodes to the default compartment for backwards compatibility for _, node := range ret.skyhooks[idx].GetNodes() { - ret.skyhooks[idx].AddCompartmentNode(v1alpha1.DefaultCompartmentName, node) + if err := ret.skyhooks[idx].AddCompartmentNode(v1alpha1.DefaultCompartmentName, node); err != nil { + return nil, fmt.Errorf("error adding node to default compartment: %w", err) + } } } else { ret.initializeCompartmentsFromPolicy(idx, &skyhook, deploymentPolicies) @@ -188,10 +190,12 @@ func (ret *clusterState) initializeCompartmentsFromPolicy(idx int, skyhook *v1al // If deployment policy was specified but not found, mark it for error handling if !policyFound { - // We must create a condition here to indicate the DeploymentPolicy is not found, - // because verifying its existence requires access to the client and cluster state. - // This cannot be handled in the webhook: by design, the webhook must remain stateless - // and should not be passed the client, as doing so would violate its required statelessness. + // Set a condition to indicate the DeploymentPolicy is not found. + // Note: The webhook also validates policy existence at creation/update time, + // but this runtime check is needed to handle cases where: + // 1. A policy is deleted after a Skyhook references it + // 2. The webhook was bypassed or disabled + // This provides defense-in-depth validation. ret.skyhooks[idx].GetSkyhook().AddCondition(metav1.Condition{ Type: fmt.Sprintf("%s/DeploymentPolicyNotFound", v1alpha1.METADATA_PREFIX), Status: metav1.ConditionTrue, @@ -303,7 +307,7 @@ type SkyhookNodes interface { GetCompartments() map[string]*wrapper.Compartment AddCompartment(name string, compartment *wrapper.Compartment) - AddCompartmentNode(name string, node wrapper.SkyhookNode) + AddCompartmentNode(name string, node wrapper.SkyhookNode) error AssignNodeToCompartment(node wrapper.SkyhookNode) (string, error) } @@ -1024,8 +1028,13 @@ func (skyhook *skyhookNodes) AddCompartment(name string, compartment *wrapper.Co skyhook.compartments[name] = compartment } -func (skyhook *skyhookNodes) AddCompartmentNode(name string, node wrapper.SkyhookNode) { - skyhook.compartments[name].AddNode(node) +func (skyhook *skyhookNodes) AddCompartmentNode(name string, node wrapper.SkyhookNode) error { + compartment, ok := skyhook.compartments[name] + if !ok { + return fmt.Errorf("compartment %q not found for skyhook %q - missing deployment policy", name, skyhook.skyhook.Name) + } + compartment.AddNode(node) + return nil } // compartmentMatch represents a compartment that matches a node diff --git a/operator/internal/controller/cluster_state_v2_test.go b/operator/internal/controller/cluster_state_v2_test.go index a8f5a95c..6c6082ef 100644 --- a/operator/internal/controller/cluster_state_v2_test.go +++ b/operator/internal/controller/cluster_state_v2_test.go @@ -424,6 +424,188 @@ var _ = Describe("Safe rollouts backwards compatibility", func() { }) }) +var _ = Describe("AddCompartmentNode", func() { + It("should return error when compartment doesn't exist", func() { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + }, + } + + skyhook := &v1alpha1.Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook", + }, + } + + skyhookNode, err := wrapper.NewSkyhookNode(node, skyhook) + Expect(err).NotTo(HaveOccurred()) + + skyhookNodes := &skyhookNodes{ + skyhook: wrapper.NewSkyhookWrapper(skyhook), + nodes: []wrapper.SkyhookNode{skyhookNode}, + compartments: make(map[string]*wrapper.Compartment), + } + + // Try to add node to non-existent compartment + err = skyhookNodes.AddCompartmentNode("non-existent-compartment", skyhookNode) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("compartment \"non-existent-compartment\" not found")) + Expect(err.Error()).To(ContainSubstring("test-skyhook")) + }) + + It("should successfully add node when compartment exists", func() { + compartment := wrapper.NewCompartmentWrapper(&v1alpha1.Compartment{ + Name: "test-compartment", + }, nil) + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-node", + }, + } + + skyhook := &v1alpha1.Skyhook{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-skyhook", + }, + } + + skyhookNode, err := wrapper.NewSkyhookNode(node, skyhook) + Expect(err).NotTo(HaveOccurred()) + + skyhookNodes := &skyhookNodes{ + skyhook: wrapper.NewSkyhookWrapper(skyhook), + nodes: []wrapper.SkyhookNode{skyhookNode}, + compartments: make(map[string]*wrapper.Compartment), + } + skyhookNodes.AddCompartment("test-compartment", compartment) + + // Should succeed + err = skyhookNodes.AddCompartmentNode("test-compartment", skyhookNode) + Expect(err).NotTo(HaveOccurred()) + Expect(len(compartment.GetNodes())).To(Equal(1)) + }) +}) + +var _ = Describe("partitionNodesIntoCompartments", func() { + It("should skip skyhooks with no deployment policy", func() { + skyhook := &v1alpha1.Skyhook{ + ObjectMeta: metav1.ObjectMeta{Name: "test-skyhook"}, + Spec: v1alpha1.SkyhookSpec{ + // No deployment policy + }, + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + } + skyhookNode, err := wrapper.NewSkyhookNode(node, skyhook) + Expect(err).NotTo(HaveOccurred()) + + skyhookNodes := &skyhookNodes{ + skyhook: wrapper.NewSkyhookWrapper(skyhook), + nodes: []wrapper.SkyhookNode{skyhookNode}, + compartments: make(map[string]*wrapper.Compartment), + } + + clusterState := &clusterState{ + skyhooks: []SkyhookNodes{skyhookNodes}, + } + + // Should not error, just skip + err = partitionNodesIntoCompartments(clusterState) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should skip skyhooks with empty compartments (missing policy)", func() { + skyhook := &v1alpha1.Skyhook{ + ObjectMeta: metav1.ObjectMeta{Name: "test-skyhook"}, + Spec: v1alpha1.SkyhookSpec{ + DeploymentPolicy: "missing-policy", // Policy doesn't exist + }, + } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, + } + skyhookNode, err := wrapper.NewSkyhookNode(node, skyhook) + Expect(err).NotTo(HaveOccurred()) + + skyhookNodes := &skyhookNodes{ + skyhook: wrapper.NewSkyhookWrapper(skyhook), + nodes: []wrapper.SkyhookNode{skyhookNode}, + compartments: make(map[string]*wrapper.Compartment), // Empty - policy not found + } + + clusterState := &clusterState{ + skyhooks: []SkyhookNodes{skyhookNodes}, + } + + // Should skip gracefully instead of panicking + err = partitionNodesIntoCompartments(clusterState) + Expect(err).NotTo(HaveOccurred()) + }) + + It("should partition nodes when compartments exist", func() { + compartment := wrapper.NewCompartmentWrapper(&v1alpha1.Compartment{ + Name: "test-compartment", + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{"type": "gpu"}, + }, + }, nil) + + defaultCompartment := wrapper.NewCompartmentWrapper(&v1alpha1.Compartment{ + Name: v1alpha1.DefaultCompartmentName, + }, nil) + + skyhook := &v1alpha1.Skyhook{ + ObjectMeta: metav1.ObjectMeta{Name: "test-skyhook"}, + Spec: v1alpha1.SkyhookSpec{ + DeploymentPolicy: "test-policy", + }, + } + + node1 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gpu-node", + Labels: map[string]string{"type": "gpu"}, + }, + } + node2 := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cpu-node", + Labels: map[string]string{"type": "cpu"}, + }, + } + + skyhookNode1, err := wrapper.NewSkyhookNode(node1, skyhook) + Expect(err).NotTo(HaveOccurred()) + skyhookNode2, err := wrapper.NewSkyhookNode(node2, skyhook) + Expect(err).NotTo(HaveOccurred()) + + skyhookNodes := &skyhookNodes{ + skyhook: wrapper.NewSkyhookWrapper(skyhook), + nodes: []wrapper.SkyhookNode{skyhookNode1, skyhookNode2}, + compartments: make(map[string]*wrapper.Compartment), + } + skyhookNodes.AddCompartment("test-compartment", compartment) + skyhookNodes.AddCompartment(v1alpha1.DefaultCompartmentName, defaultCompartment) + + clusterState := &clusterState{ + skyhooks: []SkyhookNodes{skyhookNodes}, + } + + // Should succeed + err = partitionNodesIntoCompartments(clusterState) + Expect(err).NotTo(HaveOccurred()) + + // Verify nodes were assigned correctly + Expect(len(compartment.GetNodes())).To(Equal(1)) + Expect(compartment.GetNodes()[0].GetNode().Name).To(Equal("gpu-node")) + Expect(len(defaultCompartment.GetNodes())).To(Equal(1)) + Expect(defaultCompartment.GetNodes()[0].GetNode().Name).To(Equal("cpu-node")) + }) +}) + var _ = Describe("CleanupRemovedNodes", func() { It("should cleanup removed nodes from all status maps", func() { // Create mock skyhook nodes diff --git a/operator/internal/controller/mock/SkyhookNodes.go b/operator/internal/controller/mock/SkyhookNodes.go index 5829630a..3e539749 100644 --- a/operator/internal/controller/mock/SkyhookNodes.go +++ b/operator/internal/controller/mock/SkyhookNodes.go @@ -103,9 +103,20 @@ func (_c *MockSkyhookNodes_AddCompartment_Call) RunAndReturn(run func(name strin } // AddCompartmentNode provides a mock function for the type MockSkyhookNodes -func (_mock *MockSkyhookNodes) AddCompartmentNode(name string, node wrapper.SkyhookNode) { - _mock.Called(name, node) - return +func (_mock *MockSkyhookNodes) AddCompartmentNode(name string, node wrapper.SkyhookNode) error { + ret := _mock.Called(name, node) + + if len(ret) == 0 { + panic("no return value specified for AddCompartmentNode") + } + + var r0 error + if returnFunc, ok := ret.Get(0).(func(string, wrapper.SkyhookNode) error); ok { + r0 = returnFunc(name, node) + } else { + r0 = ret.Error(0) + } + return r0 } // MockSkyhookNodes_AddCompartmentNode_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'AddCompartmentNode' @@ -138,13 +149,13 @@ func (_c *MockSkyhookNodes_AddCompartmentNode_Call) Run(run func(name string, no return _c } -func (_c *MockSkyhookNodes_AddCompartmentNode_Call) Return() *MockSkyhookNodes_AddCompartmentNode_Call { - _c.Call.Return() +func (_c *MockSkyhookNodes_AddCompartmentNode_Call) Return(err error) *MockSkyhookNodes_AddCompartmentNode_Call { + _c.Call.Return(err) return _c } -func (_c *MockSkyhookNodes_AddCompartmentNode_Call) RunAndReturn(run func(name string, node wrapper.SkyhookNode)) *MockSkyhookNodes_AddCompartmentNode_Call { - _c.Run(run) +func (_c *MockSkyhookNodes_AddCompartmentNode_Call) RunAndReturn(run func(name string, node wrapper.SkyhookNode) error) *MockSkyhookNodes_AddCompartmentNode_Call { + _c.Call.Return(run) return _c } diff --git a/operator/internal/controller/skyhook_controller.go b/operator/internal/controller/skyhook_controller.go index c8eaef22..ec88de64 100644 --- a/operator/internal/controller/skyhook_controller.go +++ b/operator/internal/controller/skyhook_controller.go @@ -2308,6 +2308,13 @@ func partitionNodesIntoCompartments(clusterState *clusterState) error { continue } + // Skip if no compartments exist (e.g., deployment policy not found) + // The webhook should prevent this at admission time, and the controller sets a condition at runtime, + // but we guard here to prevent panics if the policy goes missing + if len(skyhook.GetCompartments()) == 0 { + continue + } + // Clear all compartments before reassigning nodes to prevent stale nodes // This ensures nodes are only in their current compartment based on current labels for _, compartment := range skyhook.GetCompartments() { @@ -2319,7 +2326,9 @@ func partitionNodesIntoCompartments(clusterState *clusterState) error { if err != nil { return fmt.Errorf("error assigning node %s: %w", node.GetNode().Name, err) } - skyhook.AddCompartmentNode(compartmentName, node) + if err := skyhook.AddCompartmentNode(compartmentName, node); err != nil { + return fmt.Errorf("error adding node %s to compartment %s: %w", node.GetNode().Name, compartmentName, err) + } } } diff --git a/operator/internal/controller/webhook_controller.go b/operator/internal/controller/webhook_controller.go index 7637800b..70a68097 100644 --- a/operator/internal/controller/webhook_controller.go +++ b/operator/internal/controller/webhook_controller.go @@ -23,6 +23,7 @@ import ( "context" "fmt" "net/http" + "reflect" "time" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -231,9 +232,9 @@ func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Conte } needUpdate := false + expectedRules := webhookRule() for i := range existingValidating.Webhooks { - if len(existingValidating.Webhooks[i].ClientConfig.CABundle) == 0 { - existingValidating.Webhooks[i].ClientConfig.CABundle = caBundle + if validatingWebhookNeedsUpdate(&existingValidating.Webhooks[i], caBundle, expectedRules) { needUpdate = true } } @@ -257,8 +258,7 @@ func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Conte needUpdate = false for i := range existingMutating.Webhooks { - if len(existingMutating.Webhooks[i].ClientConfig.CABundle) == 0 { - existingMutating.Webhooks[i].ClientConfig.CABundle = caBundle + if mutatingWebhookNeedsUpdate(&existingMutating.Webhooks[i], caBundle, expectedRules) { needUpdate = true } } @@ -369,7 +369,7 @@ func webhookRule() []admissionregistrationv1.RuleWithOperations { }, }, { - Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create, admissionregistrationv1.Update}, + Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create, admissionregistrationv1.Update, admissionregistrationv1.Delete}, Rule: admissionregistrationv1.Rule{ APIGroups: []string{v1alpha1.GroupVersion.Group}, APIVersions: []string{v1alpha1.GroupVersion.Version}, @@ -379,6 +379,45 @@ func webhookRule() []admissionregistrationv1.RuleWithOperations { } } +// validatingWebhookNeedsUpdate checks if a validating webhook needs to be updated with new CABundle or Rules +// Returns true if updates were made to the webhook +func validatingWebhookNeedsUpdate(webhook *admissionregistrationv1.ValidatingWebhook, caBundle []byte, expectedRules []admissionregistrationv1.RuleWithOperations) bool { + needUpdate := false + + // Check if CABundle needs to be set + if len(webhook.ClientConfig.CABundle) == 0 { + webhook.ClientConfig.CABundle = caBundle + needUpdate = true + } + + // Check if rules need to be updated + if !reflect.DeepEqual(webhook.Rules, expectedRules) { + webhook.Rules = expectedRules + needUpdate = true + } + + return needUpdate +} + +// mutatingWebhookNeedsUpdate checks if a mutating webhook needs to be updated +func mutatingWebhookNeedsUpdate(webhook *admissionregistrationv1.MutatingWebhook, caBundle []byte, expectedRules []admissionregistrationv1.RuleWithOperations) bool { + needUpdate := false + + // Check if CABundle needs to be set + if len(webhook.ClientConfig.CABundle) == 0 { + webhook.ClientConfig.CABundle = caBundle + needUpdate = true + } + + // Check if rules need to be updated + if !reflect.DeepEqual(webhook.Rules, expectedRules) { + webhook.Rules = expectedRules + needUpdate = true + } + + return needUpdate +} + // WebhookSecretReadyzCheck is a readyz check for the webhook secret, if it does not exist, it will return an error // if it exists, it will wait for the secret to be ready, this makes sure that we don't start the operator // if the webhook secret is not ready diff --git a/operator/internal/controller/webhook_controller_test.go b/operator/internal/controller/webhook_controller_test.go index ec6a1983..52a1ae2d 100644 --- a/operator/internal/controller/webhook_controller_test.go +++ b/operator/internal/controller/webhook_controller_test.go @@ -233,6 +233,68 @@ var _ = Describe("WebhookController", Ordered, func() { }) }) + Describe("webhook rules comparison", func() { + It("should detect when rules are different", func() { + oldRules := []admissionregistrationv1.RuleWithOperations{ + { + Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create}, + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{v1alpha1.GroupVersion.Group}, + APIVersions: []string{v1alpha1.GroupVersion.Version}, + Resources: []string{"skyhooks"}, + }, + }, + } + + webhook := admissionregistrationv1.ValidatingWebhook{ + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: []byte("existing-ca"), + }, + Rules: oldRules, + } + + caBundle := []byte("new-ca") + expectedRules := webhookRule() + + needsUpdate := validatingWebhookNeedsUpdate(&webhook, caBundle, expectedRules) + Expect(needsUpdate).To(BeTrue(), "should detect rules mismatch") + Expect(webhook.Rules).To(Equal(expectedRules), "rules should be updated") + }) + + It("should not update when rules are identical", func() { + expectedRules := webhookRule() + + webhook := admissionregistrationv1.ValidatingWebhook{ + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: []byte("existing-ca"), + }, + Rules: expectedRules, + } + + caBundle := []byte("existing-ca") + + needsUpdate := validatingWebhookNeedsUpdate(&webhook, caBundle, expectedRules) + Expect(needsUpdate).To(BeFalse(), "should not detect changes when rules are identical") + }) + + It("should update CABundle when empty", func() { + expectedRules := webhookRule() + + webhook := admissionregistrationv1.MutatingWebhook{ + ClientConfig: admissionregistrationv1.WebhookClientConfig{ + CABundle: nil, // Empty CABundle + }, + Rules: expectedRules, + } + + caBundle := []byte("new-ca") + + needsUpdate := mutatingWebhookNeedsUpdate(&webhook, caBundle, expectedRules) + Expect(needsUpdate).To(BeTrue(), "should detect empty CABundle") + Expect(webhook.ClientConfig.CABundle).To(Equal(caBundle), "CABundle should be updated") + }) + }) + Describe("Disk and Secret-to-Disk Sync Logic", func() { It("should write and read cert and key files correctly", func() { err := writeCertAndKey([]byte(cachedCert.TLSCert), []byte(cachedCert.TLSKey), tmpDir) From 1f2bfb92c301cd5a23f47e532fab6d5535701120 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 18:54:58 -0800 Subject: [PATCH 3/8] fix(ci): allow for manual run of workflows --- .github/workflows/operator-ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/operator-ci.yaml b/.github/workflows/operator-ci.yaml index 213a8ee9..cb55eb7d 100644 --- a/.github/workflows/operator-ci.yaml +++ b/.github/workflows/operator-ci.yaml @@ -18,6 +18,7 @@ name: Operator CI on: + workflow_dispatch: {} pull_request: branches: - main From 5c43076faba115a8e3fdf3fbec4f9078798b87f2 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 19:04:40 -0800 Subject: [PATCH 4/8] fix(ci): operator PR to run on all operator paths, ignoring branch --- .github/workflows/operator-ci.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/operator-ci.yaml b/.github/workflows/operator-ci.yaml index cb55eb7d..346ec8f0 100644 --- a/.github/workflows/operator-ci.yaml +++ b/.github/workflows/operator-ci.yaml @@ -20,8 +20,6 @@ name: Operator CI on: workflow_dispatch: {} pull_request: - branches: - - main paths: - operator/** - containers/operator.Dockerfile From b120311f689ebff0dfaa5b2c3fb7aba15a7ba495 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 19:46:03 -0800 Subject: [PATCH 5/8] feat(ci): make ci coverage include new deployment policies suite --- .github/workflows/operator-ci.yaml | 75 +++++++++++++++++++++++------- operator/Makefile | 47 +++++++++++++++---- 2 files changed, 97 insertions(+), 25 deletions(-) diff --git a/.github/workflows/operator-ci.yaml b/.github/workflows/operator-ci.yaml index 346ec8f0..fb81a685 100644 --- a/.github/workflows/operator-ci.yaml +++ b/.github/workflows/operator-ci.yaml @@ -54,18 +54,22 @@ jobs: # Standard E2E tests on all supported K8s versions k8s-version: ["1.31.14", "1.32.11", "1.33.7", "1.34.3", "1.35.0"] test-suite: ["e2e"] + make-targets: ["setup-kind-cluster e2e-tests"] include: - # Deployment policy tests on 15-node cluster (K8s 1.34 only) + # Deployment policy tests on 15-node cluster (K8s 1.35 only) - k8s-version: "1.35.0" test-suite: deployment-policy kind-config: k8s-tests/chainsaw/deployment-policy/kind-config.yaml - make-target: deployment-policy-tests + make-targets: "setup-kind-cluster deployment-policy-tests" # CLI e2e tests on K8s 1.34 only - k8s-version: "1.35.0" test-suite: cli-e2e - make-target: cli-e2e-tests + make-targets: "setup-kind-cluster cli-e2e-tests" + - k8s-version: "1.35.0" + test-suite: unit-tests + make-targets: "vet lint unit-tests" fail-fast: false # Continue testing other versions if one fails - name: ${{ matrix.test-suite }}-tests (k8s-${{ matrix.k8s-version }}) + name: ${{ matrix.test-suite }} (k8s-${{ matrix.k8s-version }}) steps: - uses: actions/checkout@v4 with: @@ -77,16 +81,18 @@ jobs: go-version: ${{ env.GO_VERSION }} cache-dependency-path: operator/go.sum - name: Log in to the Container registry + if: matrix.test-suite != 'unit-tests' # unit tests don't need a container registry login uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Create Kubernetes KinD Cluster v${{ matrix.k8s-version }} + if: matrix.test-suite != 'unit-tests' # unit tests don't need a kind cluster id: kind uses: helm/kind-action@v1 with: - version: v0.30.0 + version: v0.31.0 node_image: kindest/node:v${{ matrix.k8s-version }} config: ${{ matrix.kind-config || 'operator/config/local-dev/kind-config.yaml' }} cluster_name: kind @@ -118,18 +124,53 @@ jobs: - name: Run ${{ matrix.test-suite }} tests run: | cd operator - if [ "${{ matrix.test-suite }}" = "e2e" ]; then - make setup-kind-cluster - make test - elif [ "${{ matrix.test-suite }}" = "cli-e2e" ]; then - make setup-kind-cluster - make ${{ matrix.make-target }} - else - make ${{ matrix.make-target }} - fi - # Upload coverage to Coveralls using goveralls (Go-specific tool) - - name: Upload coverage to Coveralls - if: matrix.test-suite == 'e2e' && matrix.k8s-version == '1.35.0' + make ${{ matrix.make-targets }} merge-coverage + # Save coverage artifacts from any test suite that generates them + - name: Upload coverage artifact + if: hashFiles('operator/reporting/cover.out') != '' + uses: actions/upload-artifact@v4 + with: + name: coverage-${{ matrix.test-suite }}-k8s-${{ matrix.k8s-version }} + path: operator/reporting/cover.out + retention-days: 1 + if-no-files-found: ignore + + # Merge coverage from all test suites and upload to Coveralls + upload-coverage: + runs-on: ubuntu-latest + needs: [tests] + # Only upload coverage for PRs and main branch pushes, not for tags + if: success() && !startsWith(github.ref, 'refs/tags/') + steps: + - uses: actions/checkout@v4 + - name: Setup Go ${{ env.GO_VERSION }} + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: Download all coverage artifacts + uses: actions/download-artifact@v4 + with: + pattern: coverage-* + path: coverage-artifacts + merge-multiple: false + - name: Merge coverage files + run: | + cd operator + mkdir -p reporting + # Combine all coverage files + for file in ../coverage-artifacts/*/cover.out; do + if [ -f "$file" ]; then + echo "Merging coverage from $file" + cat "$file" >> reporting/all-cover.out + fi + done + # Create final merged coverage file + echo "mode: set" > reporting/cover.out + tail -n +2 reporting/all-cover.out | sed '/mode: set/d' >> reporting/cover.out + # Show total coverage + echo "📊 Total Combined Coverage:" + go tool cover -func reporting/cover.out | grep total + - name: Upload to Coveralls uses: coverallsapp/github-action@v2 with: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/operator/Makefile b/operator/Makefile index b228dd3e..acac5812 100644 --- a/operator/Makefile +++ b/operator/Makefile @@ -142,7 +142,7 @@ $(REPORTING): mkdir -p $@ .PHONY: test -test:: reporting manifests generate fmt vet lint unit-tests e2e-tests helm-tests operator-agent-tests ## Run all tests. +test:: reporting manifests generate fmt vet lint unit-tests e2e-tests cli-e2e-tests helm-tests operator-agent-tests ## Run all tests. ifndef CI ## we double define test so we can do thing different if in ci vs local @@ -176,11 +176,30 @@ e2e-tests: chainsaw install run ## Run end to end tests. $(MAKE) kill go tool covdata textfmt -i=$(REPORTING)/int -o reporting/int.coverprofile -cli-e2e-tests: chainsaw install run build-cli ## Run CLI end to end tests. +cli-e2e-tests: chainsaw install run build-cli-with-coverage ## Run CLI end to end tests with coverage. ## Tests the kubectl-skyhook CLI commands against a real cluster ## Requires nodes labeled with skyhook.nvidia.com/test-node=skyhooke2e + @mkdir -p $(REPORTING)/cli-cover + @# Replace CLI binary with a wrapper that sets GOCOVERDIR for coverage collection + @mv bin/skyhook bin/skyhook.orig 2>/dev/null || true + @echo '#!/bin/bash' > bin/skyhook + @echo 'SCRIPT_DIR="$$(cd "$$(dirname "$${BASH_SOURCE[0]}")" && pwd)"' >> bin/skyhook + @echo 'export GOCOVERDIR="$$SCRIPT_DIR/../reporting/cli-cover"' >> bin/skyhook + @echo 'exec "$$SCRIPT_DIR/skyhook-cover" "$$@"' >> bin/skyhook + @chmod +x bin/skyhook $(CHAINSAW) test --test-dir ../k8s-tests/chainsaw/cli $(CHAINSAW_ARGS) $(MAKE) kill + @# Restore original CLI binary + @mv bin/skyhook.orig bin/skyhook 2>/dev/null || true + @# Extract coverage from operator running in cluster + go tool covdata textfmt -i=$(REPORTING)/int -o reporting/cli-int.coverprofile + @# Extract coverage from CLI binary if any was generated + @if [ -d "$(REPORTING)/cli-cover" ] && [ -n "$$(ls -A $(REPORTING)/cli-cover 2>/dev/null)" ]; then \ + go tool covdata textfmt -i=$(REPORTING)/cli-cover -o=$(REPORTING)/cli-e2e.coverprofile; \ + echo "✅ CLI coverage collected"; \ + else \ + echo "â„šī¸ No CLI coverage data (this is OK - CLI is covered by unit tests)"; \ + fi helm-tests: helm chainsaw ## Here we need to run the operator so that the old CRD can deleted along with @@ -197,12 +216,14 @@ operator-agent-tests: chainsaw install ## Run operator agent tests. $(CHAINSAW) test --test-dir ../k8s-tests/operator-agent $(CHAINSAW_ARGS) $(MAKE) kill ## ../k8s-tests/operator-agent/setup.sh kind-worker teardown + go tool covdata textfmt -i=$(REPORTING)/int -o reporting/int.coverprofile deployment-policy-tests: chainsaw install run ## Run all deployment policy E2E tests (requires 15-node cluster). ## requires a 15-node cluster to be running with access ## use 'make create-deployment-policy-cluster' to create the cluster $(CHAINSAW) test --test-dir ../k8s-tests/chainsaw/deployment-policy $(CHAINSAW_ARGS) $(MAKE) kill + go tool covdata textfmt -i=$(REPORTING)/int -o reporting/int.coverprofile create-deployment-policy-cluster: ## Create a 15-node Kind cluster for deployment policy tests. @echo "🔧 Creating 15-node Kind cluster for deployment policy tests..." @@ -255,11 +276,14 @@ endif .PHONY: merage-coverage merge-coverage: ## merge coverage file into one so we can run totals and html reporting - cat $(REPORTING)/*.coverprofile > reporting/temp-cover.out - echo "mode: set" > $(REPORTING)/cover.out - ## skip first line with +2 - tail -n +2 $(REPORTING)/temp-cover.out | sed '/mode: set/d' >> $(REPORTING)/cover.out - $(sedrp) 's|^/.*skyhook/operator/(.*)$$|github\.com/NVIDIA/skyhook/operator/\1|g' $(REPORTING)/cover.out + @if ls $(REPORTING)/*.coverprofile 1> /dev/null 2>&1; then \ + cat $(REPORTING)/*.coverprofile > reporting/temp-cover.out; \ + echo "mode: set" > $(REPORTING)/cover.out; \ + tail -n +2 $(REPORTING)/temp-cover.out | sed '/mode: set/d' >> $(REPORTING)/cover.out; \ + $(sedrp) 's|^/.*skyhook/operator/(.*)$$|github\.com/NVIDIA/skyhook/operator/\1|g' $(REPORTING)/cover.out; \ + else \ + echo "No coverage files found - skipping merge"; \ + fi .PHONY: lint lint: golangci-lint license-check ## Run golangci-lint linter & yamllint @@ -280,15 +304,22 @@ build-manager: manifests generate fmt vet lint ## Build manager binary. .PHONY: build-cli CLI_LDFLAGS := -ldflags "-X github.com/NVIDIA/skyhook/operator/internal/version.VERSION=$(VERSION) -X github.com/NVIDIA/skyhook/operator/internal/version.GIT_SHA=$(GIT_SHA)" -build-cli: fmt vet ## Build CLI binary. +build-cli: ## Build CLI binary. go build $(GOFLAGS) $(CLI_LDFLAGS) -o bin/skyhook cmd/cli/main.go +build-cli-with-coverage: ## Build CLI binary with coverage instrumentation. + go build $(GOFLAGS) -cover $(CLI_LDFLAGS) -o bin/skyhook-cover cmd/cli/main.go + .PHONY: run ENABLE_WEBHOOKS?=false BACKGROUND?=true AGENT_IMAGE?=$(IMG_REPO)/agentless:6.2.0 LOG_LEVEL?=info +ifndef CI +run: manifests generate reporting install kill ## Run a controller from your host. +else run: manifests generate fmt vet lint reporting install kill ## Run a controller from your host. +endif mkdir -p $(REPORTING)/int rm -rf $(REPORTING)/int/* go build $(GOFLAGS) -cover $(GO_LDFLAGS) -o $(LOCALBIN)/manager cmd/manager/main.go From e6edfdb3bf17ef2a3087970f5451d4860ec1af0b Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 21:33:18 -0800 Subject: [PATCH 6/8] feat(ci): split docker platforms speed up docker build time --- .github/workflows/operator-ci.yaml | 163 +++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 41 deletions(-) diff --git a/.github/workflows/operator-ci.yaml b/.github/workflows/operator-ci.yaml index fb81a685..0becd2b4 100644 --- a/.github/workflows/operator-ci.yaml +++ b/.github/workflows/operator-ci.yaml @@ -177,17 +177,60 @@ jobs: file: operator/reporting/cover.out format: golang - # Build multi-platform container image and push to registry - build-and-push-operator: + # Compute image tags and version metadata once for reuse + compute-metadata: runs-on: ubuntu-latest - needs: [tests] # Don't run the build and push if tests fail - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. + needs: [tests] + outputs: + git-sha: ${{ steps.meta.outputs.git-sha }} + version: ${{ steps.meta.outputs.version }} + tags: ${{ steps.meta.outputs.tags }} + steps: + - uses: actions/checkout@v4 + - name: Fetch all tags + run: git fetch --tags --force + - name: Compute metadata + id: meta + run: | + export GIT_SHA=$(git rev-parse --short ${{ github.sha }}) + echo "git-sha=${GIT_SHA}" >> $GITHUB_OUTPUT + + case ${{ github.ref_type }} in + branch) + export VERSION=$(git tag --list 'operator*' --sort=-v:refname | head -n 1 | cut -d/ -f2)+${GIT_SHA} + TAGS="${GIT_SHA} $(echo "${VERSION}" | tr + -)" + ;; + tag) + export VERSION=$(echo "${{ github.ref_name }}" | cut -f 2 -d /) + TAGS="${GIT_SHA} ${VERSION} latest" + ;; + *) + echo "Unknown ref type: ${{ github.ref_type }}" + exit 1 + ;; + esac + + echo "version=${VERSION}" >> $GITHUB_OUTPUT + echo "tags=${TAGS}" >> $GITHUB_OUTPUT + echo "đŸ“Ļ Version: ${VERSION}" + echo "đŸˇī¸ Tags: ${TAGS}" + + # Build container images on native architecture runners (much faster than QEMU) + build-operator: + runs-on: ${{ matrix.runner }} + needs: [compute-metadata] + strategy: + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm permissions: contents: read packages: write attestations: write id-token: write - # steps: - name: Checkout repository uses: actions/checkout@v4 @@ -199,60 +242,98 @@ jobs: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - # Setup for multi-platform builds (linux/amd64, linux/arm64) - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - # Build and tag container image based on git ref type - - name: Build the operator container image + # Build and tag container image for single platform on native hardware + - name: Build the operator container image (${{ matrix.platform }}) id: build env: - platforms: ${{ env.PLATFORMS }} + GIT_SHA: ${{ needs.compute-metadata.outputs.git-sha }} + VERSION: ${{ needs.compute-metadata.outputs.version }} run: | - apt-get update && apt-get install -y make git jq + sudo apt-get update && sudo apt-get install -y jq cd operator - # if this is a tag build, use the tag as the version, otherwise use the sha - git fetch --all - export GIT_SHA=$(git rev-parse --short ${{ github.sha }}) - TAGS="-t ${REGISTRY@L}/${{env.IMAGE_NAME}}/operator:${GIT_SHA}" - case ${{ github.ref_type }} in - branch) - # The last tag + current git sha - export OPERATOR_VERSION=$(git tag --list 'operator*' --sort=-v:refname | head -n 1 | cut -d/ -f2)+${GIT_SHA} - TAGS="$TAGS -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/operator:$(echo "${OPERATOR_VERSION}" | tr + -)" - ;; - tag) - # The version part of the tag - export OPERATOR_VERSION=$(echo "${{ github.ref_name }}" | cut -f 2 -d /) - TAGS="$TAGS -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/operator:${OPERATOR_VERSION} -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/operator:latest" - ;; - *) - echo "Unkown type ${{ github.ref_type }}" - exit 1 - ;; - esac + PLATFORM_TAG=$(echo "${{ matrix.platform }}" | tr '/' '-') + + # Lowercase for Docker compliance + IMAGE_NAME=$(echo "${{env.IMAGE_NAME}}" | tr '[:upper:]' '[:lower:]') + REGISTRY=$(echo "${{env.REGISTRY}}" | tr '[:upper:]' '[:lower:]') + + # Build platform-specific tags for all target tags + TAGS="" + for TAG in ${{ needs.compute-metadata.outputs.tags }}; do + TAGS="$TAGS -t ${REGISTRY}/${IMAGE_NAME}/operator:${TAG}-${PLATFORM_TAG}" + done + set -x docker buildx build \ --build-arg GIT_SHA=${GIT_SHA} \ - --build-arg VERSION=${OPERATOR_VERSION} \ - --build-arg GO_VERSION=${GO_VERSION} \ + --build-arg VERSION=${VERSION} \ + --build-arg GO_VERSION=${{ env.GO_VERSION }} \ --push \ - --platform ${{ env.PLATFORMS }} \ + --platform ${{ matrix.platform }} \ + --provenance=false \ ${TAGS@L} \ --metadata-file=metadata.json \ -f ../containers/operator.Dockerfile . - cat metadata.json + echo "digest=$(cat metadata.json | jq -r .\"containerimage.digest\")" >> $GITHUB_OUTPUT - cat $GITHUB_OUTPUT + + # Create multi-platform manifest from individual architecture builds + create-manifest: + runs-on: ubuntu-latest + needs: [compute-metadata, build-operator] + permissions: + contents: read + packages: write + attestations: write + id-token: write + steps: + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Create and push multi-platform manifests, then delete platform-specific tags + - name: Create manifests and cleanup + id: manifest + run: | + sudo apt-get update && sudo apt-get install -y jq + + # Lowercase for Docker compliance + IMAGE_NAME=$(echo "${{env.IMAGE_NAME}}" | tr '[:upper:]' '[:lower:]') + REGISTRY=$(echo "${{env.REGISTRY}}" | tr '[:upper:]' '[:lower:]') + + # Create manifest for each tag combining amd64 and arm64 images + for TAG in ${{ needs.compute-metadata.outputs.tags }}; do + FULL_TAG="${REGISTRY}/${IMAGE_NAME}/operator:${TAG}" + echo "đŸ“Ļ Creating manifest for $FULL_TAG" + docker manifest create $FULL_TAG \ + ${FULL_TAG}-linux-amd64 \ + ${FULL_TAG}-linux-arm64 + docker manifest push $FULL_TAG + echo "✅ Pushed $FULL_TAG" + done + + # Get digest of the main tag (git sha) for attestation + MAIN_TAG="${REGISTRY}/${IMAGE_NAME}/operator:${{ needs.compute-metadata.outputs.git-sha }}" + DIGEST=$(docker manifest inspect $MAIN_TAG | jq -r '.manifests[0].digest') + echo "digest=$DIGEST" >> $GITHUB_OUTPUT + echo "subject-name=${REGISTRY}/${IMAGE_NAME}/operator" >> $GITHUB_OUTPUT + + # Note: Platform-specific tags (e.g., v1.0.0-linux-amd64) are left in registry + # as intermediate artifacts. Users should pull the multi-platform manifest tags. + # GitHub Container Registry doesn't easily support programmatic tag deletion. + echo "✅ Multi-platform manifests created successfully" - # Generate supply chain security attestation + # Generate supply chain security attestation for the multi-platform manifest - name: Generate artifact attestation uses: actions/attest-build-provenance@v2 with: - subject-name: ${{ env.REGISTRY }}/${{env.IMAGE_NAME}}/operator - subject-digest: ${{ steps.build.outputs.digest }} + subject-name: ${{ steps.manifest.outputs.subject-name }} + subject-digest: ${{ steps.manifest.outputs.digest }} push-to-registry: true From 71bc04568de4111cc9e6eb3088b4b9a075891d75 Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Tue, 23 Dec 2025 22:52:55 -0800 Subject: [PATCH 7/8] fix: bad webhook rules --- chart/templates/validating-webhook.yaml | 1 + .../internal/controller/webhook_controller.go | 170 ++++++++++++++---- .../controller/webhook_controller_test.go | 6 +- 3 files changed, 137 insertions(+), 40 deletions(-) diff --git a/chart/templates/validating-webhook.yaml b/chart/templates/validating-webhook.yaml index a061083e..6081dcfb 100644 --- a/chart/templates/validating-webhook.yaml +++ b/chart/templates/validating-webhook.yaml @@ -53,6 +53,7 @@ webhooks: operations: - CREATE - UPDATE + - DELETE resources: - deploymentpolicies scope: '*' diff --git a/operator/internal/controller/webhook_controller.go b/operator/internal/controller/webhook_controller.go index 70a68097..140c1f24 100644 --- a/operator/internal/controller/webhook_controller.go +++ b/operator/internal/controller/webhook_controller.go @@ -42,6 +42,29 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) +const ( + // Webhook configuration names + validatingWebhookConfigName = "skyhook-operator-validating-webhook" + mutatingWebhookConfigName = "skyhook-operator-mutating-webhook" + + // Webhook names + skyhookValidatingWebhookName = "validate-skyhook.nvidia.com" + deploymentPolicyValidatingWebhookName = "validate-deploymentpolicy.nvidia.com" + skyhookMutatingWebhookName = "mutate-skyhook.nvidia.com" + deploymentPolicyMutatingWebhookName = "mutate-deploymentpolicy.nvidia.com" + + // Webhook paths + skyhookValidatingPath = "/validate-skyhook-nvidia-com-v1alpha1-skyhook" + deploymentPolicyValidatingPath = "/validate-skyhook-nvidia-com-v1alpha1-deploymentpolicy" + skyhookMutatingPath = "/mutate-skyhook-nvidia-com-v1alpha1-skyhook" + deploymentPolicyMutatingPath = "/mutate-skyhook-nvidia-com-v1alpha1-deploymentpolicy" + + // Certificate management + certRotationThreshold = 168 * time.Hour // 7 days + certValidityDurationYear = 365 * 24 * time.Hour // 1 year + expirationAnnotationKey = v1alpha1.METADATA_PREFIX + "/expiration" +) + // This project used to use cert-manager to generate the webhook certificates. // This removes the dependency on cert-manager and simplifies the deployment. // This also removes the need to have a specific issuer, and just uses a self-signed cert. @@ -161,7 +184,7 @@ func (r *WebhookController) GetOrCreateWebhookCertSecret(ctx context.Context, se if err != nil { if errors.IsNotFound(err) { // not found, create it - webhookCert, err := generateCert(r.opts.ServiceName, r.namespace, 365*24*time.Hour) // TODO: this should be configured + webhookCert, err := generateCert(r.opts.ServiceName, r.namespace, certValidityDurationYear) if err != nil { return nil, err } @@ -186,16 +209,18 @@ func (r *WebhookController) GetOrCreateWebhookCertSecret(ctx context.Context, se return secret, nil } +// CheckOrUpdateWebhookCertSecret checks if the webhook secret is going to expire in the next 7 days or if the cert on disk is different from the secret +// if it is, it will generate a new cert and update the secret func (r *WebhookController) CheckOrUpdateWebhookCertSecret(ctx context.Context, secret *corev1.Secret) (bool, error) { equal, err := compareCertOnDiskToSecret(r.certDir, secret) if err != nil { return false, err } - // check if the secret is going to expire in the next 168 hours or if the cert on disk is different from the secret - if !equal || secret.Annotations[fmt.Sprintf("%s/expiration", v1alpha1.METADATA_PREFIX)] < time.Now().Add(168*time.Hour).Format(time.RFC3339) { + // check if the secret is going to expire in the next 7 days or if the cert on disk is different from the secret + if !equal || secret.Annotations[expirationAnnotationKey] < time.Now().Add(certRotationThreshold).Format(time.RFC3339) { // expired, generate a new cert - webhookCert, err := generateCert(r.opts.ServiceName, r.namespace, 365*24*time.Hour) // TODO: this should be configured + webhookCert, err := generateCert(r.opts.ServiceName, r.namespace, certValidityDurationYear) if err != nil { return false, err } @@ -208,7 +233,7 @@ func (r *WebhookController) CheckOrUpdateWebhookCertSecret(ctx context.Context, secret.Data["ca.crt"] = webhookCert.CABytes secret.Data["tls.crt"] = []byte(webhookCert.TLSCert) secret.Data["tls.key"] = []byte(webhookCert.TLSKey) - secret.Annotations[fmt.Sprintf("%s/expiration", v1alpha1.METADATA_PREFIX)] = webhookCert.Expiration.Format(time.RFC3339) + secret.Annotations[expirationAnnotationKey] = webhookCert.Expiration.Format(time.RFC3339) return true, r.Update(ctx, secret) } @@ -216,75 +241,130 @@ func (r *WebhookController) CheckOrUpdateWebhookCertSecret(ctx context.Context, return false, nil } +// CheckOrUpdateWebhookConfigurations checks if the webhook configurations are need to be updated with the new cert +// if it is, it will update the webhook configurations func (r *WebhookController) CheckOrUpdateWebhookConfigurations(ctx context.Context, secret *corev1.Secret) (bool, error) { - // Update only CABundle fields of existing webhook configurations created by Helm caBundle := secret.Data["ca.crt"] - changed := false - // ValidatingWebhookConfiguration - validatingName := webhookValidatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret).GetName() + validatingChanged, err := r.updateValidatingWebhookConfiguration(ctx, caBundle) + if err != nil { + return false, err + } + + mutatingChanged, err := r.updateMutatingWebhookConfiguration(ctx, caBundle) + if err != nil { + return false, err + } + + return validatingChanged || mutatingChanged, nil +} + +// updateValidatingWebhookConfiguration updates the ValidatingWebhookConfiguration with the provided CABundle +func (r *WebhookController) updateValidatingWebhookConfiguration(ctx context.Context, caBundle []byte) (bool, error) { existingValidating := &admissionregistrationv1.ValidatingWebhookConfiguration{} - if err := r.Get(ctx, types.NamespacedName{Name: validatingName}, existingValidating); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: validatingWebhookConfigName}, existingValidating); err != nil { if errors.IsNotFound(err) { - return false, fmt.Errorf("ValidatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", validatingName, err) + return false, fmt.Errorf("ValidatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", validatingWebhookConfigName, err) } - return false, fmt.Errorf("failed to get ValidatingWebhookConfiguration %q: %w", validatingName, err) + return false, fmt.Errorf("failed to get ValidatingWebhookConfiguration %q: %w", validatingWebhookConfigName, err) } needUpdate := false - expectedRules := webhookRule() for i := range existingValidating.Webhooks { + expectedRules := r.getValidatingWebhookRules(existingValidating.Webhooks[i].Name) + if expectedRules == nil { + continue // Unknown webhook, skip + } if validatingWebhookNeedsUpdate(&existingValidating.Webhooks[i], caBundle, expectedRules) { needUpdate = true } } + if needUpdate { if err := r.Update(ctx, existingValidating); err != nil { return false, err - } else { - changed = true } + return true, nil } - // MutatingWebhookConfiguration - mutatingName := webhookMutatingWebhookConfiguration(r.namespace, r.opts.ServiceName, secret).GetName() + return false, nil +} + +// updateMutatingWebhookConfiguration updates the MutatingWebhookConfiguration with the provided CABundle +func (r *WebhookController) updateMutatingWebhookConfiguration(ctx context.Context, caBundle []byte) (bool, error) { existingMutating := &admissionregistrationv1.MutatingWebhookConfiguration{} - if err := r.Get(ctx, types.NamespacedName{Name: mutatingName}, existingMutating); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: mutatingWebhookConfigName}, existingMutating); err != nil { if errors.IsNotFound(err) { - return changed, fmt.Errorf("MutatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", mutatingName, err) + return false, fmt.Errorf("MutatingWebhookConfiguration %q not found; creation is handled by the Helm chart. Ensure the chart is installed and webhooks are enabled: %w", mutatingWebhookConfigName, err) } - return false, fmt.Errorf("failed to get MutatingWebhookConfiguration %q: %w", mutatingName, err) + return false, fmt.Errorf("failed to get MutatingWebhookConfiguration %q: %w", mutatingWebhookConfigName, err) } - needUpdate = false + needUpdate := false for i := range existingMutating.Webhooks { + expectedRules := r.getMutatingWebhookRules(existingMutating.Webhooks[i].Name) + if expectedRules == nil { + continue // Unknown webhook, skip + } if mutatingWebhookNeedsUpdate(&existingMutating.Webhooks[i], caBundle, expectedRules) { needUpdate = true } } + if needUpdate { if err := r.Update(ctx, existingMutating); err != nil { return false, err - } else { - changed = true } + return true, nil } - return changed, nil + return false, nil +} + +// getValidatingWebhookRules returns the expected rules for a validating webhook by name +func (r *WebhookController) getValidatingWebhookRules(webhookName string) []admissionregistrationv1.RuleWithOperations { + switch webhookName { + case skyhookValidatingWebhookName: + return skyhookRules() + case deploymentPolicyValidatingWebhookName: + return deploymentPolicyValidatingRules() + default: + return nil + } +} + +// getMutatingWebhookRules returns the expected rules for a mutating webhook by name +func (r *WebhookController) getMutatingWebhookRules(webhookName string) []admissionregistrationv1.RuleWithOperations { + switch webhookName { + case skyhookMutatingWebhookName: + return skyhookRules() + case deploymentPolicyMutatingWebhookName: + return deploymentPolicyMutatingRules() + default: + return nil + } } // webhookValidatingWebhookConfiguration returns a new validating webhook configuration. func webhookValidatingWebhookConfiguration(namespace, serviceName string, secret *corev1.Secret) *admissionregistrationv1.ValidatingWebhookConfiguration { conf := admissionregistrationv1.ValidatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ - Name: "skyhook-operator-validating-webhook", + Name: validatingWebhookConfigName, }, Webhooks: []admissionregistrationv1.ValidatingWebhook{ { - Name: "validate-skyhook.nvidia.com", - ClientConfig: webhookClient(serviceName, namespace, "/validate-skyhook-nvidia-com-v1alpha1-skyhook", secret), + Name: skyhookValidatingWebhookName, + ClientConfig: webhookClient(serviceName, namespace, skyhookValidatingPath, secret), FailurePolicy: ptr(admissionregistrationv1.Fail), - Rules: webhookRule(), + Rules: skyhookRules(), + SideEffects: ptr(admissionregistrationv1.SideEffectClassNone), + AdmissionReviewVersions: []string{"v1"}, + }, + { + Name: deploymentPolicyValidatingWebhookName, + ClientConfig: webhookClient(serviceName, namespace, deploymentPolicyValidatingPath, secret), + FailurePolicy: ptr(admissionregistrationv1.Fail), + Rules: deploymentPolicyValidatingRules(), SideEffects: ptr(admissionregistrationv1.SideEffectClassNone), AdmissionReviewVersions: []string{"v1"}, }, @@ -298,22 +378,22 @@ func webhookValidatingWebhookConfiguration(namespace, serviceName string, secret func webhookMutatingWebhookConfiguration(namespace, serviceName string, secret *corev1.Secret) *admissionregistrationv1.MutatingWebhookConfiguration { conf := admissionregistrationv1.MutatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ - Name: "skyhook-operator-mutating-webhook", + Name: mutatingWebhookConfigName, }, Webhooks: []admissionregistrationv1.MutatingWebhook{ { - Name: "mutate-skyhook.nvidia.com", - ClientConfig: webhookClient(serviceName, namespace, "/mutate-skyhook-nvidia-com-v1alpha1-skyhook", secret), + Name: skyhookMutatingWebhookName, + ClientConfig: webhookClient(serviceName, namespace, skyhookMutatingPath, secret), FailurePolicy: ptr(admissionregistrationv1.Fail), - Rules: webhookRule(), + Rules: skyhookRules(), SideEffects: ptr(admissionregistrationv1.SideEffectClassNone), AdmissionReviewVersions: []string{"v1"}, }, { - Name: "mutate-deploymentpolicy.nvidia.com", - ClientConfig: webhookClient(serviceName, namespace, "/mutate-skyhook-nvidia-com-v1alpha1-deploymentpolicy", secret), + Name: deploymentPolicyMutatingWebhookName, + ClientConfig: webhookClient(serviceName, namespace, deploymentPolicyMutatingPath, secret), FailurePolicy: ptr(admissionregistrationv1.Fail), - Rules: webhookRule(), + Rules: deploymentPolicyMutatingRules(), SideEffects: ptr(admissionregistrationv1.SideEffectClassNone), AdmissionReviewVersions: []string{"v1"}, }, @@ -358,7 +438,7 @@ func webhookClient(serviceName, namespace, path string, secret *corev1.Secret) a } } -func webhookRule() []admissionregistrationv1.RuleWithOperations { +func skyhookRules() []admissionregistrationv1.RuleWithOperations { return []admissionregistrationv1.RuleWithOperations{ { Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create, admissionregistrationv1.Update}, @@ -368,8 +448,24 @@ func webhookRule() []admissionregistrationv1.RuleWithOperations { Resources: []string{"skyhooks"}, }, }, + } +} + +// deploymentPolicyValidatingRules adds the delete operation to the mutating webhook rules, otherwise they are the same +func deploymentPolicyValidatingRules() []admissionregistrationv1.RuleWithOperations { + mutrules := deploymentPolicyMutatingRules() + oprs := mutrules[0].Operations + newops := make([]admissionregistrationv1.OperationType, len(oprs)) + copy(newops, oprs) + newops = append(newops, admissionregistrationv1.Delete) + mutrules[0].Operations = newops + return mutrules +} + +func deploymentPolicyMutatingRules() []admissionregistrationv1.RuleWithOperations { + return []admissionregistrationv1.RuleWithOperations{ { - Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create, admissionregistrationv1.Update, admissionregistrationv1.Delete}, + Operations: []admissionregistrationv1.OperationType{admissionregistrationv1.Create, admissionregistrationv1.Update}, Rule: admissionregistrationv1.Rule{ APIGroups: []string{v1alpha1.GroupVersion.Group}, APIVersions: []string{v1alpha1.GroupVersion.Version}, diff --git a/operator/internal/controller/webhook_controller_test.go b/operator/internal/controller/webhook_controller_test.go index 52a1ae2d..370924a2 100644 --- a/operator/internal/controller/webhook_controller_test.go +++ b/operator/internal/controller/webhook_controller_test.go @@ -254,7 +254,7 @@ var _ = Describe("WebhookController", Ordered, func() { } caBundle := []byte("new-ca") - expectedRules := webhookRule() + expectedRules := skyhookRules() needsUpdate := validatingWebhookNeedsUpdate(&webhook, caBundle, expectedRules) Expect(needsUpdate).To(BeTrue(), "should detect rules mismatch") @@ -262,7 +262,7 @@ var _ = Describe("WebhookController", Ordered, func() { }) It("should not update when rules are identical", func() { - expectedRules := webhookRule() + expectedRules := skyhookRules() webhook := admissionregistrationv1.ValidatingWebhook{ ClientConfig: admissionregistrationv1.WebhookClientConfig{ @@ -278,7 +278,7 @@ var _ = Describe("WebhookController", Ordered, func() { }) It("should update CABundle when empty", func() { - expectedRules := webhookRule() + expectedRules := skyhookRules() webhook := admissionregistrationv1.MutatingWebhook{ ClientConfig: admissionregistrationv1.WebhookClientConfig{ From b010dd5f2be9f3fe7087ce3f033ca8e6c192649f Mon Sep 17 00:00:00 2001 From: Brian Lockwood Date: Wed, 24 Dec 2025 13:18:39 -0800 Subject: [PATCH 8/8] feat: added new tests to cover the new webhook setup --- .../helm/helm-webhook-test/chainsaw-test.yaml | 82 ++++++++++++++++++- .../skyhook-missing-policy.yaml | 27 ++++++ .../skyhook-valid-policy.yaml | 27 ++++++ .../valid-deploymentpolicy.yaml | 27 ++++++ .../helm/helm-webhook-test/values.yaml | 2 +- 5 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-missing-policy.yaml create mode 100644 k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-valid-policy.yaml create mode 100644 k8s-tests/chainsaw/helm/helm-webhook-test/valid-deploymentpolicy.yaml diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/chainsaw-test.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/chainsaw-test.yaml index e1bd65b9..2f6a68a7 100644 --- a/k8s-tests/chainsaw/helm/helm-webhook-test/chainsaw-test.yaml +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/chainsaw-test.yaml @@ -20,7 +20,15 @@ kind: Test metadata: name: helm-chart-webhook spec: - description: This test asserts that the helm chart is working as expected. Specifically it asserts that webhooks work as expected. Validating an invalid skyhook should fail. Validating an invalid deployment policy should fail. + description: | + Test webhook validation for Skyhook and DeploymentPolicy resources: + - Invalid Skyhook (bad dependencies) should be rejected + - Invalid DeploymentPolicy (bad config) should be rejected + - Skyhook with non-existent policy reference should be rejected + - Skyhook with valid policy reference should be accepted + - Updating Skyhook to non-existent policy should be rejected + - Deleting DeploymentPolicy in use should be rejected + - Deleting DeploymentPolicy after Skyhook removed should succeed concurrent: false timeouts: assert: 180s @@ -67,6 +75,78 @@ spec: cat err.txt exit 1 fi + + # Test: Create Skyhook with non-existent DeploymentPolicy (should fail) + - script: + content: | + echo "Testing: Create Skyhook with non-existent DeploymentPolicy..." + kubectl apply -f skyhook-missing-policy.yaml 2>err.txt + ec=$? + cat err.txt + if [ $ec -eq 0 ]; then + echo "ERROR: Skyhook with non-existent policy was accepted" + exit 1 + fi + if ! grep -q "deploymentPolicy \"non-existent-policy\" not found" err.txt; then + echo "ERROR: Did not get expected policy not found error" + cat err.txt + exit 1 + fi + echo "✅ Correctly rejected Skyhook with missing policy" + + # Test: Create valid DeploymentPolicy + - apply: + file: valid-deploymentpolicy.yaml + + # Test: Create Skyhook with existing DeploymentPolicy (should succeed) + - apply: + file: skyhook-valid-policy.yaml + + # Test: Update Skyhook to reference non-existent policy (should fail) + - script: + content: | + echo "Testing: Update Skyhook to reference non-existent policy..." + kubectl patch skyhook skyhook-valid-policy --type=merge -p '{"spec":{"deploymentPolicy":"does-not-exist"}}' 2>err.txt + ec=$? + cat err.txt + if [ $ec -eq 0 ]; then + echo "ERROR: Update to non-existent policy was accepted" + exit 1 + fi + if ! grep -q "deploymentPolicy \"does-not-exist\" not found" err.txt; then + echo "ERROR: Did not get expected policy not found error" + cat err.txt + exit 1 + fi + echo "✅ Correctly rejected update to missing policy" + + # Test: Delete DeploymentPolicy while Skyhook references it (should fail) + - script: + content: | + echo "Testing: Delete DeploymentPolicy while in use..." + kubectl delete deploymentpolicy test-policy 2>err.txt + ec=$? + cat err.txt + if [ $ec -eq 0 ]; then + echo "ERROR: Policy deletion was accepted while in use" + exit 1 + fi + if ! grep -q "still referenced by" err.txt; then + echo "ERROR: Did not get expected 'in use' error" + cat err.txt + exit 1 + fi + echo "✅ Correctly rejected deletion of policy in use" + + # Test: Delete Skyhook, then delete policy (should succeed) + - script: + content: | + echo "Testing: Delete Skyhook then policy..." + kubectl delete skyhook skyhook-valid-policy + echo "✅ Skyhook deleted" + kubectl delete deploymentpolicy test-policy + echo "✅ Policy deleted after Skyhook removed" + - assert: file: assert-webhook.yaml - script: diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-missing-policy.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-missing-policy.yaml new file mode 100644 index 00000000..f670e358 --- /dev/null +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-missing-policy.yaml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + name: skyhook-missing-policy +spec: + deploymentPolicy: non-existent-policy + packages: + test-pkg: + version: "1.2.3" + image: ghcr.io/nvidia/skyhook/agentless + diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-valid-policy.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-valid-policy.yaml new file mode 100644 index 00000000..88c2e978 --- /dev/null +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/skyhook-valid-policy.yaml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + name: skyhook-valid-policy +spec: + deploymentPolicy: test-policy + packages: + test-pkg: + version: "1.2.3" + image: ghcr.io/nvidia/skyhook/agentless + diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/valid-deploymentpolicy.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/valid-deploymentpolicy.yaml new file mode 100644 index 00000000..e6f7cfd1 --- /dev/null +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/valid-deploymentpolicy.yaml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: DeploymentPolicy +metadata: + name: test-policy +spec: + default: + budget: + percent: 100 + strategy: + fixed: {} + diff --git a/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml b/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml index c9a2158a..98ece9d8 100644 --- a/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml +++ b/k8s-tests/chainsaw/helm/helm-webhook-test/values.yaml @@ -20,7 +20,7 @@ controllerManager: # for more info refer to the README image: repository: ghcr.io/nvidia/skyhook/operator - tag: v0.9.0-76cb952 + tag: v0.10.0-c55c93d7 digest: "" webhook: enable: true