From fa8dbd5762fdfd50b13ae8c3e9d15196b1b264f5 Mon Sep 17 00:00:00 2001 From: Sascha Schwarze Date: Fri, 19 Jul 2024 14:25:17 +0200 Subject: [PATCH] Add feature flag treat-pod-as-always-schedulable The feature flag allows to declare that Pods in the system will eventually all get scheduled and Revisions should therefore not be marked unschedulable Signed-off-by: Sascha Schwarze --- config/core/configmaps/features.yaml | 11 +++++- pkg/apis/config/features.go | 3 ++ pkg/apis/config/features_test.go | 29 +++++++++++++++ .../revision/reconcile_resources.go | 12 ++++--- pkg/reconciler/revision/table_test.go | 35 ++++++++++++++++++- 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/config/core/configmaps/features.yaml b/config/core/configmaps/features.yaml index e88b945ddc9d..6061613d7be8 100644 --- a/config/core/configmaps/features.yaml +++ b/config/core/configmaps/features.yaml @@ -22,7 +22,7 @@ metadata: app.kubernetes.io/component: controller app.kubernetes.io/version: devel annotations: - knative.dev/example-checksum: "9553535b" + knative.dev/example-checksum: "beb98480" data: _example: |- ################################ @@ -251,3 +251,12 @@ data: # Default queue proxy resource requests and limits to good values for most cases if set. queueproxy.resource-defaults: "disabled" + + # treat-pod-as-always-schedulable can be used to define that Pods in the system will always be + # scheduled, and a Revision should not be marked unschedulable. + # Setting this to `enabled` makes sense if you have cluster-autoscaling set up for you cluster + # where unschedulable Pods trigger the addition of a new Node and are therefore a short and + # transient state. + # + # See https://github.com/knative/serving/issues/14862 + treat-pod-as-always-schedulable: "disabled" diff --git a/pkg/apis/config/features.go b/pkg/apis/config/features.go index d2d1ab22f7d6..46054e39063b 100644 --- a/pkg/apis/config/features.go +++ b/pkg/apis/config/features.go @@ -113,6 +113,7 @@ func defaultFeaturesConfig() *Features { SecurePodDefaults: Disabled, TagHeaderBasedRouting: Disabled, AutoDetectHTTP2: Disabled, + TreatPodAsAlwaysSchedulable: Disabled, } } @@ -129,6 +130,7 @@ func NewFeaturesConfigFromMap(data map[string]string) (*Features, error) { asFlag("queueproxy.resource-defaults", &nc.QueueProxyResourceDefaults), asSecurePodDefaultsFlag("secure-pod-defaults", &nc.SecurePodDefaults), asFlag("tag-header-based-routing", &nc.TagHeaderBasedRouting), + asFlag("treat-pod-as-always-schedulable", &nc.TreatPodAsAlwaysSchedulable), asFlag(FeatureContainerSpecAddCapabilities, &nc.ContainerSpecAddCapabilities), asFlag(FeaturePodSpecAffinity, &nc.PodSpecAffinity), asFlag(FeaturePodSpecDNSConfig, &nc.PodSpecDNSConfig), @@ -198,6 +200,7 @@ type Features struct { SecurePodDefaults Flag TagHeaderBasedRouting Flag AutoDetectHTTP2 Flag + TreatPodAsAlwaysSchedulable Flag } // asFlag parses the value at key as a Flag into the target, if it exists. diff --git a/pkg/apis/config/features_test.go b/pkg/apis/config/features_test.go index 49135294b19a..dc2b44786994 100644 --- a/pkg/apis/config/features_test.go +++ b/pkg/apis/config/features_test.go @@ -79,6 +79,7 @@ func TestFeaturesConfiguration(t *testing.T) { SecurePodDefaults: Enabled, QueueProxyResourceDefaults: Enabled, TagHeaderBasedRouting: Enabled, + TreatPodAsAlwaysSchedulable: Enabled, }), data: map[string]string{ "multi-container": "Enabled", @@ -101,6 +102,7 @@ func TestFeaturesConfiguration(t *testing.T) { "secure-pod-defaults": "Enabled", "queueproxy.resource-defaults": "Enabled", "tag-header-based-routing": "Enabled", + "treat-pod-as-always-schedulable": "Enabled", }, }, { name: "multi-container Allowed", @@ -759,6 +761,33 @@ func TestFeaturesConfiguration(t *testing.T) { data: map[string]string{ "tag-header-based-routing": "Restricted", }, + }, { + name: "treat-pod-as-always-schedulable Allowed", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Allowed, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Allowed", + }, + }, { + name: "treat-pod-as-always-schedulable Enabled", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Enabled, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Enabled", + }, + }, { + name: "treat-pod-as-always-schedulable Disabled", + wantErr: false, + wantFeatures: defaultWith(&Features{ + TreatPodAsAlwaysSchedulable: Disabled, + }), + data: map[string]string{ + "treat-pod-as-always-schedulable": "Disabled", + }, }} for _, tt := range configTests { diff --git a/pkg/reconciler/revision/reconcile_resources.go b/pkg/reconciler/revision/reconcile_resources.go index cdb90c553794..83a761e3844f 100644 --- a/pkg/reconciler/revision/reconcile_resources.go +++ b/pkg/reconciler/revision/reconcile_resources.go @@ -36,6 +36,7 @@ import ( "knative.dev/pkg/kmp" "knative.dev/pkg/logging" "knative.dev/pkg/logging/logkey" + apicfg "knative.dev/serving/pkg/apis/config" v1 "knative.dev/serving/pkg/apis/serving/v1" "knative.dev/serving/pkg/networking" "knative.dev/serving/pkg/reconciler/revision/config" @@ -90,10 +91,13 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision) // Update the revision status if pod cannot be scheduled (possibly resource constraints) // If pod cannot be scheduled then we expect the container status to be empty. - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { - rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) - break + treatPodAsAlwaysSchedulable := config.FromContext(ctx).Features.TreatPodAsAlwaysSchedulable + if treatPodAsAlwaysSchedulable != apicfg.Enabled { + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { + rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) + break + } } } diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go index 016828a5b081..45557feb79be 100644 --- a/pkg/reconciler/revision/table_test.go +++ b/pkg/reconciler/revision/table_test.go @@ -616,6 +616,32 @@ func TestReconcile(t *testing.T) { Object: pa("foo", "pod-schedule-error", WithReachabilityUnreachable), }}, Key: "foo/pod-schedule-error", + }, { + Name: "surface no pod schedule errors if treat-pod-as-always-schedulable is enabled", + // Test the propagation of the scheduling errors of Pod into the + // revision is not happening when treat-pod-as-always-schedulable + // is enabled. + Objects: []runtime.Object{ + Revision("foo", "pod-no-schedule-error", + WithLogURL, + MarkActivating("Deploying", ""), + WithRoutingState(v1.RoutingStateActive, fc), + withDefaultContainerStatuses(), + MarkDeploying("Deploying"), + WithRevisionObservedGeneration(1), + MarkContainerHealthyUnknown("Deploying"), + ), + pa("foo", "pod-no-schedule-error", WithReachabilityReachable), // PA can't be ready, since no traffic. + pod(t, "foo", "pod-no-schedule-error", WithUnschedulableContainer("Insufficient energy", "Unschedulable")), + deploy(t, "foo", "pod-no-schedule-error"), + image("foo", "pod-no-schedule-error"), + }, + Ctx: defaultconfig.ToContext(context.Background(), &defaultconfig.Config{ + Features: &defaultconfig.Features{ + TreatPodAsAlwaysSchedulable: defaultconfig.Enabled, + }, + }), + Key: "foo/pod-no-schedule-error", }, { Name: "ready steady state", // Test the transition that Reconcile makes when Endpoints become ready on the @@ -813,11 +839,18 @@ func TestReconcile(t *testing.T) { resolver: &nopResolver{}, } + cfg := reconcilerTestConfig() + + apiCfgOverride := defaultconfig.FromContext(ctx) + if apiCfgOverride != nil { + cfg.Config.Features = apiCfgOverride.Features + } + return revisionreconciler.NewReconciler(ctx, logging.FromContext(ctx), servingclient.Get(ctx), listers.GetRevisionLister(), controller.GetEventRecorder(ctx), r, controller.Options{ ConfigStore: &testConfigStore{ - config: reconcilerTestConfig(), + config: cfg, }, }) }))