Commit 7e8cf0b

Merge pull request #8614 from shaikenov/shaikenov-scaledown-unprocessed-node-tracking
feat(nodeScaleDownTime): add a new metric to track unprocessed nodes during scaleDown
2 parents 70caf22 + e86caa9 commit 7e8cf0b

6 files changed: +187 -0 lines changed

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 2 additions & 0 deletions
@@ -354,6 +354,8 @@ type AutoscalingOptions struct {
 	CapacitybufferControllerEnabled bool
 	// CapacitybufferPodInjectionEnabled tells if CA should inject fake pods for capacity buffers that are ready for provisioning
 	CapacitybufferPodInjectionEnabled bool
+	// MaxNodeSkipEvalTimeTrackerEnabled is used to enable/disable the tracking of the maximum evaluation time of a node being skipped during ScaleDown.
+	MaxNodeSkipEvalTimeTrackerEnabled bool
 }

 // KubeClientOptions specify options for kube client
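For embedders that build AutoscalingOptions programmatically rather than via flags, setting the new field is the equivalent of the --max-node-skip-eval-time-tracker-enabled flag added in flags.go below. A minimal sketch (not part of the commit), assuming it is compiled inside the cluster-autoscaler module so the config import resolves:

package main

import (
	"fmt"

	"k8s.io/autoscaler/cluster-autoscaler/config"
)

func main() {
	// Equivalent of passing --max-node-skip-eval-time-tracker-enabled=true.
	opts := config.AutoscalingOptions{
		MaxNodeSkipEvalTimeTrackerEnabled: true,
	}
	fmt.Println("max node skip eval time tracking enabled:", opts.MaxNodeSkipEvalTimeTrackerEnabled)
}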

cluster-autoscaler/config/flags/flags.go

Lines changed: 2 additions & 0 deletions
@@ -232,6 +232,7 @@ var (
 	nodeDeletionCandidateTTL          = flag.Duration("node-deletion-candidate-ttl", time.Duration(0), "Maximum time a node can be marked as removable before the marking becomes stale. This sets the TTL of Cluster-Autoscaler's state if the Cluster-Autoscaler deployment becomes inactive")
 	capacitybufferControllerEnabled   = flag.Bool("capacity-buffer-controller-enabled", false, "Whether to enable the default controller for capacity buffers or not")
 	capacitybufferPodInjectionEnabled = flag.Bool("capacity-buffer-pod-injection-enabled", false, "Whether to enable pod list processor that processes ready capacity buffers and injects fake pods accordingly")
+	maxNodeSkipEvalTimeTrackerEnabled = flag.Bool("max-node-skip-eval-time-tracker-enabled", false, "Whether to enable the tracking of the maximum time of a node being skipped during ScaleDown")

 	// Deprecated flags
 	ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)")

@@ -422,6 +423,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		NodeDeletionCandidateTTL:          *nodeDeletionCandidateTTL,
 		CapacitybufferControllerEnabled:   *capacitybufferControllerEnabled,
 		CapacitybufferPodInjectionEnabled: *capacitybufferPodInjectionEnabled,
+		MaxNodeSkipEvalTimeTrackerEnabled: *maxNodeSkipEvalTimeTrackerEnabled,
 	}
 }

cluster-autoscaler/core/scaledown/nodeevaltracker/ (new file)

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nodeevaltracker
+
+import (
+	"time"
+
+	"k8s.io/autoscaler/cluster-autoscaler/metrics"
+)
+
+// MaxNodeSkipEvalTime tracks the longest evaluation delay of a node skipped during ScaleDown.
+type MaxNodeSkipEvalTime struct {
+	// lastEvalTime is the time of the previous currentlyUnneededNodeNames parsing.
+	lastEvalTime time.Time
+	// nodeNamesWithTimeStamps maps node names to the time of their last successful evaluation.
+	nodeNamesWithTimeStamps map[string]time.Time
+}
+
+// NewMaxNodeSkipEvalTime returns a MaxNodeSkipEvalTime with lastEvalTime set to currentTime.
+func NewMaxNodeSkipEvalTime(currentTime time.Time) *MaxNodeSkipEvalTime {
+	return &MaxNodeSkipEvalTime{lastEvalTime: currentTime}
+}
+
+// get retrieves the time of the last evaluation of a node.
+func (l *MaxNodeSkipEvalTime) get(nodeName string) time.Time {
+	if _, ok := l.nodeNamesWithTimeStamps[nodeName]; ok {
+		return l.nodeNamesWithTimeStamps[nodeName]
+	}
+	return l.lastEvalTime
+}
+
+// getMin returns the minimum time in nodeNamesWithTimeStamps, or the time of the last evaluation if the map is empty.
+func (l *MaxNodeSkipEvalTime) getMin() time.Time {
+	minimumTime := l.lastEvalTime
+	for _, val := range l.nodeNamesWithTimeStamps {
+		if minimumTime.After(val) {
+			minimumTime = val
+		}
+	}
+	return minimumTime
+}
+
+// Update returns the longest evaluation delay among the nodes in nodeNames
+// and replaces nodeNamesWithTimeStamps with entries for nodeNames.
+func (l *MaxNodeSkipEvalTime) Update(nodeNames []string, currentTime time.Time) time.Duration {
+	newNodes := make(map[string]time.Time)
+	for _, nodeName := range nodeNames {
+		newNodes[nodeName] = l.get(nodeName)
+	}
+	l.nodeNamesWithTimeStamps = newNodes
+	l.lastEvalTime = currentTime
+	minimumTime := l.getMin()
+	longestDuration := currentTime.Sub(minimumTime)
+	metrics.ObserveMaxNodeSkipEvalDurationSeconds(longestDuration)
+	return longestDuration
+}
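The tracker keeps, for every node that is still being skipped, the timestamp of its last successful evaluation, so the reported duration keeps growing while a node stays skipped and drops back to zero once all candidates are processed again. A minimal sketch of that behavior (not part of the commit), assuming it is compiled inside the cluster-autoscaler module so the import resolves:

package main

import (
	"fmt"
	"time"

	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/nodeevaltracker"
)

func main() {
	start := time.Now()
	tracker := nodeevaltracker.NewMaxNodeSkipEvalTime(start)

	// Iteration 1: n1 and n2 are skipped for the first time; both inherit the
	// previous evaluation time, so the maximum delay is 1s.
	fmt.Println(tracker.Update([]string{"n1", "n2"}, start.Add(1*time.Second))) // 1s

	// Iteration 2: n2 is still skipped and keeps its original timestamp, so the delay grows to 2s.
	fmt.Println(tracker.Update([]string{"n2"}, start.Add(2*time.Second))) // 2s

	// Iteration 3: nothing is skipped; the tracker resets and reports 0s.
	fmt.Println(tracker.Update(nil, start.Add(3*time.Second))) // 0s
}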
cluster-autoscaler/core/scaledown/nodeevaltracker/ (new test file)

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nodeevaltracker
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMaxNodeSkipEvalTime(t *testing.T) {
+	type testCase struct {
+		name                       string
+		unprocessedNodes           [][]string
+		wantMaxSkipEvalTimeSeconds []int
+	}
+	start := time.Now()
+	testCases := []testCase{
+		{
+			name:                       "Only one node is skipped in one iteration",
+			unprocessedNodes:           [][]string{{}, {"n1"}, {}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{0, 1, 0, 0},
+		},
+		{
+			name:                       "No nodes are skipped in the first iteration",
+			unprocessedNodes:           [][]string{{}, {"n1", "n2"}, {"n2", "n3"}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{0, 1, 2, 0},
+		},
+		{
+			name:                       "Some nodes are skipped in the first iteration",
+			unprocessedNodes:           [][]string{{"n1", "n2"}, {"n1", "n2"}, {"n2", "n3"}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{1, 2, 3, 0},
+		},
+		{
+			name:                       "Overlapping node sets are skipped in different iterations",
+			unprocessedNodes:           [][]string{{}, {"n1", "n2"}, {"n1"}, {"n2"}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{0, 1, 2, 1, 0},
+		},
+		{
+			name:                       "Disjoint node sets are skipped in each iteration",
+			unprocessedNodes:           [][]string{{"n1"}, {"n2"}, {"n3"}, {"n4"}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{1, 1, 1, 1, 0},
+		},
+		{
+			name:                       "None of the nodes are skipped in any iteration",
+			unprocessedNodes:           [][]string{{}, {}, {}},
+			wantMaxSkipEvalTimeSeconds: []int{0, 0, 0},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			timestamp := start
+			maxNodeSkipEvalTime := NewMaxNodeSkipEvalTime(start)
+			for i := 0; i < len(tc.unprocessedNodes); i++ {
+				timestamp = timestamp.Add(1 * time.Second)
+				assert.Equal(t, time.Duration(tc.wantMaxSkipEvalTimeSeconds[i])*time.Second, maxNodeSkipEvalTime.Update(tc.unprocessedNodes[i], timestamp))
+				assert.Equal(t, len(tc.unprocessedNodes[i]), len(maxNodeSkipEvalTime.nodeNamesWithTimeStamps))
+			}
+		})
+	}
+}

cluster-autoscaler/core/scaledown/planner/planner.go

Lines changed: 21 additions & 0 deletions
@@ -26,6 +26,7 @@ import (
 	ca_context "k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility"
+	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/nodeevaltracker"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unneeded"

@@ -76,6 +77,7 @@ type Planner struct {
 	cc                    controllerReplicasCalculator
 	scaleDownSetProcessor nodes.ScaleDownSetProcessor
 	scaleDownContext      *nodes.ScaleDownContext
+	maxNodeSkipEvalTime   *nodeevaltracker.MaxNodeSkipEvalTime
 }

 // New creates a new Planner object.

@@ -91,6 +93,11 @@ func New(autoscalingCtx *ca_context.AutoscalingContext, processors *processors.A
 		unneededNodes.LoadFromExistingTaints(autoscalingCtx.ListerRegistry, time.Now(), autoscalingCtx.AutoscalingOptions.NodeDeletionCandidateTTL)
 	}

+	var maxNodeSkipEvalTime *nodeevaltracker.MaxNodeSkipEvalTime
+	if autoscalingCtx.AutoscalingOptions.MaxNodeSkipEvalTimeTrackerEnabled {
+		maxNodeSkipEvalTime = nodeevaltracker.NewMaxNodeSkipEvalTime(time.Now())
+	}
+
 	return &Planner{
 		autoscalingCtx:   autoscalingCtx,
 		unremovableNodes: unremovable.NewNodes(),

@@ -104,6 +111,7 @@ func New(autoscalingCtx *ca_context.AutoscalingContext, processors *processors.A
 		scaleDownSetProcessor: processors.ScaleDownSetProcessor,
 		scaleDownContext:      nodes.NewDefaultScaleDownContext(),
 		minUpdateInterval:     minUpdateInterval,
+		maxNodeSkipEvalTime:   maxNodeSkipEvalTime,
 	}
 }

@@ -277,13 +285,16 @@ func (p *Planner) categorizeNodes(podDestinations map[string]bool, scaleDownCand
 	}
 	p.nodeUtilizationMap = utilizationMap
 	timer := time.NewTimer(p.autoscalingCtx.ScaleDownSimulationTimeout)
+	var skippedNodes []string

 	for i, node := range currentlyUnneededNodeNames {
 		if timedOut(timer) {
+			skippedNodes = currentlyUnneededNodeNames[i:]
 			klog.Warningf("%d out of %d nodes skipped in scale down simulation due to timeout.", len(currentlyUnneededNodeNames)-i, len(currentlyUnneededNodeNames))
 			break
 		}
 		if len(removableList)-atomicScaleDownNodesCount >= p.unneededNodesLimit() {
+			skippedNodes = currentlyUnneededNodeNames[i:]
 			klog.V(4).Infof("%d out of %d nodes skipped in scale down simulation: there are already %d unneeded nodes so no point in looking for more. Total atomic scale down nodes: %d", len(currentlyUnneededNodeNames)-i, len(currentlyUnneededNodeNames), len(removableList), atomicScaleDownNodesCount)
 			break
 		}

@@ -306,6 +317,7 @@ func (p *Planner) categorizeNodes(podDestinations map[string]bool, scaleDownCand
 			p.unremovableNodes.AddTimeout(unremovable, unremovableTimeout)
 		}
 	}
+	p.handleUnprocessedNodes(skippedNodes)
 	p.unneededNodes.Update(removableList, p.latestUpdate)
 	if unremovableCount > 0 {
 		klog.V(1).Infof("%v nodes found to be unremovable in simulation, will re-check them at %v", unremovableCount, unremovableTimeout)

@@ -372,6 +384,15 @@ func (p *Planner) unneededNodesLimit() int {
 	return limit
 }

+// handleUnprocessedNodes tracks the longest time that a node has been skipped during ScaleDown.
+func (p *Planner) handleUnprocessedNodes(unprocessedNodeNames []string) {
+	// If p.maxNodeSkipEvalTime is nil (the flag is disabled), do nothing.
+	if p.maxNodeSkipEvalTime == nil {
+		return
+	}
+	p.maxNodeSkipEvalTime.Update(unprocessedNodeNames, time.Now())
+}
+
 // getKnownOwnerRef returns ownerRef that is known by CA and CA knows the logic of how this controller recreates pods.
 func getKnownOwnerRef(ownerRefs []metav1.OwnerReference) *metav1.OwnerReference {
 	for _, ownerRef := range ownerRefs {
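The bookkeeping added to categorizeNodes is small: whenever the simulation loop bails out early (timeout or the unneeded-nodes limit), the tail of candidate names that was never evaluated is remembered and handed to the tracker. A minimal, self-contained sketch of that pattern using hypothetical stand-ins, not the commit's code:

package main

import "fmt"

func main() {
	currentlyUnneededNodeNames := []string{"n1", "n2", "n3", "n4"}
	var skippedNodes []string
	limit := 2 // hypothetical stand-in for unneededNodesLimit() or the simulation timeout

	for i := range currentlyUnneededNodeNames {
		if i >= limit {
			// Everything from index i onward was not evaluated in this iteration.
			skippedNodes = currentlyUnneededNodeNames[i:]
			break
		}
		// ... per-node scale-down simulation would happen here ...
	}
	fmt.Println("skipped:", skippedNodes) // handed to handleUnprocessedNodes in the real code
}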

cluster-autoscaler/metrics/metrics.go

Lines changed: 15 additions & 0 deletions
@@ -425,6 +425,14 @@ var (
 			Buckets: k8smetrics.ExponentialBuckets(1, 2, 6), // 1, 2, 4, ..., 32
 		}, []string{"instance_type", "cpu_count", "namespace_count"},
 	)
+
+	maxNodeSkipEvalDurationSeconds = k8smetrics.NewGauge(
+		&k8smetrics.GaugeOpts{
+			Namespace: caNamespace,
+			Name:      "max_node_skip_eval_duration_seconds",
+			Help:      "Maximum evaluation time of a node being skipped during ScaleDown.",
+		},
+	)
 )

 // RegisterAll registers all metrics.

@@ -461,6 +469,7 @@ func RegisterAll(emitPerNodeGroupMetrics bool) {
 	legacyregistry.MustRegister(nodeTaintsCount)
 	legacyregistry.MustRegister(inconsistentInstancesMigsCount)
 	legacyregistry.MustRegister(binpackingHeterogeneity)
+	legacyregistry.MustRegister(maxNodeSkipEvalDurationSeconds)

 	if emitPerNodeGroupMetrics {
 		legacyregistry.MustRegister(nodesGroupMinNodes)

@@ -748,3 +757,9 @@ func UpdateInconsistentInstancesMigsCount(migCount int) {
 func ObserveBinpackingHeterogeneity(instanceType, cpuCount, namespaceCount string, pegCount int) {
 	binpackingHeterogeneity.WithLabelValues(instanceType, cpuCount, namespaceCount).Observe(float64(pegCount))
 }
+
+// ObserveMaxNodeSkipEvalDurationSeconds records the longest time during which a node was skipped during ScaleDown.
+// If a node is skipped multiple times consecutively, only the earliest timestamp is kept.
+func ObserveMaxNodeSkipEvalDurationSeconds(duration time.Duration) {
+	maxNodeSkipEvalDurationSeconds.Set(duration.Seconds())
+}
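The gauge lives under the cluster-autoscaler metrics namespace, so (assuming caNamespace resolves to cluster_autoscaler, as for the other metrics in this package) it is exposed as cluster_autoscaler_max_node_skip_eval_duration_seconds once RegisterAll has run. A minimal sketch (not part of the commit), assuming it is compiled inside the cluster-autoscaler module, showing how a skip duration maps onto the gauge value:

package main

import (
	"time"

	"k8s.io/autoscaler/cluster-autoscaler/metrics"
)

func main() {
	// A node that has been waiting 90s for its scale-down evaluation surfaces
	// as a gauge value of 90 (seconds) on the next scrape.
	metrics.ObserveMaxNodeSkipEvalDurationSeconds(90 * time.Second)
}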
