diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md
index 36969df87204..9ac790d063e9 100644
--- a/cluster-autoscaler/FAQ.md
+++ b/cluster-autoscaler/FAQ.md
@@ -378,6 +378,10 @@ For example, for a node label of `foo=bar`, you would tag the ASG with:
 }
 ```
 
+When a node group is scaled down to 0 and has no nodes left, Cluster Autoscaler
+also removes any cached node information for that group. The cached node
+information is rebuilt once new nodes are created in the group.
+
 ### How can I prevent Cluster Autoscaler from scaling down a particular node?
 
 From CA 1.0, node will be excluded from scale-down if it has the
diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go
index 3801c2967bb7..616de295ed51 100644
--- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go
+++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go
@@ -117,6 +117,26 @@ func (p *MixedTemplateNodeInfoProvider) Process(ctx *context.AutoscalingContext,
 			p.nodeInfoCache[id] = cacheItem{NodeInfo: nodeInfoCopy, added: time.Now()}
 		}
 	}
+
+	// Invalidate cached node infos of node groups that were scaled down to zero
+	// and have no instances left. Groups without a cache entry are skipped up
+	// front, which also covers a nil cache (reading a nil map is safe).
+	for _, nodeGroup := range ctx.CloudProvider.NodeGroups() {
+		id := nodeGroup.Id()
+		if _, cached := p.nodeInfoCache[id]; !cached {
+			continue
+		}
+		// A readable, non-zero target size rules out invalidation. If the
+		// target size cannot be read, decide on the instance list alone.
+		if size, err := nodeGroup.TargetSize(); err == nil && size != 0 {
+			continue
+		}
+		// Invalidate only when the instance list is readable and empty.
+		if instances, err := nodeGroup.Nodes(); err == nil && len(instances) == 0 {
+			delete(p.nodeInfoCache, id)
+		}
+	}
+
 	for _, nodeGroup := range ctx.CloudProvider.NodeGroups() {
 		id := nodeGroup.Id()
 		seenGroups[id] = true
diff --git a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go
index 6eebbf2f4c77..34f1cd8021da 100644
--- a/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go
+++ b/cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go
@@ -306,6 +306,53 @@ func TestGetNodeInfosCacheExpired(t *testing.T) {
 
 }
 
+// TestCacheEntryRemovedWhenGroupScaledToZero verifies that Process drops the
+// cached node info of a node group once its target size is 0 and it has no nodes.
+func TestCacheEntryRemovedWhenGroupScaledToZero(t *testing.T) {
+	now := time.Now()
+	n1 := BuildTestNode("n1", 1000, 1000)
+	SetNodeReadyState(n1, true, now.Add(-2*time.Minute))
+
+	provider := testprovider.NewTestAutoprovisioningCloudProvider(nil, nil, nil, nil, nil, nil)
+	provider.AddNodeGroup("ng1", 0, 10, 1)
+	provider.AddNode("ng1", n1)
+
+	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{})
+	registry := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil)
+
+	snapshot := testsnapshot.NewTestSnapshotOrDie(t)
+	err := snapshot.SetClusterState([]*apiv1.Node{n1}, nil, drasnapshot.Snapshot{})
+	assert.NoError(t, err)
+
+	ctx := context.AutoscalingContext{
+		CloudProvider:   provider,
+		ClusterSnapshot: snapshot,
+		AutoscalingKubeClients: context.AutoscalingKubeClients{
+			ListerRegistry: registry,
+		},
+	}
+
+	niProcessor := NewMixedTemplateNodeInfoProvider(&cacheTtl, false)
+	_, err = niProcessor.Process(&ctx, []*apiv1.Node{n1}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
+	assert.NoError(t, err)
+	_, found := niProcessor.nodeInfoCache["ng1"]
+	assert.True(t, found)
+
+	// scale node group to zero
+	provider.GetNodeGroup("ng1").(*testprovider.TestNodeGroup).SetTargetSize(0)
+	provider.DeleteNode(n1)
+
+	snapshot = testsnapshot.NewTestSnapshotOrDie(t)
+	err = snapshot.SetClusterState([]*apiv1.Node{}, nil, drasnapshot.Snapshot{})
+	assert.NoError(t, err)
+	ctx.ClusterSnapshot = snapshot
+
+	_, err = niProcessor.Process(&ctx, []*apiv1.Node{}, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
+	assert.NoError(t, err)
+	_, found = niProcessor.nodeInfoCache["ng1"]
+	assert.False(t, found)
+}
+
 func assertEqualNodeCapacities(t *testing.T, expected, actual *apiv1.Node) {
 	t.Helper()
 	assert.NotEqual(t, actual.Status, nil, "")