Skip to content

Commit 5b116b6

Browse files
committed
mhc: Don't set OwnerRemediated or ExternallyRemediated on deleting machines
1 parent d7e314f commit 5b116b6

File tree

2 files changed

+69
-23
lines changed

2 files changed

+69
-23
lines changed

internal/controllers/machinehealthcheck/machinehealthcheck_controller.go

+28-18
Original file line numberDiff line numberDiff line change
@@ -445,12 +445,15 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
445445
if err != nil {
446446
conditions.MarkFalse(m, clusterv1.ExternalRemediationTemplateAvailableCondition, clusterv1.ExternalRemediationTemplateNotFoundReason, clusterv1.ConditionSeverityError, err.Error())
447447

448-
v1beta2conditions.Set(t.Machine, metav1.Condition{
449-
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
450-
Status: metav1.ConditionFalse,
451-
Reason: clusterv1.MachineExternallyRemediatedRemediationTemplateNotFoundV1Beta2Reason,
452-
Message: fmt.Sprintf("Error retrieving remediation template %s %s", m.Spec.RemediationTemplate.Kind, klog.KRef(m.Spec.RemediationTemplate.Namespace, m.Spec.RemediationTemplate.Name)),
453-
})
448+
// Only set condition if not already deleting.
449+
if t.Machine.DeletionTimestamp.IsZero() {
450+
v1beta2conditions.Set(t.Machine, metav1.Condition{
451+
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
452+
Status: metav1.ConditionFalse,
453+
Reason: clusterv1.MachineExternallyRemediatedRemediationTemplateNotFoundV1Beta2Reason,
454+
Message: fmt.Sprintf("Error retrieving remediation template %s %s", m.Spec.RemediationTemplate.Kind, klog.KRef(m.Spec.RemediationTemplate.Namespace, m.Spec.RemediationTemplate.Name)),
455+
})
456+
}
454457
errList = append(errList, errors.Wrapf(err, "error retrieving remediation template %v %q for machine %q in namespace %q within cluster %q", m.Spec.RemediationTemplate.GroupVersionKind(), m.Spec.RemediationTemplate.Name, t.Machine.Name, t.Machine.Namespace, m.Spec.ClusterName))
455458
return errList
456459
}
@@ -481,21 +484,27 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
481484
if err := r.Client.Create(ctx, to); err != nil {
482485
conditions.MarkFalse(m, clusterv1.ExternalRemediationRequestAvailableCondition, clusterv1.ExternalRemediationRequestCreationFailedReason, clusterv1.ConditionSeverityError, err.Error())
483486

484-
v1beta2conditions.Set(t.Machine, metav1.Condition{
485-
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
486-
Status: metav1.ConditionFalse,
487-
Reason: clusterv1.MachineExternallyRemediatedRemediationRequestCreationFailedV1Beta2Reason,
488-
Message: "Please check controller logs for errors",
489-
})
487+
// Only set condition if not already deleting.
488+
if t.Machine.DeletionTimestamp.IsZero() {
489+
v1beta2conditions.Set(t.Machine, metav1.Condition{
490+
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
491+
Status: metav1.ConditionFalse,
492+
Reason: clusterv1.MachineExternallyRemediatedRemediationRequestCreationFailedV1Beta2Reason,
493+
Message: "Please check controller logs for errors",
494+
})
495+
}
490496
errList = append(errList, errors.Wrapf(err, "error creating remediation request for machine %q in namespace %q within cluster %q", t.Machine.Name, t.Machine.Namespace, t.Machine.Spec.ClusterName))
491497
return errList
492498
}
493499

494-
v1beta2conditions.Set(t.Machine, metav1.Condition{
495-
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
496-
Status: metav1.ConditionFalse,
497-
Reason: clusterv1.MachineExternallyRemediatedWaitingForRemediationV1Beta2Reason,
498-
})
500+
// Only set condition if not already deleting.
501+
if t.Machine.DeletionTimestamp.IsZero() {
502+
v1beta2conditions.Set(t.Machine, metav1.Condition{
503+
Type: clusterv1.MachineExternallyRemediatedV1Beta2Condition,
504+
Status: metav1.ConditionFalse,
505+
Reason: clusterv1.MachineExternallyRemediatedWaitingForRemediationV1Beta2Reason,
506+
})
507+
}
499508
} else {
500509
logger.Info("Target has failed health check, marking for remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
501510
// NOTE: MHC is responsible for creating MachineOwnerRemediatedCondition if missing or to trigger another remediation if the previous one is completed;
@@ -504,7 +513,8 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
504513
conditions.MarkFalse(t.Machine, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "")
505514
}
506515

507-
if ownerRemediatedCondition := v1beta2conditions.Get(t.Machine, clusterv1.MachineOwnerRemediatedV1Beta2Condition); ownerRemediatedCondition == nil || ownerRemediatedCondition.Status == metav1.ConditionTrue {
516+
// If the machine is already in deletion, the OwnerRemediated condition should not be set.
517+
if ownerRemediatedCondition := v1beta2conditions.Get(t.Machine, clusterv1.MachineOwnerRemediatedV1Beta2Condition); t.Machine.DeletionTimestamp.IsZero() && (ownerRemediatedCondition == nil || ownerRemediatedCondition.Status == metav1.ConditionTrue) {
508518
v1beta2conditions.Set(t.Machine, metav1.Condition{
509519
Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition,
510520
Status: metav1.ConditionFalse,

internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go

+41-5
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) {
376376
assertMachinesNotHealthy(g, mhc, 0)
377377
})
378378

379-
t.Run("it marks unhealthy machines for remediation when there is one unhealthy Machine", func(t *testing.T) {
379+
t.Run("it marks unhealthy machines for remediation when there is one unhealthy Machine and skips deleting machines", func(t *testing.T) {
380380
g := NewWithT(t)
381381
cluster := createCluster(g, ns.Name)
382382

@@ -404,7 +404,16 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) {
404404
machineLabels(mhc.Spec.Selector.MatchLabels),
405405
)
406406
defer cleanup2()
407-
machines = append(machines, unhealthyMachines...)
407+
// Unhealthy nodes and machines but already in deletion.
408+
_, unhealthyMachinesDeleting, cleanup3 := createMachinesWithNodes(g, cluster,
409+
count(1),
410+
createNodeRefForMachine(true),
411+
nodeStatus(corev1.ConditionUnknown),
412+
machineLabels(mhc.Spec.Selector.MatchLabels),
413+
machineDeleting(),
414+
)
415+
defer cleanup3()
416+
machines = append(append(machines, unhealthyMachines...), unhealthyMachinesDeleting...)
408417
targetMachines := make([]string, len(machines))
409418
for i, m := range machines {
410419
targetMachines[i] = m.Name
@@ -419,7 +428,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) {
419428
}
420429
return &mhc.Status
421430
}).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{
422-
ExpectedMachines: 3,
431+
ExpectedMachines: 4,
423432
CurrentHealthy: 2,
424433
RemediationsAllowed: 2,
425434
ObservedGeneration: 1,
@@ -441,7 +450,7 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) {
441450
},
442451
}))
443452

444-
assertMachinesNotHealthy(g, mhc, 1)
453+
assertMachinesNotHealthy(g, mhc, 2)
445454
assertMachinesOwnerRemediated(g, mhc, 1)
446455
})
447456

@@ -2450,6 +2459,8 @@ type machinesWithNodes struct {
24502459
labels map[string]string
24512460
failureReason string
24522461
failureMessage string
2462+
finalizers []string
2463+
deleted bool
24532464
}
24542465

24552466
type machineWithNodesOption func(m *machinesWithNodes)
@@ -2496,6 +2507,13 @@ func machineFailureMessage(s string) machineWithNodesOption {
24962507
}
24972508
}
24982509

2510+
func machineDeleting() machineWithNodesOption {
2511+
return func(m *machinesWithNodes) {
2512+
m.finalizers = append(m.finalizers, "test.cluster.io/deleting")
2513+
m.deleted = true
2514+
}
2515+
}
2516+
24992517
func createMachinesWithNodes(
25002518
g *WithT,
25012519
c *clusterv1.Cluster,
@@ -2535,6 +2553,9 @@ func createMachinesWithNodes(
25352553
Name: infraMachine.GetName(),
25362554
Namespace: infraMachine.GetNamespace(),
25372555
}
2556+
if len(o.finalizers) > 0 {
2557+
machine.Finalizers = o.finalizers
2558+
}
25382559
g.Expect(env.Create(ctx, machine)).To(Succeed())
25392560
fmt.Printf("machine created: %s\n", machine.GetName())
25402561

@@ -2598,6 +2619,12 @@ func createMachinesWithNodes(
25982619
machine.Status.FailureMessage = ptr.To(o.failureMessage)
25992620
}
26002621

2622+
// Set deletiontimestamp before updating status to ensure its not reconciled
2623+
// without having the deletionTimestamp set.
2624+
if o.deleted {
2625+
g.Expect(env.Delete(ctx, machine)).To(Succeed())
2626+
}
2627+
26012628
// Adding one second to ensure there is a difference from the
26022629
// original time so that the patch works. That is, ensure the
26032630
// precision isn't lost during conversions.
@@ -2618,7 +2645,16 @@ func createMachinesWithNodes(
26182645
}
26192646
}
26202647
for _, m := range machines {
2621-
g.Expect(env.Delete(ctx, m)).To(Succeed())
2648+
if m.DeletionTimestamp.IsZero() {
2649+
g.Expect(env.Delete(ctx, m)).To(Succeed())
2650+
}
2651+
if len(m.Finalizers) > 1 {
2652+
g.Expect(env.Get(ctx, util.ObjectKey(m), m)).To(Succeed())
2653+
machinePatchHelper, err := patch.NewHelper(m, env.Client)
2654+
g.Expect(err).ToNot(HaveOccurred())
2655+
m.Finalizers = nil
2656+
g.Expect(machinePatchHelper.Patch(ctx, m)).To(Succeed())
2657+
}
26222658
}
26232659
for _, im := range infraMachines {
26242660
if err := env.Delete(ctx, im); !apierrors.IsNotFound(err) {

0 commit comments

Comments
 (0)