Skip to content

Commit 429463a

Browse files
committed
Agents stop updating the managedcluster status when the clock is out of sync.
Signed-off-by: xuezhaojun <[email protected]>
1 parent 17e05da commit 429463a

File tree

4 files changed

+23
-8
lines changed

4 files changed

+23
-8
lines changed

pkg/registration/hub/lease/clocksynccontroller.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,15 @@ func (c *clockSyncController) sync(ctx context.Context, syncCtx factory.SyncCont
103103
return err
104104
}
105105
// When the agent's lease gets renewed, the "now" on the hub should be close to the RenewTime on the agent.
106-
// If the two time are not close(over 1 lease duration), we assume the clock is out of sync.
107-
oneLeaseDuration := time.Duration(LeaseDurationSeconds) * time.Second
106+
// If the two times are not close (within the same duration the lease controller uses), we assume the clock is out of sync.
107+
// Then, if the Clock is out of sync, the agent will not be able to update the status of managed cluster.
108+
leaseDuration := time.Duration(leaseDurationTimes*cluster.Spec.LeaseDurationSeconds) * time.Second
109+
if leaseDuration == 0 {
110+
leaseDuration = time.Duration(LeaseDurationSeconds*leaseDurationTimes) * time.Second
111+
}
112+
108113
if err := c.updateClusterStatusClockSynced(ctx, cluster,
109-
now.Sub(observedLease.Spec.RenewTime.Time) < oneLeaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < oneLeaseDuration); err != nil {
114+
now.Sub(observedLease.Spec.RenewTime.Time) < leaseDuration && observedLease.Spec.RenewTime.Time.Sub(now) < leaseDuration); err != nil {
110115
return err
111116
}
112117
return nil

pkg/registration/hub/lease/clocksynccontroller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestClockSyncController(t *testing.T) {
6363
testinghelpers.NewManagedCluster(),
6464
},
6565
leases: []runtime.Object{
66-
testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(61*time.Second)),
66+
testinghelpers.NewManagedClusterLease("managed-cluster-lease", now.Add(301*time.Second)),
6767
},
6868
validateActions: func(t *testing.T, leaseActions, clusterActions []clienttesting.Action) {
6969
expected := metav1.Condition{

pkg/registration/spoke/managedcluster/status_controller.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type managedClusterStatusController struct {
3131
patcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]
3232
hubClusterLister clusterv1listers.ManagedClusterLister
3333
hubEventRecorder kevents.EventRecorder
34+
recorder events.Recorder
3435
}
3536

3637
type statusReconcile interface {
@@ -97,6 +98,7 @@ func newManagedClusterStatusController(
9798
},
9899
hubClusterLister: hubClusterInformer.Lister(),
99100
hubEventRecorder: hubEventRecorder,
101+
recorder: recorder,
100102
}
101103
}
102104

@@ -121,6 +123,13 @@ func (c *managedClusterStatusController) sync(ctx context.Context, syncCtx facto
121123
}
122124
}
123125

126+
// check if managedcluster's clock is out of sync, if so, the agent will not be able to update the status of managed cluster.
127+
outOfSynced := meta.IsStatusConditionFalse(newCluster.Status.Conditions, clusterv1.ManagedClusterConditionClockSynced)
128+
if outOfSynced {
129+
c.recorder.Eventf("ClockOutOfSync", "The managed cluster's clock is out of sync, the agent will not be able to update the status of managed cluster.")
130+
return fmt.Errorf("the managed cluster's clock is out of sync, the agent will not be able to update the status of managed cluster.")
131+
}
132+
124133
changed, err := c.patcher.PatchStatus(ctx, newCluster, newCluster.Status, cluster.Status)
125134
if err != nil {
126135
errs = append(errs, err)

test/integration/registration/managedcluster_lease_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,14 +194,15 @@ var _ = ginkgo.Describe("Cluster Lease Update", func() {
194194
return err
195195
}
196196
// The default lease duration is 60s.
197-
// The renewTime is 2 leaseDuration before the hub's now, so the clock should be out of sync.
198-
// The renewTime + 5 * leaseDuration > now, so the available condition should be true
199-
lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-120 * time.Second)}
197+
// The renewTime + 5 * leaseDuration < now, so:
198+
// * the clock should be out of sync
199+
// * the available condition should be unknown
200+
lease.Spec.RenewTime = &metav1.MicroTime{Time: now.Add(-301 * time.Second)}
200201
_, err = kubeClient.CoordinationV1().Leases(managedClusterName).Update(context.TODO(), lease, metav1.UpdateOptions{})
201202
return err
202203
}, eventuallyInterval, eventuallyTimeout).ShouldNot(gomega.HaveOccurred())
203204

204-
assertAvailableCondition(managedClusterName, metav1.ConditionTrue, 0)
205+
assertAvailableCondition(managedClusterName, metav1.ConditionUnknown, 0)
205206
assertCloclSyncedCondition(managedClusterName, metav1.ConditionFalse, 0)
206207
})
207208
})

0 commit comments

Comments
 (0)