Skip to content

Commit 559c04a

Browse files
committed
add logic to retry if update fails due to conflict
Signed-off-by: Rahul Sharma <[email protected]>
1 parent c5d513d commit 559c04a

File tree

5 files changed

+150
-10
lines changed

5 files changed

+150
-10
lines changed

internal/conditions/clusterpolicy.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"k8s.io/apimachinery/pkg/api/meta"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2525
"k8s.io/apimachinery/pkg/types"
26+
"k8s.io/client-go/util/retry"
2627
"sigs.k8s.io/controller-runtime/pkg/client"
2728
"sigs.k8s.io/controller-runtime/pkg/log"
2829

@@ -55,14 +56,13 @@ func (u *clusterPolicyUpdater) SetConditionsError(ctx context.Context, cr any, r
5556
return u.setConditions(ctx, clusterPolicyCr, Error, reason, message)
5657
}
5758

58-
func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
59+
// updateConditions updates the conditions of the ClusterPolicy CR
60+
func (u *clusterPolicyUpdater) updateConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
5961
reqLogger := log.FromContext(ctx)
6062
// Fetch latest instance and update state to avoid version mismatch
6163
instance := &nvidiav1.ClusterPolicy{}
62-
err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance)
63-
if err != nil {
64-
reqLogger.Error(err, "Failed to get ClusterPolicy instance for status update", "name", cr.Name)
65-
return err
64+
if err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil {
65+
return fmt.Errorf("failed to get ClusterPolicy instance for status update: %w", err)
6666
}
6767

6868
switch statusType {
@@ -99,3 +99,18 @@ func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.C
9999

100100
return u.client.Status().Update(ctx, instance)
101101
}
102+
103+
// setConditions updates the conditions of the ClusterPolicy CR
104+
// with retry on conflict to handle version mismatches
105+
func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
106+
reqLogger := log.FromContext(ctx)
107+
108+
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
109+
return u.updateConditions(ctx, cr, statusType, reason, message)
110+
})
111+
112+
if err != nil {
113+
reqLogger.Error(err, "Failed to update ClusterPolicy status after retries", "name", cr.Name)
114+
}
115+
return err
116+
}

internal/conditions/nvidiadriver.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"k8s.io/apimachinery/pkg/api/meta"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2525
"k8s.io/apimachinery/pkg/types"
26+
"k8s.io/client-go/util/retry"
2627
"sigs.k8s.io/controller-runtime/pkg/client"
2728
"sigs.k8s.io/controller-runtime/pkg/log"
2829

@@ -61,14 +62,13 @@ func (u *nvDriverUpdater) SetConditionsError(ctx context.Context, cr any, reason
6162
return u.setConditions(ctx, nvDriverCr, Error, reason, message)
6263
}
6364

64-
func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
65+
// updateConditions updates the conditions of the NVIDIADriver CR
66+
func (u *nvDriverUpdater) updateConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
6567
reqLogger := log.FromContext(ctx)
6668
// Fetch latest instance and update state to avoid version mismatch
6769
instance := &nvidiav1alpha1.NVIDIADriver{}
68-
err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance)
69-
if err != nil {
70-
reqLogger.Error(err, "Failed to get NVIDIADriver instance for status update", "name", cr.Name)
71-
return err
70+
if err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil {
71+
return fmt.Errorf("failed to get NVIDIADriver instance for status update: %w", err)
7272
}
7373

7474
switch statusType {
@@ -113,3 +113,18 @@ func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.
113113

114114
return u.client.Status().Update(ctx, instance)
115115
}
116+
117+
// setConditions updates the conditions of the NVIDIADriver CR
118+
// with retry on conflict to handle version mismatches
119+
func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
120+
reqLogger := log.FromContext(ctx)
121+
122+
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
123+
return u.updateConditions(ctx, cr, statusType, reason, message)
124+
})
125+
126+
if err != nil {
127+
reqLogger.Error(err, "Failed to update NVIDIADriver status after retries", "name", cr.Name)
128+
}
129+
return err
130+
}

vendor/k8s.io/client-go/util/retry/OWNERS

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/k8s.io/client-go/util/retry/util.go

Lines changed: 105 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/modules.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,7 @@ k8s.io/client-go/util/flowcontrol
989989
k8s.io/client-go/util/homedir
990990
k8s.io/client-go/util/jsonpath
991991
k8s.io/client-go/util/keyutil
992+
k8s.io/client-go/util/retry
992993
k8s.io/client-go/util/workqueue
993994
# k8s.io/component-base v0.34.1
994995
## explicit; go 1.24.0

0 commit comments

Comments
 (0)