@@ -25,6 +25,7 @@ import (
2525 "k8s.io/apimachinery/pkg/types"
2626 "sigs.k8s.io/controller-runtime/pkg/client"
2727 "sigs.k8s.io/controller-runtime/pkg/log"
28+ "k8s.io/client-go/util/retry"
2829
2930 nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
3031)
@@ -63,53 +64,60 @@ func (u *nvDriverUpdater) SetConditionsError(ctx context.Context, cr any, reason
6364
6465func (u * nvDriverUpdater ) setConditions (ctx context.Context , cr * nvidiav1alpha1.NVIDIADriver , statusType , reason , message string ) error {
6566 reqLogger := log .FromContext (ctx )
66- // Fetch latest instance and update state to avoid version mismatch
67- instance := & nvidiav1alpha1.NVIDIADriver {}
68- err := u .client .Get (ctx , types.NamespacedName {Name : cr .Name }, instance )
69- if err != nil {
70- reqLogger .Error (err , "Failed to get NVIDIADriver instance for status update" , "name" , cr .Name )
71- return err
72- }
7367
74- switch statusType {
75- case Ready :
76- meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
77- Type : Ready ,
78- Status : metav1 .ConditionTrue ,
79- Reason : reason ,
80- Message : message ,
81- })
82-
83- meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
84- Type : Error ,
85- Status : metav1 .ConditionFalse ,
86- Reason : Ready ,
87- })
88- case Error :
89- meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
90- Type : Ready ,
91- Status : metav1 .ConditionFalse ,
92- Reason : Error ,
93- })
94-
95- meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
96- Type : Error ,
97- Status : metav1 .ConditionTrue ,
98- Reason : reason ,
99- Message : message ,
100- })
101-
102- // Ensure status.state is not empty when updating the CR status.
103- // The caller should set the state appropriately in the CR
104- // depending on the error condition.
105- instance .Status .State = cr .Status .State
106- if instance .Status .State == "" {
107- instance .Status .State = nvidiav1alpha1 .NotReady
68+ return retry .RetryOnConflict (retry .DefaultBackoff , func () error {
69+ // Fetch latest instance and update state to avoid version mismatch
70+ instance := & nvidiav1alpha1.NVIDIADriver {}
71+ err := u .client .Get (ctx , types.NamespacedName {Name : cr .Name }, instance )
72+ if err != nil {
73+ reqLogger .Error (err , "Failed to get NVIDIADriver instance for status update" , "name" , cr .Name )
74+ return err
10875 }
109- default :
110- reqLogger .Error (nil , "Unknown status type provided" , "statusType" , statusType )
111- return fmt .Errorf ("unknown status type provided: %s" , statusType )
112- }
11376
114- return u .client .Status ().Update (ctx , instance )
77+ switch statusType {
78+ case Ready :
79+ meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
80+ Type : Ready ,
81+ Status : metav1 .ConditionTrue ,
82+ Reason : reason ,
83+ Message : message ,
84+ })
85+
86+ meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
87+ Type : Error ,
88+ Status : metav1 .ConditionFalse ,
89+ Reason : Ready ,
90+ })
91+ case Error :
92+ meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
93+ Type : Ready ,
94+ Status : metav1 .ConditionFalse ,
95+ Reason : Error ,
96+ })
97+
98+ meta .SetStatusCondition (& instance .Status .Conditions , metav1.Condition {
99+ Type : Error ,
100+ Status : metav1 .ConditionTrue ,
101+ Reason : reason ,
102+ Message : message ,
103+ })
104+
105+ // Ensure status.state is not empty when updating the CR status.
106+ // The caller should set the state appropriately in the CR
107+ // depending on the error condition.
108+ instance .Status .State = cr .Status .State
109+ if instance .Status .State == "" {
110+ instance .Status .State = nvidiav1alpha1 .NotReady
111+ }
112+ default :
113+ reqLogger .Error (nil , "Unknown status type provided" , "statusType" , statusType )
114+ return fmt .Errorf ("unknown status type provided: %s" , statusType )
115+ }
116+
117+ err = u .client .Status ().Update (ctx , instance )
118+ if err != nil {
119+ reqLogger .Error (err , "Failed to update NVIDIADriver status" , "name" , instance .Name )
120+ }
121+ return err
122+ })
115123}
0 commit comments