@@ -41,14 +41,14 @@ func newWorker(log *slog.Logger, cfg *Config, store *routeStore) *probingWorker
4141// the provided context is canceled or Stop is called. Safe to call concurrently
4242// with IsRunning/Stop.
4343func (w * probingWorker ) Start (ctx context.Context ) {
44- if w . IsRunning ( ) {
44+ if ! w . running . CompareAndSwap ( false , true ) {
4545 return
4646 }
4747 ctx , cancel := context .WithCancel (ctx )
4848 w .cancelMu .Lock ()
4949 w .cancel = cancel
5050 w .cancelMu .Unlock ()
51- w . running . Store ( true )
51+
5252 w .wg .Add (1 )
5353 go func () {
5454 defer w .wg .Done ()
@@ -87,11 +87,21 @@ func (w *probingWorker) Run(ctx context.Context) {
8787 "scheduler" , w .cfg .Scheduler .String (),
8888 )
8989
90- // Listener runs in parallel and is retried with backoff on failure .
90+ // Listener runs in parallel.
9191 w .wg .Add (1 )
9292 go func () {
9393 defer w .wg .Done ()
94- w .listen (ctx )
94+ err := w .cfg .ListenFunc (ctx )
95+ if err != nil {
96+ w .log .Error ("listener error" , "error" , err )
97+
98+ // Cancel the worker to stop the run loop.
99+ w .cancelMu .Lock ()
100+ if w .cancel != nil {
101+ w .cancel ()
102+ }
103+ w .cancelMu .Unlock ()
104+ }
95105 }()
96106
97107 // Single reusable timer; we re-arm it whenever the earliest due changes.
@@ -222,50 +232,6 @@ func (w *probingWorker) runProbe(parent context.Context, rk RouteKey, mr managed
222232 w .applyProbeResult (& mr , outcome .OK )
223233}
224234
225- // listen runs cfg.ListenFunc until it returns nil or ctx is canceled, retrying
226- // with exponential backoff on error. Backoff sleeps are ctx-cancelable.
227- // Contract: ListenFunc should return nil on a clean, permanent exit; transient
228- // failures should be reported as errors to trigger backoff/retry.
229- func (w * probingWorker ) listen (ctx context.Context ) {
230- backoff := w .cfg .ListenBackoff
231- attempt := 0
232- for {
233- if ctx .Err () != nil {
234- return
235- }
236-
237- if err := w .cfg .ListenFunc (ctx ); err == nil {
238- // Listener exited cleanly; we’re done.
239- return
240- } else {
241- w .log .Error ("listener error" , "error" , err )
242- }
243-
244- // Calculate the backoff duration.
245- attempt ++
246- d := backoff .Initial
247- for i := 1 ; i < attempt ; i ++ {
248- d = time .Duration (float64 (d ) * backoff .Multiplier )
249- if d > backoff .Max {
250- d = backoff .Max
251- break
252- }
253- }
254-
255- // Cancelable sleep between retries.
256- t := time .NewTimer (d )
257- select {
258- case <- t .C :
259- // Backoff timer fired; retry.
260- case <- ctx .Done ():
261- if ! t .Stop () {
262- <- t .C
263- }
264- return
265- }
266- }
267- }
268-
269235// applyProbeResult updates liveness state and reconciles kernel routes:
270236//
271237// Up -> Netlink.RouteAdd
@@ -309,8 +275,7 @@ func (w *probingWorker) applyProbeResult(mr *managedRoute, ok bool) {
309275 // No kernel operation required.
310276 }
311277
312- // Persist the updated snapshot back into the store.
313- w .store .Set (key , cur )
278+ // No need to store.Set since we mutated the managedRoute in place.
314279}
315280
316281// validateRoute enforces IPv4-only Src/Dst/NextHop. This worker currently
0 commit comments