@@ -2465,6 +2465,7 @@ def weight_sync_thread(
     policy_group: ModelGroup,
     actor_manager: ActorManager,
     weight_sync_metrics_Q: Queue,
+    params_lock: threading.Lock,
     resume_training_step: int = 1,
 ):
     """Thread function that handles weight sync operations and actor manager coordination."""
@@ -2484,23 +2485,26 @@ def weight_sync_thread(
         logger.debug("[Weight Sync Thread] Starting weight sync")

         # Set actors to stop
-        ray.get(actor_manager.set_should_stop.remote(True))
-        logger.debug("[Weight Sync Thread] Set should_stop to True for weight sync")
-
-        # Broadcast weights to vLLM engines
-        # First get the futures
-        weight_broadcast_futures: list[ray.ObjectRef] = [m.broadcast_to_vllm.remote() for m in policy_group.models]
-
-        # Wait for all weight updates to complete and collect individual timings
-        _, actor_sync_times = ray_get_with_progress(
-            weight_broadcast_futures,
-            desc="[Weight Sync Thread] Waiting for weight updates to complete",
-            enable=args.verbose,
-        )
+        with params_lock:
+            ray.get(actor_manager.set_should_stop.remote(True))
+            logger.debug("[Weight Sync Thread] Set should_stop to True for weight sync")
+
+            # Broadcast weights to vLLM engines
+            # First get the futures
+            weight_broadcast_futures: list[ray.ObjectRef] = [
+                m.broadcast_to_vllm.remote() for m in policy_group.models
+            ]
+
+            # Wait for all weight updates to complete and collect individual timings
+            _, actor_sync_times = ray_get_with_progress(
+                weight_broadcast_futures,
+                desc="[Weight Sync Thread] Waiting for weight updates to complete",
+                enable=args.verbose,
+            )

-        # Allow actors to resume
-        ray.get(actor_manager.set_should_stop.remote(False))
-        logger.debug("[Weight Sync Thread] Set should_stop to False after weight sync")
+            # Allow actors to resume
+            ray.get(actor_manager.set_should_stop.remote(False))
+            logger.debug("[Weight Sync Thread] Set should_stop to False after weight sync")

         # Calculate distribution statistics
         sync_time_stats = {
@@ -2946,6 +2950,7 @@ def run_training(
     model_dims: utils.ModelDims,
     checkpoint_state=None,
 ):
+    params_lock = threading.Lock()
     if resume_training_step > 1:
         logger.info(f"[Main Thread] Resuming training from step {resume_training_step}")

@@ -2959,6 +2964,7 @@ def run_training(
         policy_group,
         actor_manager,
         weight_sync_metrics_Q,
+        params_lock,
         resume_training_step,
     )

@@ -3117,14 +3123,17 @@ def health_check_fn():
             if iter_dataloader is not None:
                 client_state["shuffling_iterator_state"] = iter_dataloader.get_state()

-            ray_get_with_progress(
-                [
-                    policy_group.models[i].save_checkpoint_state.remote(args.checkpoint_state_dir, client_state)
-                    for i in range(args.world_size)
-                ],
-                desc=f"Saving checkpoint state at step {training_step}",
-            )
-            logger.info(f"Saved checkpoint state at step {training_step} to {args.checkpoint_state_dir}")
+            with params_lock:
+                ray_get_with_progress(
+                    [
+                        policy_group.models[i].save_checkpoint_state.remote(
+                            args.checkpoint_state_dir, client_state
+                        )
+                        for i in range(args.world_size)
+                    ],
+                    desc=f"Saving checkpoint state at step {training_step}",
+                )
+                logger.info(f"Saved checkpoint state at step {training_step} to {args.checkpoint_state_dir}")

         maybe_evaluate(
             args,
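For reference, a minimal, self-contained sketch of the coordination pattern these hunks introduce: one threading.Lock shared by the background weight-sync thread and the checkpoint-saving path on the main thread, so a weight broadcast and a checkpoint save never run concurrently. The worker names, sleeps, and step counts below are illustrative stand-ins, not the repository's API.

import threading
import time

# Shared lock: whoever holds it has exclusive access to the model parameters.
params_lock = threading.Lock()


def weight_sync_worker(stop_event: threading.Event) -> None:
    # Background thread: repeatedly "broadcasts" weights while holding the lock,
    # standing in for the ray.get + broadcast_to_vllm calls in the diff.
    while not stop_event.is_set():
        with params_lock:
            time.sleep(0.1)  # placeholder for the actual weight broadcast
        time.sleep(0.2)  # placeholder for waiting on the next sync trigger


def save_checkpoint(step: int) -> None:
    # Main thread: taking the same lock makes the save wait for any in-flight sync,
    # mirroring the `with params_lock:` wrapper around save_checkpoint_state.
    with params_lock:
        time.sleep(0.05)  # placeholder for saving checkpoint state on every rank
        print(f"saved checkpoint at step {step}")


if __name__ == "__main__":
    stop = threading.Event()
    sync_thread = threading.Thread(target=weight_sync_worker, args=(stop,), daemon=True)
    sync_thread.start()
    for step in range(1, 4):
        save_checkpoint(step)
    stop.set()
    sync_thread.join()

Because both sides acquire the same lock, either the broadcast or the save fully completes before the other starts; the order in which they interleave is otherwise unconstrained.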