Emerge-Lab · Aditya-Gupta26 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
@@ -188,6 +188,14 @@ wosac_aggregate_results = True
 human_replay_eval = False
 ; Control only the self-driving car
 human_replay_control_mode = "control_sdc_only"
+; If True, render evaluation videos whenever the epoch is a multiple of eval_render_interval, and when training finishes
+eval_render_enabled = False
+; How often to render evaluation videos (in epochs)
+eval_render_interval = 1000
+; Number of maps to render during evaluation
+eval_render_num_maps = 3
+; Show expert trajectories in evaluation renders
+eval_render_show_human_logs = True
 
 [render]
 ; Mode to render a bunch of maps with a given policy

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
@@ -1518,7 +1518,8 @@ void set_active_agents(Drive *env) {
             static_agent_indices[env->static_agent_count] = i;
             env->static_agent_count++;
             env->agents[i].active_agent = 0;
-            if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) {
+            if (env->control_mode == CONTROL_SDC_ONLY || env->agents[i].mark_as_expert == 1 ||
+                env->active_agent_count == env->num_agents) {
                 expert_static_agent_indices[env->expert_static_agent_count] = i;
                 env->expert_static_agent_count++;
                 env->agents[i].mark_as_expert = 1;

diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c
@@ -101,6 +101,32 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int
             Vector3 prev_point = {0};
             bool has_prev = false;
 
+            Agent *agent = &env->agents[idx];
+            for (int j = 0; j < agent->trajectory_length; j++) {
+                float x = agent->log_trajectory_x[j];
+                float y = agent->log_trajectory_y[j];
+                float valid = agent->log_valid[j];
+
+                if (!valid) {
+                    has_prev = false;
+                    continue;
+                }
+
+                Vector3 curr_point = {x, y, 0.5f};
+
+                if (has_prev) {
+                    DrawLine3D(prev_point, curr_point, Fade(LIGHTGREEN, 0.6f));
+                }
+
+                prev_point = curr_point;
+                has_prev = true;
+            }
+        }
+        for (int i = 0; i < env->expert_static_agent_count; i++) {
+            int idx = env->expert_static_agent_indices[i];
+            Vector3 prev_point = {0};
+            bool has_prev = false;
+
             Agent *agent = &env->agents[idx];
             for (int j = 0; j < agent->trajectory_length; j++) {
                 float x = agent->log_trajectory_x[j];
@@ -193,7 +219,7 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett
 
 int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers,
              int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown,
-             const char *output_agent, int num_maps, int zoom_in) {
+             const char *output_agent, int num_maps, int zoom_in, const char *control_mode_override) {
 
     // Parse configuration from INI file
     env_init_config conf = {0};
@@ -228,6 +254,22 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
     }
     fclose(policy_file);
 
+    // Override control_mode if specified via CLI
+    int control_mode_int = conf.control_mode;
+    if (control_mode_override != NULL) {
+        if (strcmp(control_mode_override, "control_vehicles") == 0) {
+            control_mode_int = 0;
+        } else if (strcmp(control_mode_override, "control_agents") == 0) {
+            control_mode_int = 1;
+        } else if (strcmp(control_mode_override, "control_wosac") == 0) {
+            control_mode_int = 2;
+        } else if (strcmp(control_mode_override, "control_sdc_only") == 0) {
+            control_mode_int = 3;
+        } else {
+            fprintf(stderr, "Warning: Unknown control mode '%s', using config value\n", control_mode_override);
+        }
+    }
+
     // Initialize environment with all config values from INI [env] section
     Drive env = {
         .action_type = conf.action_type,
@@ -253,7 +295,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
         .offroad_behavior = conf.offroad_behavior,
         .init_steps = conf.init_steps,
         .init_mode = conf.init_mode,
-        .control_mode = conf.control_mode,
+        .control_mode = control_mode_int, // Use overridden or config value
         .reward_bounds =
             {
                 {conf.reward_bound_goal_radius_min, conf.reward_bound_goal_radius_max},
@@ -275,7 +317,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
             },
         .map_name = (char *)map_name,
     };
-
+    printf("Control Mode : %.2d\n", control_mode_int);
     allocate(&env);
 
     // Check if map has any active agents
@@ -323,6 +365,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
 
     Weights *weights = load_weights(policy_name);
     printf("Active agents in map: %d\n", env.active_agent_count);
+    printf("Static expert agents in the map :%d\n", env.expert_static_agent_count);
     DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning);
 
     int frame_count = env.episode_length > 0 ? env.episode_length : TRAJECTORY_LENGTH_DEFAULT;
@@ -446,6 +489,7 @@ int main(int argc, char *argv[]) {
     int frame_skip = 1;
     int zoom_in = 0;
     const char *view_mode = "both";
+    const char *control_mode_override = NULL;
 
     // File paths and num_maps (not in [env] section)
     const char *map_name = NULL;
@@ -518,10 +562,18 @@ int main(int argc, char *argv[]) {
                 num_maps = atoi(argv[i + 1]);
                 i++;
             }
+        } else if (strcmp(argv[i], "--control-mode") == 0) {
+            if (i + 1 < argc) {
+                control_mode_override = argv[i + 1];
+                i++;
+            } else {
+                fprintf(stderr, "Error: --control-mode option requires a value\n");
+                return 1;
+            }
         }
     }
 
     eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown,
-             output_agent, num_maps, zoom_in);
+             output_agent, num_maps, zoom_in, control_mode_override);
     return 0;
 }
diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py
@@ -587,6 +587,69 @@ def train(self):
         ):
             pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)
 
+        # Render evaluation videos in which the policy controls only the SDC and the other agents follow human replays
+        if self.config["eval"].get("eval_render_enabled", False) and (
+            self.epoch % self.config["eval"].get("eval_render_interval", 1000) == 0 or done_training
+        ):
+            model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}")
+            model_files = glob.glob(os.path.join(model_dir, "model_*.pt"))
+
+            if model_files:
+                # Take the latest checkpoint
+                latest_cpt = max(model_files, key=os.path.getctime)
+                bin_path = f"{model_dir}.bin"
+
+                # Export to .bin for rendering with raylib
+                try:
+                    export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config}
+
+                    export(
+                        args=export_args,
+                        env_name=self.config["env"],
+                        vecenv=self.vecenv,
+                        policy=self.uncompiled_policy,
+                        path=bin_path,
+                        silent=True,
+                    )
+
+                    bin_path_epoch = f"{model_dir}_epoch_{self.epoch:06d}.bin"
+                    shutil.copy2(bin_path, bin_path_epoch)
+
+                    env_cfg = getattr(self.vecenv, "driver_env", None)
+                    wandb_log = True if hasattr(self.logger, "wandb") and self.logger.wandb else False
+                    wandb_run = self.logger.wandb if hasattr(self.logger, "wandb") else None
+                    if self.render_async:
+                        render_proc = multiprocessing.Process(
+                            target=pufferlib.utils.render_eval_videos,
+                            args=(
+                                self.config,
+                                env_cfg,
+                                self.logger.run_id,
+                                wandb_log,
+                                self.epoch,
+                                self.global_step,
+                                bin_path,
+                                self.render_async,
+                                self.render_queue,
+                            ),
+                        )
+                        render_proc.start()
+                        self.render_processes.append(render_proc)
+                    else:
+                        pufferlib.utils.render_eval_videos(
+                            self.config,
+                            env_cfg,
+                            self.logger.run_id,
+                            wandb_log,
+                            self.epoch,
+                            self.global_step,
+                            bin_path,
+                            self.render_async,
+                            wandb_run=wandb_run,
+                        )
+                except Exception as e:
+                    print(f"Failed to render evaluation videos: {e}")
+
     def check_render_queue(self):
         """Check if any async render jobs finished and log them."""
         if not self.render_async or not hasattr(self, "render_queue"):
@@ -597,21 +660,39 @@ def check_render_queue(self):
                 result = self.render_queue.get_nowait()
                 step = result["step"]
                 videos = result["videos"]
+                is_eval = result.get("eval", False)  # Check if these are eval videos
 
                 # Log to wandb if available
                 if hasattr(self.logger, "wandb") and self.logger.wandb:
                     import wandb
 
                     payload = {}
-                    if videos["output_topdown"]:
-                        payload["render/world_state"] = [wandb.Video(p, format="mp4") for p in videos["output_topdown"]]
-                    if videos["output_agent"]:
-                        payload["render/agent_view"] = [wandb.Video(p, format="mp4") for p in videos["output_agent"]]
+                    if is_eval:
+                        # Use eval_render namespace for eval videos
+                        if videos.get("output_topdown"):
+                            payload["eval_render/world_state"] = [
+                                wandb.Video(p, format="mp4") for p in videos["output_topdown"]
+                            ]
+                        if videos.get("output_agent"):
+                            payload["eval_render/agent_view"] = [
+                                wandb.Video(p, format="mp4") for p in videos["output_agent"]
+                            ]
+                    else:
+                        # Use render namespace for training videos
+                        if videos.get("output_topdown"):
+                            payload["render/world_state"] = [
+                                wandb.Video(p, format="mp4") for p in videos["output_topdown"]
+                            ]
+                        if videos.get("output_agent"):
+                            payload["render/agent_view"] = [
+                                wandb.Video(p, format="mp4") for p in videos["output_agent"]
+                            ]
 
                     if payload:
                         # Custom step for render logs to prevent monotonic logic wandb errors
                         payload["render_step"] = step
                         self.logger.wandb.log(payload)
+                        print(f"Logged async {'eval ' if is_eval else ''}render videos to wandb (step {step})")
 
         except queue.Empty:
             pass
@@ -1693,7 +1774,7 @@ def render_task(map_path):
         if render_configs.get("show_lasers", False):
             cmd.append("--lasers")
         if render_configs.get("show_human_logs", False):
-            cmd.append("--show-human-logs")
+            cmd.append("--log-trajectories")
         if render_configs.get("zoom_in", False):
             cmd.append("--zoom-in")
         cmd.extend(["--view", view_mode])