Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ wosac_aggregate_results = True
human_replay_eval = False
; Control only the self-driving car
human_replay_control_mode = "control_sdc_only"
; If True, render evaluation videos every eval_render_interval epochs
eval_render_enabled = False
; How often to render evaluation videos (in epochs)
eval_render_interval = 1000
; Number of maps to render during evaluation
eval_render_num_maps = 3
; Show expert trajectories in evaluation renders
eval_render_show_human_logs = True

[render]
; Mode to render a bunch of maps with a given policy
Expand Down
3 changes: 2 additions & 1 deletion pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,8 @@ void set_active_agents(Drive *env) {
static_agent_indices[env->static_agent_count] = i;
env->static_agent_count++;
env->agents[i].active_agent = 0;
if (env->agents[i].mark_as_expert == 1 || env->active_agent_count == env->num_agents) {
if (env->control_mode == CONTROL_SDC_ONLY || env->agents[i].mark_as_expert == 1 ||
env->active_agent_count == env->num_agents) {
expert_static_agent_indices[env->expert_static_agent_count] = i;
env->expert_static_agent_count++;
env->agents[i].mark_as_expert = 1;
Expand Down
60 changes: 56 additions & 4 deletions pufferlib/ocean/drive/visualize.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,32 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int
Vector3 prev_point = {0};
bool has_prev = false;

Agent *agent = &env->agents[idx];
for (int j = 0; j < agent->trajectory_length; j++) {
float x = agent->log_trajectory_x[j];
float y = agent->log_trajectory_y[j];
float valid = agent->log_valid[j];

if (!valid) {
has_prev = false;
continue;
}

Vector3 curr_point = {x, y, 0.5f};

if (has_prev) {
DrawLine3D(prev_point, curr_point, Fade(LIGHTGREEN, 0.6f));
}

prev_point = curr_point;
has_prev = true;
}
}
for (int i = 0; i < env->expert_static_agent_count; i++) {
int idx = env->expert_static_agent_indices[i];
Vector3 prev_point = {0};
bool has_prev = false;

Agent *agent = &env->agents[idx];
for (int j = 0; j < agent->trajectory_length; j++) {
float x = agent->log_trajectory_x[j];
Expand Down Expand Up @@ -193,7 +219,7 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett

int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers,
int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown,
const char *output_agent, int num_maps, int zoom_in) {
const char *output_agent, int num_maps, int zoom_in, const char *control_mode_override) {

// Parse configuration from INI file
env_init_config conf = {0};
Expand Down Expand Up @@ -228,6 +254,22 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
}
fclose(policy_file);

// Override control_mode if specified via CLI
int control_mode_int = conf.control_mode;
if (control_mode_override != NULL) {
if (strcmp(control_mode_override, "control_vehicles") == 0) {
control_mode_int = 0;
} else if (strcmp(control_mode_override, "control_agents") == 0) {
control_mode_int = 1;
} else if (strcmp(control_mode_override, "control_wosac") == 0) {
control_mode_int = 2;
} else if (strcmp(control_mode_override, "control_sdc_only") == 0) {
control_mode_int = 3;
} else {
fprintf(stderr, "Warning: Unknown control mode '%s', using config value\n", control_mode_override);
}
}

// Initialize environment with all config values from INI [env] section
Drive env = {
.action_type = conf.action_type,
Expand All @@ -253,7 +295,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
.offroad_behavior = conf.offroad_behavior,
.init_steps = conf.init_steps,
.init_mode = conf.init_mode,
.control_mode = conf.control_mode,
.control_mode = control_mode_int, // Use overridden or config value
.reward_bounds =
{
{conf.reward_bound_goal_radius_min, conf.reward_bound_goal_radius_max},
Expand All @@ -275,7 +317,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
},
.map_name = (char *)map_name,
};

printf("Control Mode : %.2d\n", control_mode_int);
allocate(&env);

// Check if map has any active agents
Expand Down Expand Up @@ -323,6 +365,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o

Weights *weights = load_weights(policy_name);
printf("Active agents in map: %d\n", env.active_agent_count);
printf("Static expert agents in the map :%d\n", env.expert_static_agent_count);
DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning);

int frame_count = env.episode_length > 0 ? env.episode_length : TRAJECTORY_LENGTH_DEFAULT;
Expand Down Expand Up @@ -446,6 +489,7 @@ int main(int argc, char *argv[]) {
int frame_skip = 1;
int zoom_in = 0;
const char *view_mode = "both";
const char *control_mode_override = NULL;

// File paths and num_maps (not in [env] section)
const char *map_name = NULL;
Expand Down Expand Up @@ -518,10 +562,18 @@ int main(int argc, char *argv[]) {
num_maps = atoi(argv[i + 1]);
i++;
}
} else if (strcmp(argv[i], "--control-mode") == 0) {
if (i + 1 < argc) {
control_mode_override = argv[i + 1];
i++;
} else {
fprintf(stderr, "Error: --control-mode option requires a value\n");
return 1;
}
}
}

eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown,
output_agent, num_maps, zoom_in);
output_agent, num_maps, zoom_in, control_mode_override);
return 0;
}
91 changes: 86 additions & 5 deletions pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,69 @@ def train(self):
):
pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step)

# Render evaluation videos showing policy controlling only SDC with human replays
if self.config["eval"].get("eval_render_enabled", False) and (
self.epoch % self.config["eval"].get("eval_render_interval", 1000) == 0 or done_training
):
model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}")
model_files = glob.glob(os.path.join(model_dir, "model_*.pt"))

if model_files:
# Take the latest checkpoint
latest_cpt = max(model_files, key=os.path.getctime)
bin_path = f"{model_dir}.bin"

# Export to .bin for rendering with raylib
try:
export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config}

export(
args=export_args,
env_name=self.config["env"],
vecenv=self.vecenv,
policy=self.uncompiled_policy,
path=bin_path,
silent=True,
)

bin_path_epoch = f"{model_dir}_epoch_{self.epoch:06d}.bin"
shutil.copy2(bin_path, bin_path_epoch)

env_cfg = getattr(self.vecenv, "driver_env", None)
wandb_log = True if hasattr(self.logger, "wandb") and self.logger.wandb else False
wandb_run = self.logger.wandb if hasattr(self.logger, "wandb") else None
if self.render_async:
render_proc = multiprocessing.Process(
target=pufferlib.utils.render_eval_videos,
args=(
self.config,
env_cfg,
self.logger.run_id,
wandb_log,
self.epoch,
self.global_step,
bin_path,
self.render_async,
self.render_queue,
),
)
render_proc.start()
self.render_processes.append(render_proc)
else:
pufferlib.utils.render_eval_videos(
self.config,
env_cfg,
self.logger.run_id,
wandb_log,
self.epoch,
self.global_step,
bin_path,
self.render_async,
wandb_run=wandb_run,
)
except Exception as e:
print(f"Failed to render evaluation videos: {e}")

def check_render_queue(self):
"""Check if any async render jobs finished and log them."""
if not self.render_async or not hasattr(self, "render_queue"):
Expand All @@ -597,21 +660,39 @@ def check_render_queue(self):
result = self.render_queue.get_nowait()
step = result["step"]
videos = result["videos"]
is_eval = result.get("eval", False) # Check if these are eval videos

# Log to wandb if available
if hasattr(self.logger, "wandb") and self.logger.wandb:
import wandb

payload = {}
if videos["output_topdown"]:
payload["render/world_state"] = [wandb.Video(p, format="mp4") for p in videos["output_topdown"]]
if videos["output_agent"]:
payload["render/agent_view"] = [wandb.Video(p, format="mp4") for p in videos["output_agent"]]
if is_eval:
# Use eval_render namespace for eval videos
if videos.get("output_topdown"):
payload["eval_render/world_state"] = [
wandb.Video(p, format="mp4") for p in videos["output_topdown"]
]
if videos.get("output_agent"):
payload["eval_render/agent_view"] = [
wandb.Video(p, format="mp4") for p in videos["output_agent"]
]
else:
# Use render namespace for training videos
if videos.get("output_topdown"):
payload["render/world_state"] = [
wandb.Video(p, format="mp4") for p in videos["output_topdown"]
]
if videos.get("output_agent"):
payload["render/agent_view"] = [
wandb.Video(p, format="mp4") for p in videos["output_agent"]
]

if payload:
# Store the step under a custom "render_step" key so async render logs do not trip wandb's monotonically-increasing step check
payload["render_step"] = step
self.logger.wandb.log(payload)
print(f"Logged async {'eval ' if is_eval else ''}render videos to wandb (step {step})")

except queue.Empty:
pass
Expand Down Expand Up @@ -1693,7 +1774,7 @@ def render_task(map_path):
if render_configs.get("show_lasers", False):
cmd.append("--lasers")
if render_configs.get("show_human_logs", False):
cmd.append("--show-human-logs")
cmd.append("--log-trajectories")
if render_configs.get("zoom_in", False):
cmd.append("--zoom-in")
cmd.extend(["--view", view_mode])
Expand Down
Loading