diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index f1f01114f..1a0e044ab 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -42,6 +42,8 @@ goal_target_distance = 30.0 collision_behavior = 0 ; Options: 0 - Ignore, 1 - Stop, 2 - Remove offroad_behavior = 0 +; Options: 0 - False, 1 - True (if true, rescales the vehicle-collision reward based on the agent's own speed) +speed_based_collisions_reward = 1 ; Number of steps before episode_length = 91 resample_frequency = 910 diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 2011402d2..f93691db3 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -205,6 +205,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->reward_goal_post_respawn = conf.reward_goal_post_respawn; env->episode_length = conf.episode_length; env->termination_mode = conf.termination_mode; + env->speed_based_collisions_reward = conf.speed_based_collisions_reward; env->collision_behavior = conf.collision_behavior; env->offroad_behavior = conf.offroad_behavior; env->max_controlled_agents = unpack(kwargs, "max_controlled_agents"); diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 8bd3c7388..1e05fa521 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -51,6 +51,7 @@ void demo() { .dt = 0.1f, .episode_length = 300, .termination_mode = 0, + .speed_based_collisions_reward = 0, .collision_behavior = 0, .offroad_behavior = 0, .init_steps = 0, diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 09156a4b4..c39aab144 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -319,6 +319,7 @@ struct Drive { int max_controlled_agents; int logs_capacity; int goal_behavior; + int speed_based_collisions_reward; float goal_target_distance; char *ini_file; char *scenario_id; @@ -2002,6 +2003,26 @@ void
respawn_agent(Drive *env, int agent_idx) { env->entities[agent_idx].steering_angle = 0.0f; } +float sigmoid(float x) { return 1.0 / (1.0 + exp(-x)); } + +float compute_collision_reward(Drive *env, int agent_idx) { + + // get speed of the vehicle relative to the environment (not the car it collided with) + float current_speed = sqrtf(env->entities[agent_idx].vx * env->entities[agent_idx].vx + + env->entities[agent_idx].vy * env->entities[agent_idx].vy); + + // get env reward + float collision_reward = env->reward_vehicle_collision; + + // normalize speed such that 0 m/s gets a 50% of the reward and 10 m/s gets 95% of the reward + float rescaled_speed = current_speed * 3.0f / 10.0f; // rescale so that 10 becomes 3 + + // compute the sigmoid form + float collision_reward_rescaled = collision_reward * sigmoid(rescaled_speed); + + return collision_reward_rescaled; +} + void c_step(Drive *env) { memset(env->rewards, 0, env->active_agent_count * sizeof(float)); memset(env->terminals, 0, env->active_agent_count * sizeof(unsigned char)); @@ -2061,8 +2082,14 @@ void c_step(Drive *env) { if (collision_state > 0) { if (collision_state == VEHICLE_COLLISION) { - env->rewards[i] += env->reward_vehicle_collision; - env->logs[i].episode_return += env->reward_vehicle_collision; + if (env->speed_based_collisions_reward == 1) { + float collision_reward = compute_collision_reward(env, agent_idx); + env->rewards[i] += collision_reward; + env->logs[i].episode_return += collision_reward; + } else { + env->rewards[i] += env->reward_vehicle_collision; + env->logs[i].episode_return += env->reward_vehicle_collision; + } env->logs[i].collision_rate = 1.0f; env->logs[i].collisions_per_agent += 1.0f; } else if (collision_state == OFFROAD) { diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 2b43a50f8..dfea01fea 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -30,6 +30,7 @@ def __init__( dt=0.1, episode_length=None, 
termination_mode=None, + speed_based_collisions_reward=0, resample_frequency=91, num_maps=100, num_agents=512, @@ -63,6 +64,7 @@ def __init__( self.human_agent_idx = human_agent_idx self.episode_length = episode_length self.termination_mode = termination_mode + self.speed_based_collisions_reward = speed_based_collisions_reward self.resample_frequency = resample_frequency self.dynamics_model = dynamics_model @@ -206,6 +208,7 @@ def __init__( dt=dt, episode_length=(int(episode_length) if episode_length is not None else None), termination_mode=(int(self.termination_mode) if self.termination_mode is not None else 0), + speed_based_collisions_reward=int(self.speed_based_collisions_reward), max_controlled_agents=self.max_controlled_agents, map_path=self.map_files[map_ids[i]], max_agents=nxt - cur, diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index de235b4fa..c375d8641 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -241,6 +241,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o .dt = conf.dt, .episode_length = conf.episode_length, .termination_mode = conf.termination_mode, + .speed_based_collisions_reward = conf.speed_based_collisions_reward, .collision_behavior = conf.collision_behavior, .offroad_behavior = conf.offroad_behavior, .init_steps = conf.init_steps, diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 71c19debb..aabc3a7bd 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -25,6 +25,7 @@ typedef struct { float goal_target_distance; int episode_length; int termination_mode; + int speed_based_collisions_reward; int init_steps; int init_mode; int control_mode; @@ -84,6 +85,8 @@ static int handler(void *config, const char *section, const char *name, const ch env_config->episode_length = atoi(value); } else if (MATCH("env", "termination_mode")) { env_config->termination_mode = atoi(value); + } else 
if (MATCH("env", "speed_based_collisions_reward")) { + env_config->speed_based_collisions_reward = atoi(value); } else if (MATCH("env", "init_steps")) { env_config->init_steps = atoi(value); } else if (MATCH("env", "init_mode")) { diff --git a/pufferlib/resources/drive/puffer_drive_weights.bin b/pufferlib/resources/drive/puffer_drive_weights.bin deleted file mode 100644 index 87c1254ba..000000000 Binary files a/pufferlib/resources/drive/puffer_drive_weights.bin and /dev/null differ