Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ goal_target_distance = 30.0
collision_behavior = 0
; Options: 0 - Ignore, 1 - Stop, 2 - Remove
offroad_behavior = 0
; Options: 0 - False, 1 - True (if true, the vehicle-collision reward is rescaled based on the agent's own speed)
speed_based_collisions_reward = 1
; Number of steps before
episode_length = 91
resample_frequency = 910
Expand Down
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
env->reward_goal_post_respawn = conf.reward_goal_post_respawn;
env->episode_length = conf.episode_length;
env->termination_mode = conf.termination_mode;
env->speed_based_collisions_reward = conf.speed_based_collisions_reward;
env->collision_behavior = conf.collision_behavior;
env->offroad_behavior = conf.offroad_behavior;
env->max_controlled_agents = unpack(kwargs, "max_controlled_agents");
Expand Down
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void demo() {
.dt = 0.1f,
.episode_length = 300,
.termination_mode = 0,
.speed_based_collisions_reward = 0,
.collision_behavior = 0,
.offroad_behavior = 0,
.init_steps = 0,
Expand Down
31 changes: 29 additions & 2 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ struct Drive {
int max_controlled_agents;
int logs_capacity;
int goal_behavior;
int speed_based_collisions_reward;
float goal_target_distance;
char *ini_file;
char *scenario_id;
Expand Down Expand Up @@ -2002,6 +2003,26 @@ void respawn_agent(Drive *env, int agent_idx) {
env->entities[agent_idx].steering_angle = 0.0f;
}

float sigmoid(float x) { return 1.0 / (1.0 + exp(-x)); }

// Returns the vehicle-collision reward for agent `agent_idx`, scaled by a
// sigmoid of that agent's own speed.
//
// The speed is the magnitude of the agent's (vx, vy) velocity, i.e. measured
// against the environment rather than relative to the vehicle it hit.
// The scale factor is sigmoid(speed * 3 / 10): a stationary agent receives
// 50% of env->reward_vehicle_collision, and an agent moving at speed 10
// (m/s, per the original author's note) receives roughly 95% of it.
float compute_collision_reward(Drive *env, int agent_idx) {
    // Unscaled collision reward configured on the environment.
    const float base_reward = env->reward_vehicle_collision;

    // Magnitude of the agent's velocity vector.
    const float ego_speed = sqrtf(env->entities[agent_idx].vx * env->entities[agent_idx].vx +
                                  env->entities[agent_idx].vy * env->entities[agent_idx].vy);

    // Linear map of speed onto the sigmoid's input axis: speed 10 -> input 3,
    // where the sigmoid evaluates to about 0.95.
    const float sigmoid_input = ego_speed * 3.0f / 10.0f;

    return base_reward * sigmoid(sigmoid_input);
}

void c_step(Drive *env) {
memset(env->rewards, 0, env->active_agent_count * sizeof(float));
memset(env->terminals, 0, env->active_agent_count * sizeof(unsigned char));
Expand Down Expand Up @@ -2061,8 +2082,14 @@ void c_step(Drive *env) {

if (collision_state > 0) {
if (collision_state == VEHICLE_COLLISION) {
env->rewards[i] += env->reward_vehicle_collision;
env->logs[i].episode_return += env->reward_vehicle_collision;
if (env->speed_based_collisions_reward == 1) {
float collision_reward = compute_collision_reward(env, agent_idx);
env->rewards[i] += collision_reward;
env->logs[i].episode_return += collision_reward;
} else {
env->rewards[i] += env->reward_vehicle_collision;
env->logs[i].episode_return += env->reward_vehicle_collision;
}
env->logs[i].collision_rate = 1.0f;
env->logs[i].collisions_per_agent += 1.0f;
} else if (collision_state == OFFROAD) {
Expand Down
3 changes: 3 additions & 0 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
dt=0.1,
episode_length=None,
termination_mode=None,
speed_based_collisions_reward=0,
resample_frequency=91,
num_maps=100,
num_agents=512,
Expand Down Expand Up @@ -63,6 +64,7 @@ def __init__(
self.human_agent_idx = human_agent_idx
self.episode_length = episode_length
self.termination_mode = termination_mode
self.speed_based_collisions_reward = speed_based_collisions_reward
self.resample_frequency = resample_frequency
self.dynamics_model = dynamics_model

Expand Down Expand Up @@ -206,6 +208,7 @@ def __init__(
dt=dt,
episode_length=(int(episode_length) if episode_length is not None else None),
termination_mode=(int(self.termination_mode) if self.termination_mode is not None else 0),
speed_based_collisions_reward=int(self.speed_based_collisions_reward),
max_controlled_agents=self.max_controlled_agents,
map_path=self.map_files[map_ids[i]],
max_agents=nxt - cur,
Expand Down
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/visualize.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
.dt = conf.dt,
.episode_length = conf.episode_length,
.termination_mode = conf.termination_mode,
.speed_based_collisions_reward = conf.speed_based_collisions_reward,
.collision_behavior = conf.collision_behavior,
.offroad_behavior = conf.offroad_behavior,
.init_steps = conf.init_steps,
Expand Down
3 changes: 3 additions & 0 deletions pufferlib/ocean/env_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ typedef struct {
float goal_target_distance;
int episode_length;
int termination_mode;
int speed_based_collisions_reward;
int init_steps;
int init_mode;
int control_mode;
Expand Down Expand Up @@ -84,6 +85,8 @@ static int handler(void *config, const char *section, const char *name, const ch
env_config->episode_length = atoi(value);
} else if (MATCH("env", "termination_mode")) {
env_config->termination_mode = atoi(value);
} else if (MATCH("env", "speed_based_collisions_reward")) {
env_config->speed_based_collisions_reward = atoi(value);
} else if (MATCH("env", "init_steps")) {
env_config->init_steps = atoi(value);
} else if (MATCH("env", "init_mode")) {
Expand Down
Binary file removed pufferlib/resources/drive/puffer_drive_weights.bin
Binary file not shown.