Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pufferlib/ocean/g2048/binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ void my_log(Log* log, Dict* out) {
dict_set(out, "episode_return", log->episode_return);
dict_set(out, "episode_length", log->episode_length);
dict_set(out, "lifetime_max_tile", log->lifetime_max_tile);
dict_set(out, "reached_16384", log->reached_16384);
dict_set(out, "reached_32768", log->reached_32768);
dict_set(out, "reached_65536", log->reached_65536);
dict_set(out, "reached_131072", log->reached_131072);
Expand Down
2 changes: 2 additions & 0 deletions pufferlib/ocean/g2048/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def evaluate(env_name, load_model_path):
episode_lengths = sum(n * l for n, l in zip(stats['n'], stats['episode_length'])) / num_episodes
max_tiles = sum(n * m for n, m in zip(stats['n'], stats['score'])) / num_episodes
merge_scores = sum(n * s for n, s in zip(stats['n'], stats['merge_score'])) / num_episodes
reached_16384 = sum(n * s for n, s in zip(stats['n'], stats['reached_16384'])) / num_episodes
reached_32768 = sum(n * s for n, s in zip(stats['n'], stats['reached_32768'])) / num_episodes
reached_65536 = sum(n * s for n, s in zip(stats['n'], stats['reached_65536'])) / num_episodes
reached_131072 = sum(n * s for n, s in zip(stats['n'], stats['reached_131072'])) / num_episodes
Expand All @@ -38,6 +39,7 @@ def evaluate(env_name, load_model_path):
# The stats from vecenv are averaged across envs that were done in the same tick. Cannot get the single max.
print(f"Episode length -- Avg: {episode_lengths:.1f}, Max: {max(stats['episode_length']):.1f}")
print(f"Merge score -- Avg: {merge_scores:.1f}, Max: {max(stats['merge_score']):.1f}")
print(f"Reached 16384 prob: {reached_16384*100:.2f} %")
print(f"Reached 32768 prob: {reached_32768*100:.2f} %")
print(f"Reached 65536 prob: {reached_65536*100:.2f} %")
print(f"Reached 131072 prob: {reached_131072*100:.2f} %")
Expand Down
57 changes: 30 additions & 27 deletions pufferlib/ocean/g2048/g2048.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,16 @@ static inline float calculate_perf(unsigned char max_tile) {
typedef struct Log {
float perf;
float score;
float merge_score;
float episode_return;
float episode_length;
float lifetime_max_tile;
float reached_32768;
float reached_65536;
float reached_131072;
float n;
} Log;
float merge_score;
float episode_return;
float episode_length;
float lifetime_max_tile;
float reached_16384;
float reached_32768;
float reached_65536;
float reached_131072;
float n;
} Log;

typedef struct Game {
Log log; // Required
Expand Down Expand Up @@ -130,13 +131,14 @@ void add_log(Game* game) {

game->log.score += (float)(1 << game->max_tile);
game->log.perf += calculate_perf(game->max_tile);
game->log.merge_score += (float)game->score;
game->log.episode_length += game->tick;
game->log.episode_return += game->episode_reward;
game->log.lifetime_max_tile += (float)(1 << game->lifetime_max_tile);
game->log.reached_32768 += (game->max_tile >= 15);
game->log.reached_65536 += (game->max_tile >= 16);
game->log.reached_131072 += (game->max_tile >= 17);
game->log.merge_score += (float)game->score;
game->log.episode_length += game->tick;
game->log.episode_return += game->episode_reward;
game->log.lifetime_max_tile += (float)(1 << game->lifetime_max_tile);
game->log.reached_16384 += (game->max_tile >= 14);
game->log.reached_32768 += (game->max_tile >= 15);
game->log.reached_65536 += (game->max_tile >= 16);
game->log.reached_131072 += (game->max_tile >= 17);
game->log.n += 1;
}

Expand Down Expand Up @@ -368,12 +370,12 @@ void c_step(Game* game) {
bool did_move = move(game, game->actions[0] + 1, &reward, &score_add);
game->tick++;

if (did_move) {
game->moves_made++;
place_tile_at_random_cell(game, get_new_tile());
game->score += score_add;
update_stats(game);
if (did_move) {
game->moves_made++;
// Refresh empty_count after merges so spawning uses the correct count.
update_stats(game);
place_tile_at_random_cell(game, get_new_tile());
game->score += score_add;

// Observations only change if the grid changes
update_observations(game);
Expand Down Expand Up @@ -413,11 +415,12 @@ void step_without_reset(Game* game) {
bool did_move = move(game, game->actions[0] + 1, &reward, &score_add);
game->tick++;

if (did_move) {
game->moves_made++;
place_tile_at_random_cell(game, get_new_tile());
game->score += score_add;
update_stats(game);
if (did_move) {
game->moves_made++;
// Refresh empty_count after merges so spawning uses the correct count.
update_stats(game);
place_tile_at_random_cell(game, get_new_tile());
game->score += score_add;
// Observations only change if the grid changes
update_observations(game);
}
Expand Down
Loading