diff --git a/pufferlib/ocean/g2048/binding.h b/pufferlib/ocean/g2048/binding.h index 1e7e3c411..ca0cec24a 100644 --- a/pufferlib/ocean/g2048/binding.h +++ b/pufferlib/ocean/g2048/binding.h @@ -21,6 +21,7 @@ void my_log(Log* log, Dict* out) { dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); dict_set(out, "lifetime_max_tile", log->lifetime_max_tile); + dict_set(out, "reached_16384", log->reached_16384); dict_set(out, "reached_32768", log->reached_32768); dict_set(out, "reached_65536", log->reached_65536); dict_set(out, "reached_131072", log->reached_131072); diff --git a/pufferlib/ocean/g2048/eval.py b/pufferlib/ocean/g2048/eval.py index 29a435596..c31f675d5 100644 --- a/pufferlib/ocean/g2048/eval.py +++ b/pufferlib/ocean/g2048/eval.py @@ -29,6 +29,7 @@ def evaluate(env_name, load_model_path): episode_lengths = sum(n * l for n, l in zip(stats['n'], stats['episode_length'])) / num_episodes max_tiles = sum(n * m for n, m in zip(stats['n'], stats['score'])) / num_episodes merge_scores = sum(n * s for n, s in zip(stats['n'], stats['merge_score'])) / num_episodes + reached_16384 = sum(n * s for n, s in zip(stats['n'], stats['reached_16384'])) / num_episodes reached_32768 = sum(n * s for n, s in zip(stats['n'], stats['reached_32768'])) / num_episodes reached_65536 = sum(n * s for n, s in zip(stats['n'], stats['reached_65536'])) / num_episodes reached_131072 = sum(n * s for n, s in zip(stats['n'], stats['reached_131072'])) / num_episodes @@ -38,6 +39,7 @@ def evaluate(env_name, load_model_path): # The stats from vecenv are averaged across envs that were done in the same tick. Cannot get the single max. print(f"Episode length -- Avg: {episode_lengths:.1f}, Max: {max(stats['episode_length']):.1f}") print(f"Merge score -- Avg: {merge_scores:.1f}, Max: {max(stats['merge_score']):.1f}") + print(f"Reached 16384 prob: {reached_16384*100:.2f} %") print(f"Reached 32768 prob: {reached_32768*100:.2f} %") print(f"Reached 65536 prob: {reached_65536*100:.2f} %") print(f"Reached 131072 prob: {reached_131072*100:.2f} %") diff --git a/pufferlib/ocean/g2048/g2048.h b/pufferlib/ocean/g2048/g2048.h index 2592d5b2d..011715353 100644 --- a/pufferlib/ocean/g2048/g2048.h +++ b/pufferlib/ocean/g2048/g2048.h @@ -39,15 +39,16 @@ static inline float calculate_perf(unsigned char max_tile) { typedef struct Log { float perf; float score; - float merge_score; - float episode_return; - float episode_length; - float lifetime_max_tile; - float reached_32768; - float reached_65536; - float reached_131072; - float n; -} Log; + float merge_score; + float episode_return; + float episode_length; + float lifetime_max_tile; + float reached_16384; + float reached_32768; + float reached_65536; + float reached_131072; + float n; +} Log; typedef struct Game { Log log; // Required @@ -130,13 +131,14 @@ void add_log(Game* game) { game->log.score += (float)(1 << game->max_tile); game->log.perf += calculate_perf(game->max_tile); - game->log.merge_score += (float)game->score; - game->log.episode_length += game->tick; - game->log.episode_return += game->episode_reward; - game->log.lifetime_max_tile += (float)(1 << game->lifetime_max_tile); - game->log.reached_32768 += (game->max_tile >= 15); - game->log.reached_65536 += (game->max_tile >= 16); - game->log.reached_131072 += (game->max_tile >= 17); + game->log.merge_score += (float)game->score; + game->log.episode_length += game->tick; + game->log.episode_return += game->episode_reward; + game->log.lifetime_max_tile += (float)(1 << game->lifetime_max_tile); + game->log.reached_16384 += (game->max_tile >= 14); + game->log.reached_32768 += (game->max_tile >= 15); + game->log.reached_65536 += (game->max_tile >= 16); + game->log.reached_131072 += (game->max_tile >= 17); game->log.n += 1; } @@ -368,12 +370,12 @@ void c_step(Game* game) { bool did_move = move(game, game->actions[0] + 1, &reward, &score_add); game->tick++; - if (did_move) { - game->moves_made++; - place_tile_at_random_cell(game, get_new_tile()); - game->score += score_add; - - update_stats(game); + if (did_move) { + game->moves_made++; + // Refresh empty_count after merges so spawning uses the correct count. + update_stats(game); + place_tile_at_random_cell(game, get_new_tile()); + game->score += score_add; // Observations only change if the grid changes update_observations(game); @@ -413,11 +415,12 @@ void step_without_reset(Game* game) { bool did_move = move(game, game->actions[0] + 1, &reward, &score_add); game->tick++; - if (did_move) { - game->moves_made++; - place_tile_at_random_cell(game, get_new_tile()); - game->score += score_add; - update_stats(game); + if (did_move) { + game->moves_made++; + // Refresh empty_count after merges so spawning uses the correct count. + update_stats(game); + place_tile_at_random_cell(game, get_new_tile()); + game->score += score_add; // Observations only change if the grid changes update_observations(game); }