@@ -8,9 +8,8 @@ DATASETS="allenai/Dolci-RLZero-Math-7B 1.0"
88
99# math evals
1010# EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
11- EVALS=" aime:zs_cot_r1::pass_at_32_2024_dapo ,aime:zs_cot_r1::pass_at_32_2025_dapo ,minerva_math_500::hamish_zs_reasoning_dapo "
11+ EVALS=" aime:zs_cot_r1::pass_at_32_2024_rlzero ,aime:zs_cot_r1::pass_at_32_2025_rlzero ,minerva_math_500::hamish_zs_reasoning_rlzero "
1212
13- # AIME 2024, 2025 local evals
1413LOCAL_EVALS=" allenai/Dolci-RLZero-Math-7B 16"
1514LOCAL_EVAL_SPLITS=" train"
1615
@@ -26,7 +25,7 @@ python mason.py \
2625 --task_name ${EXP_NAME} \
2726 --cluster ${cluster} \
2827 --workspace ai2/olmo-instruct \
29- --priority urgent \
28+ --priority high \
3029 --pure_docker_mode \
3130 --image ${BEAKER_IMAGE} \
3231 --preemptible \
@@ -72,7 +71,7 @@ python open_instruct/grpo_fast.py \
7271 --lr_scheduler_type constant \
7372 --apply_verifiable_reward true \
7473 --seed 1 \
75- --local_eval_every 100 \
74+ --local_eval_every 25 \
7675 --save_freq 100 \
7776 --beaker_eval_freq 100 \
7877 --checkpoint_state_freq 100 \
@@ -81,14 +80,11 @@ python open_instruct/grpo_fast.py \
8180 --vllm_enable_prefix_caching \
8281 --clip_higher 0.272 \
8382 --output_dir /output/olmo3-7b-rlzero-math/checkpoints \
84- --mask_truncated_completions True \
83+ --mask_truncated_completions False \
8584 --oe_eval_max_length 32768 \
8685 --try_launch_beaker_eval_jobs_on_weka True \
8786 --eval_priority high \
8887 --eval_on_step_0 True \
8988 --oe_eval_tasks $EVALS \
89+ --oe_eval_gpu_multiplier 4 \
9090 --oe_eval_beaker_image michaeln/oe_eval_olmo3_rlzero $@
91-
92- # TODO
93- # --oe_eval_gpu_multiplier 4 \
94- # --oe_eval_beaker_image michaeln/oe_eval_rlzero
0 commit comments