44MODEL_NAME_OR_PATH=" /weka/oe-adapt-default/michaeln/checkpoints/olmo3-7b-base"
55GS_MODEL_NAME=" olmo3_7b_base"
66
7- DATASETS=" hamishivi/rlvr_orz_math_57k_collected_filtered 1.0"
7+ DATASETS=" allenai/Dolci-RLZero-Math-7B 1.0"
88
99# math evals
1010# EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
1111EVALS=" aime:zs_cot_r1::pass_at_32_2024_dapo,aime:zs_cot_r1::pass_at_32_2025_dapo,minerva_math_500::hamish_zs_reasoning_dapo"
1212
1313# AIME 2024, 2025 local evals
14- LOCAL_EVALS=" mnoukhov/aime2024-25-rlvr 1.0 mnoukhov/aime2024-25-rlvr 1.0 "
15- LOCAL_EVAL_SPLITS=" test_2024 test_2024 test_2025 test_2025 "
14+ LOCAL_EVALS=" allenai/Dolci-RLZero-Math-7B 16 "
15+ LOCAL_EVAL_SPLITS=" train "
1616
1717
1818EXP_NAME=" olmo3-7b_rlzero_${GS_MODEL_NAME} "
@@ -30,7 +30,7 @@ python mason.py \
3030 --pure_docker_mode \
3131 --image ${BEAKER_IMAGE} \
3232 --preemptible \
33- --num_nodes 5 \
33+ --num_nodes 8 \
3434 --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
3535 --env VLLM_ATTENTION_BACKEND=" FLASH_ATTN" \
3636 --gs_model_name $GS_MODEL_NAME \
@@ -59,15 +59,15 @@ python open_instruct/grpo_fast.py \
5959 --dataset_mixer_eval_list_splits $LOCAL_EVAL_SPLITS \
6060 --max_prompt_token_length 2048 \
6161 --response_length 16384 \
62- --pack_length 32768 \
62+ --pack_length 18432 \
6363 --model_name_or_path ${MODEL_NAME_OR_PATH} \
6464 --chat_template_name olmo_thinker_dapo \
6565 --non_stop_penalty False \
6666 --temperature 1.0 \
6767 --total_episodes 512256 \
6868 --deepspeed_stage 3 \
6969 --num_learners_per_node 8 \
70- --vllm_num_engines 32 \
70+ --vllm_num_engines 56 \
7171 --vllm_tensor_parallel_size 1 \
7272 --lr_scheduler_type constant \
7373 --apply_verifiable_reward true \
@@ -80,7 +80,7 @@ python open_instruct/grpo_fast.py \
8080 --with_tracking \
8181 --vllm_enable_prefix_caching \
8282 --clip_higher 0.272 \
83- --output_dir /output/olmo3-7b-rlzero-spurious /checkpoints \
83+ --output_dir /output/olmo3-7b-rlzero-math /checkpoints \
8484 --mask_truncated_completions True \
8585 --oe_eval_max_length 32768 \
8686 --try_launch_beaker_eval_jobs_on_weka True \
0 commit comments