 MODEL_NAME_OR_PATH="/weka/oe-adapt-default/michaeln/checkpoints/olmo3-7b-base"
 GS_MODEL_NAME="olmo3_7b_base"

-DATASETS="saurabh5/DAPO-Math-17k-Processed_filtered_olmo_completions_new_template_filtered 1.0 saurabh5/MATH_3000_Filtered_olmo_completions_new_template_filtered 1.0"
+DATASETS="hamishivi/rlvr_orz_math_57k_collected_filtered 1.0"

 # math evals
 # EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
@@ -30,7 +30,7 @@ python mason.py \
     --pure_docker_mode \
     --image ${BEAKER_IMAGE} \
     --preemptible \
-    --num_nodes 8 \
+    --num_nodes 5 \
     --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
     --env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
     --gs_model_name $GS_MODEL_NAME \
@@ -67,7 +67,7 @@ python open_instruct/grpo_fast.py \
     --total_episodes 512256 \
     --deepspeed_stage 3 \
     --num_learners_per_node 8 \
-    --vllm_num_engines 56 \
+    --vllm_num_engines 32 \
     --vllm_tensor_parallel_size 1 \
     --lr_scheduler_type constant \
     --apply_verifiable_reward true \
@@ -80,8 +80,7 @@ python open_instruct/grpo_fast.py \
     --with_tracking \
     --vllm_enable_prefix_caching \
     --clip_higher 0.272 \
-    --output_dir /output/olmo3-7b-rlzero/checkpoints \
-    --gs_checkpoint_state_dir gs://ai2-llm/checkpoints/rlzero/olmo3-7b_rlzero/ \
+    --output_dir /output/olmo3-7b-rlzero-spurious/checkpoints \
     --mask_truncated_completions True \
     --oe_eval_max_length 32768 \
     --try_launch_beaker_eval_jobs_on_weka True \
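The node count appears to track the GPU budget of the generation/training split. A minimal back-of-envelope sketch, assuming 8 GPUs per Beaker node and that learner GPUs and vLLM engine GPUs draw from the same pool (neither assumption is stated in the diff itself):

# hedged GPU-accounting sketch; assumes 8 GPUs per node
GPUS_PER_NODE=8
LEARNER_GPUS=8            # --num_learners_per_node 8
ENGINE_GPUS=$((32 * 1))   # --vllm_num_engines 32 x --vllm_tensor_parallel_size 1
echo $(( (LEARNER_GPUS + ENGINE_GPUS + GPUS_PER_NODE - 1) / GPUS_PER_NODE ))
# -> 5, matching --num_nodes 5; the old 56-engine config fills 8 nodes by the same arithmetic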