Skip to content

Commit 3a3b895

Browse files
committed
back to 7b math rlzero
1 parent f3ddfe1 commit 3a3b895

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

scripts/train/olmo3/7b_rlzero.sh

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,15 +4,15 @@
44
MODEL_NAME_OR_PATH="/weka/oe-adapt-default/michaeln/checkpoints/olmo3-7b-base"
55
GS_MODEL_NAME="olmo3_7b_base"
66

7-
DATASETS="hamishivi/rlvr_orz_math_57k_collected_filtered 1.0"
7+
DATASETS="allenai/Dolci-RLZero-Math-7B 1.0"
88

99
# math evals
1010
# EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
1111
EVALS="aime:zs_cot_r1::pass_at_32_2024_dapo,aime:zs_cot_r1::pass_at_32_2025_dapo,minerva_math_500::hamish_zs_reasoning_dapo"
1212

1313
# AIME 2024, 2025 local evals
14-
LOCAL_EVALS="mnoukhov/aime2024-25-rlvr 1.0 mnoukhov/aime2024-25-rlvr 1.0"
15-
LOCAL_EVAL_SPLITS="test_2024 test_2024 test_2025 test_2025"
14+
LOCAL_EVALS="allenai/Dolci-RLZero-Math-7B 16"
15+
LOCAL_EVAL_SPLITS="train"
1616

1717

1818
EXP_NAME="olmo3-7b_rlzero_${GS_MODEL_NAME}"
@@ -30,7 +30,7 @@ python mason.py \
3030
--pure_docker_mode \
3131
--image ${BEAKER_IMAGE} \
3232
--preemptible \
33-
--num_nodes 5 \
33+
--num_nodes 8 \
3434
--env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
3535
--env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
3636
--gs_model_name $GS_MODEL_NAME \
@@ -59,15 +59,15 @@ python open_instruct/grpo_fast.py \
5959
--dataset_mixer_eval_list_splits $LOCAL_EVAL_SPLITS \
6060
--max_prompt_token_length 2048 \
6161
--response_length 16384 \
62-
--pack_length 32768 \
62+
--pack_length 18432 \
6363
--model_name_or_path ${MODEL_NAME_OR_PATH} \
6464
--chat_template_name olmo_thinker_dapo \
6565
--non_stop_penalty False \
6666
--temperature 1.0 \
6767
--total_episodes 512256 \
6868
--deepspeed_stage 3 \
6969
--num_learners_per_node 8 \
70-
--vllm_num_engines 32 \
70+
--vllm_num_engines 56 \
7171
--vllm_tensor_parallel_size 1 \
7272
--lr_scheduler_type constant \
7373
--apply_verifiable_reward true \
@@ -80,7 +80,7 @@ python open_instruct/grpo_fast.py \
8080
--with_tracking \
8181
--vllm_enable_prefix_caching \
8282
--clip_higher 0.272 \
83-
--output_dir /output/olmo3-7b-rlzero-spurious/checkpoints \
83+
--output_dir /output/olmo3-7b-rlzero-math/checkpoints \
8484
--mask_truncated_completions True \
8585
--oe_eval_max_length 32768 \
8686
--try_launch_beaker_eval_jobs_on_weka True \

0 commit comments

Comments (0)