Skip to content

Commit f3ddfe1

Browse files
for spurious...
1 parent 92ee688 commit f3ddfe1

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

scripts/train/olmo3/7b_rlzero.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
4 4
MODEL_NAME_OR_PATH="/weka/oe-adapt-default/michaeln/checkpoints/olmo3-7b-base"
5 5
GS_MODEL_NAME="olmo3_7b_base"
6 6

7-
DATASETS="saurabh5/DAPO-Math-17k-Processed_filtered_olmo_completions_new_template_filtered 1.0 saurabh5/MATH_3000_Filtered_olmo_completions_new_template_filtered 1.0"
7+
DATASETS="hamishivi/rlvr_orz_math_57k_collected_filtered 1.0"
8 8

9 9
# math evals
10 10
# EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
@@ -30,7 +30,7 @@ python mason.py \
30 30
--pure_docker_mode \
31 31
--image ${BEAKER_IMAGE} \
32 32
--preemptible \
33-
--num_nodes 8 \
33+
--num_nodes 5 \
34 34
--env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
35 35
--env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
36 36
--gs_model_name $GS_MODEL_NAME \
@@ -67,7 +67,7 @@ python open_instruct/grpo_fast.py \
67 67
--total_episodes 512256 \
68 68
--deepspeed_stage 3 \
69 69
--num_learners_per_node 8 \
70-
--vllm_num_engines 56 \
70+
--vllm_num_engines 32 \
71 71
--vllm_tensor_parallel_size 1 \
72 72
--lr_scheduler_type constant \
73 73
--apply_verifiable_reward true \
@@ -80,8 +80,7 @@ python open_instruct/grpo_fast.py \
80 80
--with_tracking \
81 81
--vllm_enable_prefix_caching \
82 82
--clip_higher 0.272 \
83-
--output_dir /output/olmo3-7b-rlzero/checkpoints \
84-
--gs_checkpoint_state_dir gs://ai2-llm/checkpoints/rlzero/olmo3-7b_rlzero/ \
83+
--output_dir /output/olmo3-7b-rlzero-spurious/checkpoints \
85 84
--mask_truncated_completions True \
86 85
--oe_eval_max_length 32768 \
87 86
--try_launch_beaker_eval_jobs_on_weka True \

0 commit comments

Comments (0)