Skip to content

Commit d1d560d

Browse files
committed
32b params
1 parent 6f2c44d commit d1d560d

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

scripts/train/olmo3/32b_rlzero.sh

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -63,17 +63,18 @@ python open_instruct/grpo_fast.py \
 --chat_template_name olmo_thinker_dapo \
 --non_stop_penalty False \
 --temperature 1.0 \
---total_episodes 25600 \
+--total_episodes 12800 \
 --deepspeed_stage 3 \
 --num_learners_per_node 8 8 8 \
---vllm_num_engines 16 \
---vllm_tensor_parallel_size 4 \
+--vllm_num_engines 3 \
+--gather_whole_model False \
+--vllm_tensor_parallel_size 8 \
 --lr_scheduler_type constant \
 --apply_verifiable_reward true \
 --seed 1 \
---local_eval_every 100 \
---save_freq 100 \
---checkpoint_state_freq 100 \
+--local_eval_every 25 \
+--save_freq 25 \
+--checkpoint_state_freq 25 \
 --gradient_checkpointing \
 --with_tracking \
 --vllm_enable_prefix_caching \
@@ -83,4 +84,6 @@ python open_instruct/grpo_fast.py \
 --try_launch_beaker_eval_jobs_on_weka True \
 --eval_priority high \
 --oe_eval_tasks $EVALS \
---oe_eval_gpu_multiplier 4
+--oe_eval_gpu_multiplier 4 \
+--vllm_enforce_eager \
+--deepspeed_zpg 32

0 commit comments

Comments
 (0)