Skip to content

Commit 344fae6

Browse files
committed
Merge branch 'olmo3-rlzero' of github.com:allenai/open-instruct into olmo3-rlzero
2 parents 3a3b895 + 9ce6c88 commit 344fae6

File tree

4 files changed: 10 additions and 12 deletions.

scripts/train/olmo3/32b_rlzero.sh

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,11 @@ python mason.py \
28  28    --task_name ${EXP_NAME} \
29  29    --cluster ${cluster} \
30  30    --workspace ai2/olmo-instruct \
31      - --priority high \
    31  + --priority urgent \
32  32    --pure_docker_mode \
33  33    --image ${BEAKER_IMAGE} \
34  34    --preemptible \
35      - --num_nodes 10 \
    35  + --num_nodes 12 \
36  36    --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
37  37    --env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
38  38    --gpus 8 \
@@ -68,10 +68,9 @@ python open_instruct/grpo_fast.py \
68  68    --total_episodes 512256 \
69  69    --deepspeed_stage 3 \
70  70    --num_learners_per_node 8 8 8 8 \
71      - --vllm_num_engines 12 \
    71  + --vllm_num_engines 16 \
72  72    --gather_whole_model False \
73  73    --vllm_tensor_parallel_size 4 \
74      - --inference_batch_size 125 \
75  74    --lr_scheduler_type constant \
76  75    --apply_verifiable_reward true \
77  76    --seed 1 \

scripts/train/olmo3/32b_rlzero_code.sh

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,11 @@ python mason.py \
25  25    --task_name ${EXP_NAME} \
26  26    --cluster ${cluster} \
27  27    --workspace ai2/olmo-instruct \
28      - --priority high \
    28  + --priority urgent \
29  29    --pure_docker_mode \
30  30    --image ${BEAKER_IMAGE} \
31  31    --preemptible \
32      - --num_nodes 10 \
    32  + --num_nodes 12 \
33  33    --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
34  34    --env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
35  35    --gpus 8 \
@@ -63,10 +63,9 @@ python mason.py \
63  63    --total_episodes 512256 \
64  64    --deepspeed_stage 3 \
65  65    --num_learners_per_node 8 8 8 8 \
66      - --vllm_num_engines 12 \
    66  + --vllm_num_engines 16 \
67  67    --gather_whole_model False \
68  68    --vllm_tensor_parallel_size 4 \
69      - --inference_batch_size 125 \
70  69    --lr_scheduler_type constant \
71  70    --apply_verifiable_reward true \
72  71    --seed 1 \

scripts/train/olmo3/7b_rlzero.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ python mason.py \
26  26    --task_name ${EXP_NAME} \
27  27    --cluster ${cluster} \
28  28    --workspace ai2/olmo-instruct \
29      - --priority high \
    29  + --priority urgent \
30  30    --pure_docker_mode \
31  31    --image ${BEAKER_IMAGE} \
32  32    --preemptible \

scripts/train/olmo3/7b_rlzero_code.sh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,11 @@ python mason.py \
26  26    --task_name ${EXP_NAME} \
27  27    --cluster ${cluster} \
28  28    --workspace ai2/olmo-instruct \
29      - --priority high \
    29  + --priority urgent \
30  30    --pure_docker_mode \
31  31    --image ${BEAKER_IMAGE} \
32  32    --preemptible \
33      - --num_nodes 5 \
    33  + --num_nodes 8 \
34  34    --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
35  35    --env VLLM_ATTENTION_BACKEND="FLASH_ATTN" \
36  36    --gs_model_name $GS_MODEL_NAME \
@@ -67,7 +67,7 @@ python open_instruct/grpo_fast.py \
67  67    --total_episodes 512256 \
68  68    --deepspeed_stage 3 \
69  69    --num_learners_per_node 8 \
70      - --vllm_num_engines 32 \
    70  + --vllm_num_engines 56 \
71  71    --vllm_tensor_parallel_size 1 \
72  72    --lr_scheduler_type constant \
73  73    --apply_verifiable_reward true \

0 commit comments

Comments
 (0)