Skip to content

Commit 4b59291

Browse files
committed
mask truncation false
1 parent 43876d7 commit 4b59291

File tree

1 file changed

+5 -9 lines changed

1 file changed

+5 -9 lines changed

scripts/train/olmo3/7b_rlzero.sh

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,8 @@ DATASETS="allenai/Dolci-RLZero-Math-7B 1.0"
88

99
# math evals
1010
# EVALS="minerva_math_500::hamish_zs_reasoning_deepseek"
11-
EVALS="aime:zs_cot_r1::pass_at_32_2024_dapo,aime:zs_cot_r1::pass_at_32_2025_dapo,minerva_math_500::hamish_zs_reasoning_dapo"
11+
EVALS="aime:zs_cot_r1::pass_at_32_2024_rlzero,aime:zs_cot_r1::pass_at_32_2025_rlzero,minerva_math_500::hamish_zs_reasoning_rlzero"
1212

13-
# AIME 2024, 2025 local evals
1413
LOCAL_EVALS="allenai/Dolci-RLZero-Math-7B 16"
1514
LOCAL_EVAL_SPLITS="train"
1615

@@ -26,7 +25,7 @@ python mason.py \
2625
--task_name ${EXP_NAME} \
2726
--cluster ${cluster} \
2827
--workspace ai2/olmo-instruct \
29-
--priority urgent \
28+
--priority high \
3029
--pure_docker_mode \
3130
--image ${BEAKER_IMAGE} \
3231
--preemptible \
@@ -72,7 +71,7 @@ python open_instruct/grpo_fast.py \
7271
--lr_scheduler_type constant \
7372
--apply_verifiable_reward true \
7473
--seed 1 \
75-
--local_eval_every 100 \
74+
--local_eval_every 25 \
7675
--save_freq 100 \
7776
--beaker_eval_freq 100 \
7877
--checkpoint_state_freq 100 \
@@ -81,14 +80,11 @@ python open_instruct/grpo_fast.py \
8180
--vllm_enable_prefix_caching \
8281
--clip_higher 0.272 \
8382
--output_dir /output/olmo3-7b-rlzero-math/checkpoints \
84-
--mask_truncated_completions True \
83+
--mask_truncated_completions False \
8584
--oe_eval_max_length 32768 \
8685
--try_launch_beaker_eval_jobs_on_weka True \
8786
--eval_priority high \
8887
--eval_on_step_0 True \
8988
--oe_eval_tasks $EVALS \
89+
--oe_eval_gpu_multiplier 4 \
9090
--oe_eval_beaker_image michaeln/oe_eval_olmo3_rlzero $@
91-
92-
# TODO
93-
# --oe_eval_gpu_multiplier 4 \
94-
# --oe_eval_beaker_image michaeln/oe_eval_rlzero

0 commit comments

Comments (0)