@@ -25,6 +25,8 @@ export llama_data_path=/llama_data
 export llm_gpt_case_path=$root_path/llm/auto_parallel/gpt-3
 export gpt_data_path=/fleetx_data
 
+DEFAULT_TOPO=pp_first # hybrid-parallel topology order threaded into every case below; the other value Paddle accepts is sharding_first
+
 unset CUDA_VISIBLE_DEVICES
 
 function is_a100() {
@@ -256,6 +258,7 @@ function llama_dygraph_auto_bs4_bf16_SD2() {
         ./run_pretrain_auto.py \
         --model_name_or_path "meta-llama/Llama-2-7b" \
         --tokenizer_name_or_path "meta-llama/Llama-2-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "./output" \
         --weight_decay 0.01 \
@@ -358,6 +361,7 @@ function llama_dygraph_auto_bs8_fp32_DP2() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -429,6 +433,7 @@ function llama_dygraph_auto_bs8_fp32_DP2-MP2() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -511,6 +516,7 @@ function llama_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -584,6 +590,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -659,6 +666,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate() {
         --use_intermediate_api 1 \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -732,6 +740,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-CP2() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -806,6 +815,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-CP2_intermediate() {
     python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" --log_dir $case_log_dir run_pretrain_auto.py \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -883,6 +893,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_hybrid_pp() {
         --model_type "llama_pp" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -950,6 +961,7 @@ function llama_dygraph_auto_bs8_fp16_DP2-MP2-PP2_hybrid_pp() {
         --model_type "llama_pp" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $auto_case_out_dir \
         --split 949,50,1 \
@@ -1031,6 +1043,7 @@ function llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2() {
         ./run_pretrain_auto.py \
         --model_name_or_path "meta-llama/Llama-2-13b" \
         --tokenizer_name_or_path "meta-llama/Llama-2-13b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "./output" \
         --split 949,50,1 \
@@ -1131,6 +1144,7 @@ function llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2-VPP3_split_bw() {
         ./run_pretrain_auto.py \
         --model_name_or_path "meta-llama/Llama-2-13b" \
         --tokenizer_name_or_path "meta-llama/Llama-2-13b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "./output" \
         --split 949,50,1 \
@@ -1245,6 +1259,7 @@ function llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP1-SP() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -1361,6 +1376,7 @@ function llama_pir_auto_fuse_ffn_attention_qkv_MP2() {
         run_pretrain_auto.py \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $auto_case_out_dir \
         --split 949,50,1 \
@@ -1523,6 +1539,7 @@ function llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP2-SP() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -1623,6 +1640,7 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -1737,6 +1755,7 @@ function llama_dy2st_auto_bs2_bf16_DP2-MP1-PP1-CINN() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -1836,6 +1855,7 @@ function llama_dpo_dy2st_auto_bs2_bf16_MP8_intermediate() {
         --log_dir $case_log_dir \
         ../run_dpo_auto.py \
         --model_name_or_path "meta-llama/Meta-Llama-3.1-8B-Instruct" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --train_dataset_path ${llama_data_path}/data_dpo/data/train.jsonl \
         --dev_dataset_path ${llama_data_path}/data_dpo/data/dev.jsonl \
         --output_dir ./checkpoints/dpo_ckpts \
@@ -1926,6 +1946,7 @@ function llama_align_dygraph_dy2st_pir_auto_grad_merge_bs2_fp32_DP1-MP1-PP1() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2033,6 +2054,7 @@ function llama_align_dy2st_fthenb_and_vpp_auto_bs2_fp32_DP1-MP1-PP4() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2156,6 +2178,7 @@ function llama_align_dygraph_dy2st_pir_auto_pp_bs2_bf16_DP1-MP1-PP4() {
         --model_type "llama" \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2248,6 +2271,7 @@ function llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1() {
         ../../run_pretrain.py \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $dy_case_out_dir \
         --split 949,50,1 \
@@ -2321,6 +2345,7 @@ function llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1() {
         run_pretrain_auto.py \
         --model_name_or_path "facebook/llama-7b" \
         --tokenizer_name_or_path "facebook/llama-7b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $auto_case_out_dir \
         --split 949,50,1 \
@@ -2403,6 +2428,7 @@ function llama_baichuan_pir_auto_fuse_ffn_attention_qkv_DP2_MP2_PP2(){
         --model_type "llama" \
         --model_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
         --tokenizer_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2475,6 +2501,7 @@ function llama_baichuan_pir_auto_fuse_ffn_attention_qkv_DP2_MP2_PP2_intermediate
         --use_intermediate_api true \
         --model_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
         --tokenizer_name_or_path "baichuan-inc/Baichuan2-13B-Base" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2548,6 +2575,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2() {
         run_pretrain_auto.py \
         --model_name_or_path gpt2-medium-en \
         --tokenizer_name_or_path gpt2-medium-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -2620,6 +2648,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2() {
         run_pretrain_auto.py \
         --model_name_or_path gpt2-medium-en \
         --tokenizer_name_or_path gpt2-medium-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2692,6 +2721,7 @@ function llm_gpt_dygraph_auto_bs8_fp32_DP2-MP2-PP2() {
         run_pretrain_auto.py \
         --model_name_or_path gpt2-medium-en \
         --tokenizer_name_or_path gpt2-medium-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2765,6 +2795,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2() {
         run_pretrain_auto.py \
         --model_name_or_path gpt2-medium-en \
         --tokenizer_name_or_path gpt2-medium-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2838,6 +2869,7 @@ function llm_gpt_dygraph_auto_bs8_fp16_DP2-MP2-PP2_intermediate() {
         run_pretrain_auto.py \
         --model_name_or_path gpt2-medium-en \
         --tokenizer_name_or_path gpt2-medium-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir $case_out_dir \
         --split 949,50,1 \
@@ -2911,6 +2943,7 @@ function llm_gpt_pir_auto_bs4_TP2(){
         run_pretrain_auto.py \
         --model_name_or_path gpt3-13B-en \
         --tokenizer_name_or_path gpt3-13B-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -2978,6 +3011,7 @@ function llm_gpt_pir_auto_bs4_TP2_PP2(){
         run_pretrain_auto.py \
         --model_name_or_path gpt3-13B-en \
         --tokenizer_name_or_path gpt3-13B-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -3041,6 +3075,7 @@ function llm_gpt_pir_auto_bs8_DP2_TP2_PP2(){
         run_pretrain_auto.py \
         --model_name_or_path gpt3-13B-en \
         --tokenizer_name_or_path gpt3-13B-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -3107,6 +3142,7 @@ function llm_gpt_pir_auto_bs8_DP2_TP2_PP2_intermediate(){
         run_pretrain_auto.py \
         --model_name_or_path gpt3-13B-en \
         --tokenizer_name_or_path gpt3-13B-en \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "$gpt_data_path/data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -3163,6 +3199,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3254,6 +3291,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2-MP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3343,6 +3381,7 @@ function llm_qwen_dygraph_auto_bs1_fp32_DP2-MP2-PP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3434,6 +3473,7 @@ function llm_qwen_dygraph_auto_bs1_bf16_DP2-MP2-PP2() {
 {
     "model_name_or_path": "qwen/qwen-7b",
     "tokenizer_name_or_path": "qwen/qwen-7b",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/qwen_pretrain_ckpts",
     "per_device_train_batch_size": 1,
@@ -3547,6 +3587,7 @@ function llm_qwen_pir_auto_bs1_bf16_TP2(){
         run_pretrain_auto.py \
         --model_name_or_path "qwen/qwen-14b" \
         --tokenizer_name_or_path "qwen/qwen-14b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "output/$task_name/" \
         --per_device_train_batch_size 1 \
@@ -3624,6 +3665,7 @@ function llm_qwen_pir_auto_bs1_bf16_TP2_PP2(){
         run_pretrain_auto.py \
         --model_name_or_path "qwen/qwen-14b" \
         --tokenizer_name_or_path "qwen/qwen-14b" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "output/$task_name/" \
         --per_device_train_batch_size 1 \
@@ -3694,6 +3736,7 @@ function llama_lora_static_graph_auto_bs_2_bf16_DP2-TP2-PP1() {
         --log_dir "$case_log_dir" \
         ../run_finetune_auto.py \
         --model_name_or_path "meta-llama/Meta-Llama-3.1-8B-Instruct" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --dataset_name_or_path "./data" \
         --output_dir "$case_out_dir" \
         --enable_auto_parallel true \
@@ -3853,6 +3896,7 @@ if [ $IS_A100 -eq 1 ]; then
         --model_type "deepseekv3_auto" \
         --model_name_or_path $model_config_json \
         --tokenizer_name_or_path "deepseek-ai/DeepSeek-V3" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -3999,6 +4043,7 @@ if [ $IS_A100 -eq 1 ]; then
         --model_type "deepseekv3_auto" \
         --model_name_or_path $model_config_json \
         --tokenizer_name_or_path "deepseek-ai/DeepSeek-V3" \
+        --hybrid_parallel_topo_order $DEFAULT_TOPO \
         --input_dir "./data" \
         --output_dir "output/$task_name" \
         --split 949,50,1 \
@@ -4075,6 +4120,7 @@ function llama_baichuan_dygraph_auto_sp_async_reduce_scatter_bs8_bf16_DP4-MP2-SP
 {
     "model_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
     "tokenizer_name_or_path": "baichuan-inc/Baichuan2-13B-Base",
+    "hybrid_parallel_topo_order": "$DEFAULT_TOPO",
     "input_dir": "./data",
     "output_dir": "./checkpoints/baichuan2_13b_ckpts",
     "split": "949,50,1",
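
Note: every case function touched by this diff is standalone, and the new --hybrid_parallel_topo_order flag simply threads the DEFAULT_TOPO value set once at the top of the script into each launch command. As a rough sketch of exercising a single case locally, assuming this file is PaddleNLP's scripts/distribute/ci_case_auto.sh, that root_path points at a PaddleNLP checkout, and that the data paths exported at the top exist:

    # hypothetical local run; the checkout path and data layout are assumptions
    export root_path=/workspace/PaddleNLP
    source $root_path/scripts/distribute/ci_case_auto.sh
    llama_dygraph_auto_bs8_fp32_DP2   # or any other case function above

Editing DEFAULT_TOPO to sharding_first at the top of the script would exercise the alternative topology order across all cases at once.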