Add ERNIE-4.5-21B-A3B-PT to the accuracy test suite.

This patch does three things:
  1. Adds an `a2-2` / `ERNIE-4.5-21B-A3B-PT` entry to the accuracy_test
     workflow matrix.
  2. Adds tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml with expected
     metric values for the `gsm8k` and `ceval-valid` tasks.
  3. Registers the new config file in tests/e2e/models/configs/accuracy.txt.

NOTE(review): the line breaks and leading whitespace of this patch were
reconstructed from a copy that had been collapsed onto a single line. The
indentation of the context lines in the workflow hunk assumes conventional
2-space GitHub Actions nesting -- verify against the target tree before
applying (or apply with `git apply --ignore-whitespace`).

diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
index 6d0faf836a..219ad872f2 100644
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@@ -57,6 +57,8 @@ jobs:
             model_name: Qwen3-VL-30B-A3B-Instruct
           - runner: a2-2
             model_name: DeepSeek-V2-Lite
+          - runner: a2-2
+            model_name: ERNIE-4.5-21B-A3B-PT
       fail-fast: false
     # test will be triggered when tag 'accuracy-test' & 'ready-for-test'
     if: >-
diff --git a/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml b/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml
new file mode 100644
index 0000000000..a8c123a432
--- /dev/null
+++ b/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml
@@ -0,0 +1,14 @@
+model_name: "PaddlePaddle/ERNIE-4.5-21B-A3B-PT"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,flexible-extract"
+    value: 0.72
+- name: "ceval-valid"
+  metrics:
+  - name: "acc,none"
+    value: 0.85
+num_fewshot: 5
+tensor_parallel_size: 2
+batch_size: 16
+gpu_memory_utilization: 0.6
diff --git a/tests/e2e/models/configs/accuracy.txt b/tests/e2e/models/configs/accuracy.txt
index 3bdcfd8a04..cae04631b0 100644
--- a/tests/e2e/models/configs/accuracy.txt
+++ b/tests/e2e/models/configs/accuracy.txt
@@ -6,3 +6,4 @@ Qwen2-7B.yaml
 Qwen2-VL-7B-Instruct.yaml
 Qwen2-Audio-7B-Instruct.yaml
 Qwen3-VL-30B-A3B-Instruct.yaml
+ERNIE-4.5-21B-A3B-PT.yaml