InternLM · zhulinJulia24 · Feb 14, 2026 · Feb 14, 2026 · Feb 28, 2026 · Mar 2, 2026
diff --git a/.github/workflows/api_eval.yml b/.github/workflows/api_eval.yml
@@ -32,7 +32,11 @@ on:
         description: 'Set custom run ID. If not provided, github.run_id will be used'
         type: string
         default: ''
-
+      offline_mode:  # boolean switch, off unless the caller enables it
+        description: "Whether start a offline mode, if true, you should prepare code and whl package by yourself"
+        type: boolean
+        required: true
+        default: false
 
 env:
   HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache

diff --git a/.github/workflows/api_eval_legacy.yml b/.github/workflows/api_eval_legacy.yml
@@ -0,0 +1,222 @@
+name: api_eval_legacy
+
+# Manual trigger only: every option below is a workflow_dispatch input.
+on:
+  workflow_dispatch:
+    inputs:
+      # owner/name of the repository that the checkout steps clone.
+      repo_org:
+        required: false
+        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
+        type: string
+        default: 'InternLM/lmdeploy'
+      # Git ref handed to the checkout steps; it is also embedded in the
+      # per-run directory names defined in the workflow-level env.
+      repo_ref:
+        required: false
+        description: 'Set branch or tag or commit id. Default is "main"'
+        type: string
+        default: 'main'
+      # Fed to fromJSON() to build the test matrix, so the value must be a
+      # JSON array of backend names. The default is written as strict JSON
+      # (double-quoted strings) so it does not rely on a lenient parser.
+      backend:
+        required: true
+        description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
+        type: string
+        default: '["turbomind", "pytorch"]'
+      # Chooses which pytest passes the test job runs: infer, eval, or both.
+      execution_mode:
+        required: false
+        description: 'Select execution mode: infer, eval, or both. Default is "both"'
+        type: choice
+        options:
+          - both
+          - infer
+          - eval
+        default: 'both'
+      run_id:
+        required: false
+        description: 'Set custom run ID. If not provided, github.run_id will be used'
+        type: string
+        default: ''
+      # When true, the staging job copies code and wheel from the pre-staged
+      # offline package directory instead of checking out and downloading.
+      offline_mode:
+        required: true
+        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
+        type: boolean
+        default: false
+
+
+# Workflow-wide environment, exported to every step of every job.
+# Values are written as strings because environment variables are strings.
+env:
+  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
+  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
+  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: 'true'
+  # Allure results for this run, keyed by ref and GitHub run id.
+  REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
+  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
+  FAIL_CONFIG: '--lf'
+  # Shared-storage staging area that download_pkgs fills and the test job copies from.
+  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
+  # Pre-staged sources and wheel used when offline_mode is enabled.
+  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
+  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
+  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
+  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache
+  HF_DATASETS_OFFLINE: '1'
+  HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
+  HF_HUB_OFFLINE: '1'
+  HF_EVALUATE_OFFLINE: '1'
+  # Honour the run_id input as its description promises; an empty input
+  # falls back to github.run_id, which is the previous behaviour.
+  # NOTE(review): REPORT_DIR and TEST_CODE_PATH still use github.run_id --
+  # confirm whether they should follow the custom id as well.
+  RUN_ID: ${{ inputs.repo_ref }}_${{ inputs.run_id || github.run_id }}
+  TEST_ENV: legacy
+
+jobs:
+  # Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
+  # one-day artifact that download_pkgs fetches.
+  linux-build:
+    # Offline runs take the wheel from OFFLINE_CODE_PATH and never download
+    # this artifact, so skip the build then. The condition mirrors the one
+    # guarding the download steps. download_pkgs uses `!cancelled()`, so it
+    # still runs when this job is skipped.
+    if: ${{ !cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode) }}
+    strategy:
+      matrix:
+        pyver: [py310]
+    runs-on: ubuntu-latest
+    env:
+      PYTHON_VERSION: ${{ matrix.pyver }}
+      PLAT_NAME: manylinux2014_x86_64
+      DOCKER_TAG: cuda12.8
+      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
+    steps:
+      # NOTE(review): this action tracks the mutable `main` branch; consider
+      # pinning it to a tag or commit SHA.
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
+          ref: ${{github.event.inputs.repo_ref || 'main'}}
+      - name: Build
+        run: |
+          # Log the build parameters for troubleshooting.
+          echo "${PYTHON_VERSION}"
+          echo "${PLAT_NAME}"
+          echo "${DOCKER_TAG}"
+          echo "${OUTPUT_FOLDER}"
+          echo "${GITHUB_RUN_ID}"
+          # Strip the interactive/TTY flags (-it) from the docker invocation
+          # before running the build script.
+          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
+          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
+      - name: Upload Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          if-no-files-found: error
+          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
+          retention-days: 1
+          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
+
+
+  # Stages the code under test and its wheel into TEST_CODE_PATH on shared
+  # storage, either from a fresh checkout plus the build artifact (online)
+  # or from the pre-staged OFFLINE_CODE_PATH (offline).
+  #
+  # Paths are read from the exported environment variables and quoted rather
+  # than interpolated with ${{ env.* }}: they embed the user-supplied
+  # repo_ref, which must not be spliced into the script text.
+  download_pkgs:
+    needs: linux-build
+    if: ${{!cancelled()}}
+    runs-on: [self-hosted, linux-a100]
+    timeout-minutes: 50
+    container:
+      image: openmmlab/lmdeploy:latest-cu12.8
+      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
+      volumes:
+        - /nvme/qa_test_models:/nvme/qa_test_models
+        - /mnt/121:/mnt/121
+        - /mnt/104:/mnt/104
+        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
+    steps:
+      - name: Clone repository
+        uses: actions/checkout@v2
+        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
+        with:
+          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
+          ref: ${{github.event.inputs.repo_ref || 'main'}}
+      - name: Copy repository
+        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
+        # mkdir -p: a repo_ref such as "feature/x" adds a path level whose
+        # parent directory does not exist yet.
+        run: |
+          rm -rf "${TEST_CODE_PATH}" &&
+            mkdir -p "${TEST_CODE_PATH}" &&
+            chmod 777 "${TEST_CODE_PATH}" &&
+            cp -r . "${TEST_CODE_PATH}"
+      - name: Copy repository - offline
+        if: ${{inputs.offline_mode}}
+        run: |
+          rm -rf "${TEST_CODE_PATH}" &&
+            mkdir -p "${TEST_CODE_PATH}" &&
+            chmod 777 "${TEST_CODE_PATH}" &&
+            cp -r "${OFFLINE_CODE_PATH}/." "${TEST_CODE_PATH}"
+      - name: Download Artifacts
+        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
+        uses: actions/download-artifact@v4
+        with:
+          name: my-artifact-${{ github.run_id }}-py310
+      - name: Copy Artifacts
+        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
+        # Drop any wheel that came along with the copied tree, then add the
+        # freshly downloaded one. The globs stay outside the quotes so the
+        # shell expands them.
+        run: |
+          rm -f "${TEST_CODE_PATH}"/lmdeploy-*.whl &&
+            cp lmdeploy-*.whl "${TEST_CODE_PATH}"
+      - name: Copy Artifacts - offline
+        if: ${{inputs.offline_mode}}
+        run: |
+          rm -f "${TEST_CODE_PATH}"/lmdeploy-*.whl &&
+            cp "${OFFLINE_CODE_PATH}"/lmdeploy-*.whl "${TEST_CODE_PATH}"
+      - name: Mark as start
+        run: |
+          chmod -R 777 "${TEST_CODE_PATH}"
+          mkdir -p "${REPORT_DIR}"
+          # Overwrites status.txt with the epoch start time of this run.
+          echo "starttime=$(date +%s)" > "${REPORT_DIR}/status.txt"
+
+  # Runs the API evaluation tests once per (backend, gpu_num) combination
+  # inside the lmdeploy CUDA container, using the code and wheel staged by
+  # download_pkgs. Allure results go to REPORT_DIR.
+  test_evaluation:
+    needs: download_pkgs
+    if: ${{ !cancelled() }}
+    runs-on: [self-hosted, linux-a100]
+    # 7200 minutes = 5 days.
+    timeout-minutes: 7200
+    strategy:
+      fail-fast: false
+      matrix:
+        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
+        # Each value is used as a pytest marker in the `-m` expressions below.
+        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
+    container:
+      image: openmmlab/lmdeploy:latest-cu12.8
+      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
+      volumes:
+        - /nvme/github-actions/pip-cache:/root/.cache/pip
+        - /nvme/github-actions/packages:/root/packages
+        - /nvme/github-actions/resources:/root/resources
+        - /nvme/qa_test_models:/nvme/qa_test_models
+        - /nvme/huggingface_hub:/nvme/huggingface_hub
+        - /mnt/121:/mnt/121
+        - /mnt/104:/mnt/104
+        - /mnt/bigdisk:/mnt/bigdisk
+        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
+    steps:
+      - name: Copy repository and Artifacts
+        run: |
+          cp -r "${TEST_CODE_PATH}/." .
+          mkdir -p "${REPORT_DIR}"
+          echo "starttime=$(date +%s)" > "${REPORT_DIR}/status.txt"
+      - name: Install lmdeploy - dependency
+        # Uses the workflow-level OFFLINE_REQUIREMENTS variable instead of
+        # repeating its path here.
+        run: |
+          python3 -m pip install -r "${OFFLINE_REQUIREMENTS}"
+      - name: Install lmdeploy
+        run: |
+          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
+          python3 -m pip install -r requirements/test.txt
+      - name: Install opencompass
+        run: |
+          git clone https://github.com/open-compass/opencompass.git --depth 1
+          cd opencompass
+          python3 -m pip install .
+          python3 -m pip install langdetect
+      - name: Check env
+        run: |
+          # `python3 -m pip` targets the same interpreter as every other
+          # install step; a bare `pip` may resolve to a different one.
+          python3 -m pip install transformers==4.57.6
+          python3 -m pip list
+          lmdeploy check_env
+          mkdir -p "${REPORT_DIR}"
+          echo "starttime=$(date +%s)" > "${REPORT_DIR}/status.txt"
+      - name: Setup paths for evaluation
+        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
+        run: |
+          # Remember the last failing pytest status so both passes always run.
+          overall_exit=0
+          # -sfn replaces a symlink left behind by an earlier run instead of
+          # failing or nesting a new link inside its target.
+          ln -sfn /mnt/104/opencompass-data/data ./data
+          ln -sfn /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data
+          execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
+          ulimit -n 65535
+          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
+            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir="${REPORT_DIR}" || overall_exit=$?
+          fi
+          if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
+            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir="${REPORT_DIR}" || overall_exit=$?
+          fi
+          exit $overall_exit
+      - name: Clear workspace
+        if: always()
+        run: |
+          # Create the report directory first: if an early step failed before
+          # creating it, the status write would abort this script and leave
+          # the workspace uncleaned.
+          mkdir -p "${REPORT_DIR}"
+          echo "status=done" >> "${REPORT_DIR}/status.txt"
+          workdir="$(pwd)"
+          # ${workdir:?} aborts instead of expanding to "/*" if pwd came back empty.
+          rm -rf "${workdir:?}"/*