PaddlePaddle · ltd0924 · Oct 13, 2025 · Oct 13, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/.clang-format b/.clang-format
@@ -16,7 +16,7 @@
 ---
 Language:        Cpp
 BasedOnStyle:  Google
-IndentWidth:     4
+IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -1  # The private/protected/public has no indent in class

diff --git a/.github/actions/rerun-workflow/action.yml b/.github/actions/rerun-workflow/action.yml
@@ -0,0 +1,30 @@
+name: 'Rerun Workflow'
+description: 'Re-run GitHub Actions workflow for a given Pull Request'
+inputs:
+  GITHUB_TOKEN:
+    description: 'GitHub token with repo scope'
+    required: true
+  OWNER:
+    description: 'Repository owner'
+    required: true
+  REPO:
+    description: 'Repository name'
+    required: true
+  PR_ID:
+    description: 'Pull Request ID'
+    required: true
+  JOB_NAME:
+    description: 'Exact job name to rerun, or "all-failed" to rerun every failed job'
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+    - run: bash "${GITHUB_ACTION_PATH}/rerun.sh"  # resolve next to this action.yml, not the caller's cwd
+      shell: bash
+      env:  # rerun.sh reads all of its parameters from these variables
+        GITHUB_TOKEN: ${{ inputs.GITHUB_TOKEN }}
+        OWNER: ${{ inputs.OWNER }}
+        REPO: ${{ inputs.REPO }}
+        PR_ID: ${{ inputs.PR_ID }}
+        JOB_NAME: ${{ inputs.JOB_NAME }}
diff --git a/.github/actions/rerun-workflow/rerun.sh b/.github/actions/rerun-workflow/rerun.sh
@@ -0,0 +1,104 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Re-run GitHub Actions jobs for the head commit of a pull request.
+#
+# Required environment variables:
+#   GITHUB_TOKEN  token used to authenticate against the GitHub REST API
+#   OWNER, REPO   repository coordinates
+#   PR_ID         pull request number whose head commit is inspected
+#   JOB_NAME      exact job name to rerun, or "all-failed" to rerun every
+#                 failed job of each workflow run found for the commit
+#
+# Exits 1 when the head commit cannot be resolved or no workflow run exists
+# for it. Individual rerun requests that GitHub rejects are logged but do not
+# abort the script, so the remaining runs/jobs are still processed.
+
+set -euo pipefail
+
+API_ROOT="https://api.github.com/repos/$OWNER/$REPO"
+
+COMMIT_SHA=$(curl -sS -H "Authorization: token $GITHUB_TOKEN" \
+  "$API_ROOT/pulls/$PR_ID" | jq -r '.head.sha // empty')
+
+# An empty SHA would leave the head_sha filter of the runs query below blank,
+# so stop here when the pull request lookup did not yield one.
+if [ -z "$COMMIT_SHA" ]; then
+  echo "Failed to resolve head commit for PR $PR_ID in $OWNER/$REPO."
+  exit 1
+fi
+
+echo "Commit SHA: $COMMIT_SHA"
+
+response=$(curl -sS -H "Authorization: token $GITHUB_TOKEN" \
+  "$API_ROOT/actions/runs?head_sha=$COMMIT_SHA&per_page=100")
+
+echo "Response: $response"
+
+# `[]?` yields nothing (instead of a jq error) when the reply carries no
+# workflow_runs array, so the "No matching workflow runs" branch is reached.
+run_ids=$(echo "$response" | jq -r '.workflow_runs[]?.id')
+
+if [ -n "$run_ids" ]; then
+  echo "Found run_ids for commit $COMMIT_SHA: $run_ids"
+
+  for run_id in $run_ids; do
+    if [ "$JOB_NAME" = "all-failed" ]; then
+      echo "Rerunning all failed jobs for run_id: $run_id"
+
+      rerun_response=$(curl -X POST -s -w "%{http_code}" -o /dev/null \
+        -H "Accept: application/vnd.github.v3+json" \
+        -H "Authorization: Bearer $GITHUB_TOKEN" \
+        "$API_ROOT/actions/runs/$run_id/rerun-failed-jobs")
+      if [ "$rerun_response" -eq 201 ]; then
+        echo "Successfully requested rerun for all blocked jobs in run_id: $run_id"
+      else
+        echo "Failed to request rerun for run_id: $run_id with status code $rerun_response"
+      fi
+
+    else
+      # Ask for the maximum page size; the API's default page of 30 entries
+      # would hide jobs of larger workflow runs.
+      jobs_response=$(curl -sS -H "Authorization: token $GITHUB_TOKEN" \
+        "$API_ROOT/actions/runs/$run_id/jobs?per_page=100")
+
+      echo "Jobs Response for run_id $run_id: $jobs_response"
+
+      block_jobs=$(echo "$jobs_response" | jq -r --arg job_name "$JOB_NAME" \
+        '.jobs[]? | select(.name == $job_name) | .id')
+
+      if [ -n "$block_jobs" ]; then
+        echo "Found block jobs for run_id $run_id: $block_jobs"
+
+        for job_id in $block_jobs; do
+          echo "Rerunning job_id: $job_id"
+          job_status=$(curl -X POST -s -w "%{http_code}" -o /dev/null \
+            -H "Accept: application/vnd.github.v3+json" \
+            -H "Authorization: token $GITHUB_TOKEN" \
+            "$API_ROOT/actions/jobs/$job_id/rerun")
+          if [ "$job_status" -eq 201 ]; then
+            echo "Successfully requested rerun for job_id: $job_id"
+          else
+            echo "Failed to request rerun for job_id: $job_id with status code $job_status"
+          fi
+        done
+      else
+        echo "No block jobs found for run_id $run_id with name $JOB_NAME."
+      fi
+    fi
+  done
+else
+  echo "No matching workflow runs found for commit $COMMIT_SHA."
+  exit 1
+fi
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,30 @@
+<!-- TemplateReference: https://github.com/PaddlePaddle/FastDeploy/blob/develop/.github/pull_request_template.md -->
+
+<!-- Thank you for your contribution! Please follow these guidelines to enhance your pull request. If anything is unclear, submit your PR and reach out to maintainers for assistance. -->
+
+## Motivation
+
+<!-- Describe the purpose and goals of this pull request. -->
+
+## Modifications
+
+<!-- Detail the changes made in this pull request. -->
+
+## Usage or Command
+
+<!-- Provide usage instructions if this PR adds a new feature. -->
+<!-- Provide the command to run if this PR is a performance optimization or a bug fix. -->
+
+## Accuracy Tests
+
+<!-- If this pull request affects model outputs (e.g., changes to the kernel or model forward code), provide accuracy test results. -->
+
+## Checklist
+
+- [ ] Add at least one tag to the PR title.
+  - Tag list: [`[FDConfig]`,`[APIServer]`,`[Engine]`, `[Scheduler]`, `[PD Disaggregation]`, `[Executor]`, `[Graph Optimization]`, `[Speculative Decoding]`, `[RL]`, `[Models]`, `[Quantization]`, `[Loader]`, `[OP]`, `[KVCache]`, `[DataProcessor]`, `[BugFix]`, `[Docs]`, `[CI]`, `[Optimization]`, `[Feature]`, `[Benchmark]`, `[Others]`, `[XPU]`, `[HPU]`, `[GCU]`, `[DCU]`, `[Iluvatar]`, `[Metax]`]
+  - You can add new tags based on the PR content, but the semantics must be clear.
+- [ ] Format your code, run `pre-commit` before commit.
+- [ ] Add unit tests. If no unit tests are added, explain why in this PR.
+- [ ] Provide accuracy results.
+- [ ] If this PR targets the `release` branch, make sure the change has first been submitted to the `develop` branch, then cherry-pick it to the `release` branch with the `[Cherry-Pick]` PR tag.
diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml
@@ -44,7 +44,7 @@ jobs:
             FULL_REPO="${{ github.repository }}"
             REPO_NAME="${FULL_REPO##*/}"
             BASE_BRANCH="${{ github.base_ref }}"
-
+            docker pull ${docker_image}
             # Clean the repository directory before starting
             docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
             -e "REPO_NAME=${REPO_NAME}" \
@@ -55,7 +55,7 @@ jobs:
               fi
             '
 
-            wget -q ${fd_archive_url}
+            wget -q --no-proxy ${fd_archive_url}
             tar -xf FastDeploy.tar.gz
             rm -rf FastDeploy.tar.gz
             cd FastDeploy
@@ -80,12 +80,14 @@ jobs:
           FD_API_PORT=$((42088 + DEVICE_PORT * 100))
           FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
           FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
+          FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
           echo "Test ENV Parameter:"
           echo "========================================================="
           echo "FLASK_PORT=${FLASK_PORT}"
           echo "FD_API_PORT=${FD_API_PORT}"
           echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
           echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
+          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
           echo "DEVICES=${DEVICES}"
           echo "========================================================="
 
@@ -99,7 +101,7 @@ jobs:
             exit 1
           fi
 
-          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
+          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
           LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
           echo "==== LOG_FILE is ${LOG_FILE} ===="
 
@@ -133,6 +135,7 @@ jobs:
           -e "FD_API_PORT=${FD_API_PORT}" \
           -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
           -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
+          -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
           -e "FLASK_PORT=${FLASK_PORT}" \
           -v "${MODEL_CACHE_DIR}:/MODELDATA" \
           -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
@@ -152,11 +155,13 @@ jobs:
           ./llm-deploy-linux-amd64 -python python3.10 \
           -model_name ERNIE-4.5-0.3B-Paddle \
           -model_path /MODELDATA \
-          --skip install
+          --skip install,model
 
           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
           pushd tests/ce/deploy
+          ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
+          ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \

diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml
@@ -44,7 +44,7 @@ jobs:
             FULL_REPO="${{ github.repository }}"
             REPO_NAME="${FULL_REPO##*/}"
             BASE_BRANCH="${{ github.base_ref }}"
-
+            docker pull ${docker_image}
             # Clean the repository directory before starting
             docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
             -e "REPO_NAME=${REPO_NAME}" \
@@ -55,7 +55,7 @@ jobs:
               fi
             '
 
-            wget -q ${fd_archive_url}
+            wget -q --no-proxy ${fd_archive_url}
             tar -xf FastDeploy.tar.gz
             rm -rf FastDeploy.tar.gz
             cd FastDeploy
@@ -80,12 +80,14 @@ jobs:
           FD_API_PORT=$((42088 + DEVICE_PORT * 100))
           FD_ENGINE_QUEUE_PORT=$((42058 + DEVICE_PORT * 100))
           FD_METRICS_PORT=$((42078 + DEVICE_PORT * 100))
+          FD_CACHE_QUEUE_PORT=$((42098 + DEVICE_PORT * 100))
           echo "Test ENV Parameter:"
           echo "========================================================="
           echo "FLASK_PORT=${FLASK_PORT}"
           echo "FD_API_PORT=${FD_API_PORT}"
           echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
           echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
+          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
           echo "DEVICES=${DEVICES}"
           echo "========================================================="
 
@@ -99,7 +101,7 @@ jobs:
             exit 1
           fi
 
-          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT)
+          PORTS=($FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT $FD_CACHE_QUEUE_PORT)
           LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
           echo "==== LOG_FILE is ${LOG_FILE} ===="
 
@@ -134,7 +136,7 @@ jobs:
           -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
           -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
           -e "FLASK_PORT=${FLASK_PORT}" \
-          -e "FD_FORCE_CHUNKED_PREFILL=1" \
+          -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
           -v "${MODEL_CACHE_DIR}:/MODELDATA" \
           -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
           -v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -153,11 +155,13 @@ jobs:
           ./llm-deploy-linux-amd64 -python python3.10 \
           -model_name ERNIE-4.5-0.3B-Paddle \
           -model_path /MODELDATA \
-          --skip install
+          --skip install,model
 
           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
           pushd tests/ce/deploy
+          ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
+          ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk "{print \$2}" | xargs -r kill -9
           python3.10 deploy.py > dd.log 2>&1 &
           sleep 3
           curl -X POST http://0.0.0.0:${FLASK_PORT}/start \
@@ -202,20 +206,6 @@ jobs:
           check_service 90
           python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1
 
-          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-            -H "Content-Type: application/json" \
-            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_mtp.yaml\", \"--enable-logprob\": \"False\"}"
-          check_service 180
-          export TEMPLATE=TOKEN_NORMAL
-          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
-
-          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-            -H "Content-Type: application/json" \
-            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_sot.yaml\", \"--enable-logprob\": \"False\"}"
-          check_service 360
-          export TEMPLATE=TOKEN_NORMAL
-          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
-
           popd
           echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
           '

diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml
@@ -55,7 +55,7 @@ on:
 jobs:
   fd-build:
     runs-on: [self-hosted, GPU-Build]
-    timeout-minutes: 240
+    timeout-minutes: 360
     outputs:
       wheel_path: ${{ steps.set_output.outputs.wheel_path }}
     steps:
@@ -82,7 +82,7 @@ jobs:
               fi
             '
 
-            wget -q ${fd_archive_url}
+            wget -q --no-proxy ${fd_archive_url}
             tar -xf FastDeploy.tar.gz
             rm -rf FastDeploy.tar.gz
             cd FastDeploy
@@ -106,7 +106,12 @@ jobs:
             CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
             gpu_id=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
 
-            CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
+            IFS='/' read -ra parts <<< "${GITHUB_WORKSPACE}"
+            len=${#parts[@]}
+            CCACHE_DEFAULT_DIR="/$(IFS=/; echo "${parts[*]:1:$((len-5))}")"
+            echo "$CCACHE_DEFAULT_DIR"
+
+            CACHE_DIR="${CACHE_DIR:-$CCACHE_DEFAULT_DIR}"
             echo "CACHE_DIR is set to ${CACHE_DIR}"
             if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
               touch "${CACHE_DIR}/gitconfig"
@@ -127,13 +132,15 @@ jobs:
             -e "PADDLEVERSION=${PADDLEVERSION}" \
             -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
             -e "BRANCH_REF=${BRANCH_REF}" \
+            -e "CCACHE_MAXSIZE=50G" \
             --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
             if [[ -n "${FD_VERSION}" ]]; then
               export FASTDEPLOY_VERSION=${FD_VERSION}
               echo "Custom FastDeploy version: ${FASTDEPLOY_VERSION}"
             fi
 
             git config --global --add safe.directory /workspace/FastDeploy
+            chown -R $(whoami) /workspace/FastDeploy
             cd FastDeploy
             if [[ "${WITH_NIGHTLY_BUILD}" == "ON" ]];then
               GIT_COMMIT_TIME=$(git --no-pager show -s --format=%ci HEAD)

diff --git a/.github/workflows/ci_gcu.yml → .github/workflows/_ci_gcu.yml b/.github/workflows/ci_gcu.yml → .github/workflows/_ci_gcu.yml
@@ -1,10 +1,10 @@
 name: CI_GCU
 
 on:
-  pull_request:
-    branches:
-      - develop
-      - 'release/*'
+  #pull_request:
+    #branches:
+      #- develop
+      #- 'release/*'
   workflow_dispatch:
 
 concurrency: