From 1c22d64b597b6fe5ecdb0d852b6397b8ce048fc4 Mon Sep 17 00:00:00 2001
From: yyssys
Date: Tue, 21 Oct 2025 08:13:27 +0000
Subject: [PATCH] [XPU] set max_num_batched_tokens=8192

---
 fastdeploy/engine/args_utils.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py
index 176683c986..ee2807fdc9 100644
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -20,8 +20,6 @@
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional, Union
 
-import paddle
-
 from fastdeploy import envs
 from fastdeploy.config import (
     CacheConfig,
@@ -1025,10 +1023,7 @@ def create_engine_config(self, port_availability_check=True) -> FDConfig:
 
         if self.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                if paddle.is_compiled_with_xpu():
-                    self.max_num_batched_tokens = self.max_model_len
-                else:
-                    self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
             else:
                 if self.enable_chunked_prefill:
                     self.max_num_batched_tokens = 2048
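
For reviewers, below is a minimal, self-contained sketch of how the default for max_num_batched_tokens resolves after this patch. The standalone function, its parameter names, and the final fallback are illustrative assumptions, not FastDeploy's actual API; only the branches visible in the diff above are reproduced.

    # Sketch (assumption): post-patch defaulting of max_num_batched_tokens,
    # pulled out of create_engine_config into a free function for illustration.
    def resolve_max_num_batched_tokens(
        max_num_batched_tokens,       # None means the user did not set it
        enable_v1_kvcache_scheduler,  # stands in for envs.ENABLE_V1_KVCACHE_SCHEDULER
        enable_chunked_prefill,
    ):
        if max_num_batched_tokens is not None:
            return max_num_batched_tokens  # an explicit user value is kept unchanged
        if enable_v1_kvcache_scheduler:
            # XPU builds previously defaulted to max_model_len here; that branch is
            # removed because a max_model_len-sized token budget can easily OOM.
            return 8192
        if enable_chunked_prefill:
            return 2048
        return None  # the non-chunked-prefill default is outside this diff

    # Example: with the V1 KV-cache scheduler enabled and no explicit value,
    # XPU now receives the same 8192-token default as other devices.
    print(resolve_max_num_batched_tokens(None, True, False))  # -> 8192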