
Commit

delete max_batch_size

yuanlehome committed Feb 22, 2025
1 parent b425f74 · commit d824c2a
Showing 3 changed files with 2 additions and 7 deletions.
5 changes: 0 additions & 5 deletions llm/predict/predictor.py
@@ -111,9 +111,6 @@ class PredictorArgument:
         metadata={"help": "avx cachekv type. Supported values: fp16,int8"},
     )
     batch_size: int = field(default=1, metadata={"help": "The batch size of data."})
-    max_batch_size: int = field(
-        default=1, metadata={"help": "The max batch size of data used for export static model."}
-    )
     benchmark: bool = field(
         default=False,
         metadata={
@@ -179,8 +176,6 @@ def __post_init__(self):
         assert (
             self.src_length + self.max_length <= self.total_max_length
         ), "src_length + max_length should smaller than total_max_length."
-        if self.max_batch_size < self.batch_size:
-            self.max_batch_size = self.batch_size


 @dataclass
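The removed clamp above guaranteed max_batch_size >= batch_size; with the field gone, batch_size is the only batching knob left on PredictorArgument. For readers unfamiliar with the idiom, a minimal self-contained sketch of the dataclasses.field pattern these arguments use (the class name and values here are illustrative, not from the repository):

from dataclasses import dataclass, field

@dataclass
class ExampleArgument:
    # Same field(default=..., metadata={"help": ...}) idiom as
    # PredictorArgument above; the "help" string feeds the CLI usage text.
    batch_size: int = field(default=1, metadata={"help": "The batch size of data."})

args = ExampleArgument()
print(args.batch_size)  # -> 1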
3 changes: 2 additions & 1 deletion paddlenlp/experimental/transformers/deepseek_v2/modeling.py
@@ -486,7 +486,8 @@ def __init__(self, config: DeepseekV2Config, base_model_prefix: str):
         self.prefill_cache_k_buffer: paddle.Tensor = None
         self.prefill_cache_v_buffer: paddle.Tensor = None
         if self.config.mla_use_matrix_absorption:
-            max_block_nums = config.max_batch_size * (self.max_seq_len + config.block_size - 1) // config.block_size
+            max_batch_size = 1
+            max_block_nums = max_batch_size * (self.max_seq_len + config.block_size - 1) // config.block_size
             cache_k_shape = [
                 max_block_nums,
                 config.num_key_value_heads // max(config.tensor_parallel_degree, 1),
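The hunk above hardcodes max_batch_size to 1 and keeps the ceiling-division block count. A minimal standalone sketch of that arithmetic (the function name and example values are illustrative, not from the commit):

def compute_max_block_nums(max_batch_size: int, max_seq_len: int, block_size: int) -> int:
    # (max_seq_len + block_size - 1) // block_size is integer ceiling
    # division: round the sequence length up to a whole number of cache
    # blocks, then scale by the batch size.
    return max_batch_size * (max_seq_len + block_size - 1) // block_size

# e.g. a 4096-token context with 64-token blocks needs 64 blocks per
# sequence; with max_batch_size hardcoded to 1, that is 64 blocks total.
assert compute_max_block_nums(1, 4096, 64) == 64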
1 change: 0 additions & 1 deletion paddlenlp/transformers/model_utils.py
@@ -1172,7 +1172,6 @@ def set_inference_config(cls, config, predictor_args, **kwargs):
         config.append_attn = predictor_args.append_attn
         config.decode_strategy = predictor_args.decode_strategy
         config.mla_use_matrix_absorption = predictor_args.mla_use_matrix_absorption
-        config.max_batch_size = predictor_args.max_batch_size
         config.weightonly_group_size = predictor_args.weightonly_group_size

         if config.quantization_config.quant_type is not None:
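set_inference_config copies predictor arguments onto the model config attribute by attribute; after this commit, max_batch_size is simply never set on the config. A hedged sketch of that copy pattern using simplified stand-in objects (not the actual PaddleNLP classes):

from types import SimpleNamespace

def set_inference_config(config, predictor_args):
    # Mirrors the attribute-by-attribute copy in the diff above;
    # config.max_batch_size is no longer assigned.
    config.append_attn = predictor_args.append_attn
    config.decode_strategy = predictor_args.decode_strategy
    config.mla_use_matrix_absorption = predictor_args.mla_use_matrix_absorption
    config.weightonly_group_size = predictor_args.weightonly_group_size

config = SimpleNamespace()
args = SimpleNamespace(
    append_attn=True,
    decode_strategy="sampling",
    mla_use_matrix_absorption=False,
    weightonly_group_size=64,
)
set_inference_config(config, args)
print(hasattr(config, "max_batch_size"))  # -> False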
