
Commit 6547eb7

fix schedule
Signed-off-by: realliujiaxu <[email protected]>
1 parent 427b17e commit 6547eb7

File tree: 3 files changed, +21 -23 lines

vllm_ascend/ascend_forward_context.py

Lines changed: 2 additions & 4 deletions
@@ -11,7 +11,7 @@
                                set_forward_context)

 import vllm_ascend.envs as envs_ascend
-from vllm_ascend.utils import enable_sp, is_moe_model, version_check
+from vllm_ascend.utils import enable_sp, is_moe_model, version_check, has_layer_idx

 if TYPE_CHECKING:
     from vllm_ascend.ops.weight_prefetch import WeightPrefetchMethod
@@ -133,9 +133,7 @@ def set_ascend_forward_context(
     # set layer_idx to enable optimization features that depend on this information.
     # This is only applicable to models that contain these necessary attributes.
     forward_context.layer_idx = None
-    if model_instance is not None and \
-            hasattr(model_instance, "model") and \
-            hasattr(model_instance.model, "start_layer"):
+    if has_layer_idx(model_instance):
         forward_context.layer_idx = model_instance.model.start_layer

     # TODO(rjg-lyh): refactor mlp weight prefetch method
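
A note on this change: the predicate itself is unchanged; the commit only centralizes it behind has_layer_idx (added in vllm_ascend/utils.py below), which memoizes the answer so the hasattr chain is not re-probed on every forward-context setup. A runnable sketch of the guarded assignment using SimpleNamespace stand-ins (all names below are illustrative; the memoizing implementation is in the next file):

from types import SimpleNamespace

# Hypothetical stand-ins: a forward context and a wrapped model whose inner
# model records the first layer owned by this rank.
forward_context = SimpleNamespace(layer_idx=None)
model_instance = SimpleNamespace(model=SimpleNamespace(start_layer=4))


def has_layer_idx(model_instance) -> bool:
    # Same predicate the commit centralizes (shown here without the cache).
    return model_instance is not None and \
        hasattr(model_instance, "model") and \
        hasattr(model_instance.model, "start_layer")


if has_layer_idx(model_instance):
    forward_context.layer_idx = model_instance.model.start_layer

print(forward_context.layer_idx)  # 4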

vllm_ascend/utils.py

Lines changed: 9 additions & 0 deletions
@@ -57,6 +57,7 @@
 _MIN_DP_BUFFER_SIZE = 50
 _IS_MOE_MODEL = None
 _ENABLE_SP = None
+_HAS_LAYER_IDX = None


 def is_310p():
@@ -767,3 +768,11 @@ def version_check():
     if full_date >= "20250919":
         return True
     return False
+
+def has_layer_idx(model_instance: torch.nn.Module) -> bool:
+    global _HAS_LAYER_IDX
+    if _HAS_LAYER_IDX is None:
+        _HAS_LAYER_IDX = model_instance is not None and \
+            hasattr(model_instance, "model") and \
+            hasattr(model_instance.model, "start_layer")
+    return _HAS_LAYER_IDX
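
has_layer_idx follows the same module-level memoization pattern as _IS_MOE_MODEL and _ENABLE_SP above: the first call evaluates the predicate and later calls return the cached flag without touching the model. Note the cache is keyed to whichever instance is passed first, which is safe in a worker process that loads a single model. A self-contained sketch of the pattern with a toy model (_Inner and _Wrapper are illustrative names, not part of the diff):

import torch

_HAS_LAYER_IDX = None  # module-level cache, filled on first call


def has_layer_idx(model_instance: torch.nn.Module) -> bool:
    # True when the wrapper exposes model.start_layer.
    global _HAS_LAYER_IDX
    if _HAS_LAYER_IDX is None:
        _HAS_LAYER_IDX = model_instance is not None and \
            hasattr(model_instance, "model") and \
            hasattr(model_instance.model, "start_layer")
    return _HAS_LAYER_IDX


class _Inner:
    start_layer = 0  # stand-in for the real model's first-layer index


class _Wrapper(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = _Inner()


print(has_layer_idx(_Wrapper()))  # True; later calls skip the hasattr chain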

vllm_ascend/worker/model_runner_v1.py

Lines changed: 10 additions & 19 deletions
@@ -131,7 +131,7 @@
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                                AscendSocVersion, ProfileExecuteDuration,
                                enable_sp, get_ascend_soc_version, is_310p,
-                               is_enable_nz, lmhead_tp_enable)
+                               is_enable_nz, lmhead_tp_enable, is_moe_model)
 from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch

 if TYPE_CHECKING:
@@ -470,11 +470,14 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         self.in_profile_run = False

         self._init_mc2_tokens_capacity()
-        self.reserved_mc2_mask = torch.zeros(
-            self.mc2_tokens_capacity,
-            dtype=torch.bool,
-            device=self.device,
-        )
+        if is_moe_model(vllm_config):
+            self.reserved_mc2_mask = torch.zeros(
+                self.mc2_tokens_capacity,
+                dtype=torch.bool,
+                device=self.device,
+            )
+        else:
+            self.reserved_mc2_mask = None
         self.dynamic_eplb = self.ascend_config.dynamic_eplb or self.ascend_config.expert_map_record_path
         if self.dynamic_eplb:
             EPLBParamUtils.check_dynamic_eplb(self.ascend_config.dynamic_eplb)
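
reserved_mc2_mask is only exercised on the MoE path, which is what the new is_moe_model(vllm_config) gate encodes: dense models no longer allocate an mc2_tokens_capacity-sized boolean buffer on the device and instead carry None, so any consumer must guard before slicing. A standalone sketch of the conditional-allocation pattern (function and parameter names are illustrative, not part of the diff):

from typing import Optional

import torch


def build_reserved_mc2_mask(is_moe: bool,
                            capacity: int,
                            device: str = "cpu") -> Optional[torch.Tensor]:
    # Only MoE models use the MC2 mask, so dense models skip the buffer
    # instead of holding `capacity` unused booleans on the device.
    if is_moe:
        return torch.zeros(capacity, dtype=torch.bool, device=device)
    return None


print(build_reserved_mc2_mask(is_moe=False, capacity=512))  # None for dense models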
@@ -1339,9 +1342,7 @@ def _prepare_inputs(
         self.query_lens = torch.from_numpy(num_scheduled_tokens)

         # Copy the tensors to the NPU.
-        self.input_ids[:total_num_scheduled_tokens].copy_(
-            self.input_ids_cpu[:total_num_scheduled_tokens], non_blocking=True)
-
+        self._prepare_input_ids(total_num_scheduled_tokens, cu_num_tokens)
         self.positions_cpu[total_num_scheduled_tokens:num_input_tokens].zero_()
         self.positions[:num_input_tokens].copy_(
             self.positions_cpu[:num_input_tokens], non_blocking=True)
@@ -1362,16 +1363,6 @@
         self._update_graph_pad_size(with_prefill, maybe_padded_num_tokens)
         attn_metadata: dict[str, Any] = {}

-        # Prepare input_ids
-        token_indices = (positions_np +
-                         req_indices * self.input_batch.token_ids_cpu.shape[1])
-        torch.index_select(self.input_batch.token_ids_cpu_tensor.flatten(),
-                           0,
-                           torch.from_numpy(token_indices),
-                           out=self.input_ids_cpu[:total_num_scheduled_tokens])
-        # Copy the tensors to the NPU.
-        self._prepare_input_ids(total_num_scheduled_tokens, cu_num_tokens)
-
         # _prepare_inputs may reorder the batch, so we must gather
         # multi-modal outputs after that to ensure the correct order
         if self.is_multimodal_model:
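
The last two hunks consolidate input-ids preparation: the old code first copied a possibly stale input_ids buffer to the NPU, then later gathered the scheduled tokens into input_ids_cpu and copied again via _prepare_input_ids; now a single early _prepare_input_ids call covers it. The removed lines show the flattened-gather indexing that fills the CPU buffer from the per-request token table. A toy reproduction of that indexing (shapes and values are illustrative):

import numpy as np
import torch

# Stand-in for input_batch.token_ids_cpu: a [num_reqs, max_len] token table.
token_ids_cpu = torch.arange(12, dtype=torch.int64).reshape(3, 4)

req_indices = np.array([0, 0, 2])   # source request of each scheduled token
positions_np = np.array([1, 2, 0])  # position of that token in its request

# Row-major flattening: index = row * width + col, gathered in one call.
token_indices = positions_np + req_indices * token_ids_cpu.shape[1]
input_ids_cpu = torch.index_select(token_ids_cpu.flatten(), 0,
                                   torch.from_numpy(token_indices))
print(input_ids_cpu)  # tensor([1, 2, 8])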
