Skip to content

Commit a16bf4a

Browse files
Fix accuracy bug: remove the unused PCP/DCP chunked-KV tracking fields (`local_chunked_kv_lens`, `mask_for_non_zero_chunk`, `max_chunk_num`) and the `extract_req_dcp_by_chunk_pcp` helper
Signed-off-by: LookAround <[email protected]>
1 parent 453857f commit a16bf4a

File tree

2 files changed

+0
-39
lines changed

2 files changed

+0
-39
lines changed

vllm_ascend/attention/utils.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,6 @@ class AscendPrefillContextParallelMetadata:
2020

2121
num_computed_tokens_of_pcp_dcp: Optional[list[list[list[int]]]] = None
2222

23-
local_chunked_kv_lens: Optional[list[Optional[list[Optional[list[Optional[
24-
list[int]]]]]]]] = None
25-
26-
mask_for_non_zero_chunk: Optional[List[bool]] = None
27-
28-
max_chunk_num: int = 0
29-
3023
q_head_idx_tensor: torch.Tensor = None
3124

3225
q_tail_idx_tensor: torch.Tensor = None
@@ -115,23 +108,6 @@ class AscendCommonAttentionMetadata:
115108
AscendPrefillContextParallelMetadata] = None
116109

117110

118-
def extract_req_dcp_by_chunk_pcp(lst,
119-
chunk_idx,
120-
dcp_size,
121-
pcp_rank,
122-
fill_value=0):
123-
num_reqs = len(lst)
124-
results: List[List[int]] = []
125-
for i in range(num_reqs):
126-
if len(lst[i]) == 0 or chunk_idx >= len(lst[i]):
127-
                # Request is empty, or has no chunk at chunk_idx; pad with fill_value.
128-
results.append([fill_value] * dcp_size)
129-
continue
130-
dcp_values = lst[i][chunk_idx][pcp_rank]
131-
results.append(dcp_values)
132-
return results
133-
134-
135111
def filter_chunked_req_indices(
136112
seq_len: torch.Tensor,
137113
mask_for_non_zero_chunk: Optional[List[bool]],

vllm_ascend/worker/npu_input_batch.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,6 @@ class CachedRequestState:
7373
lora_request: Optional[LoRARequest] = None
7474
prompt_embeds: Optional[torch.Tensor] = None
7575

76-
# pcp/dcp param
77-
local_chunked_kv_lens: Optional[list[Optional[list[Optional[
78-
list[int]]]]]] = None # Records computed tokens for each chunk
79-
next_pcp_dcp_start_rank: int = 0 # Tracks next starting rank for round-robin distribution
80-
token_blank_in_last_blk: int = 0 # if the last block is not full, how many future tokens can be stored
81-
8276
def __post_init__(self):
8377
self.num_prompt_tokens = length_from_prompt_token_ids_or_embeds(
8478
self.prompt_token_ids, self.prompt_embeds)
@@ -319,10 +313,6 @@ def __init__(
319313
self.prev_sampled_token_ids_invalid_indices: Optional[set[int]] = None
320314
self.prev_req_id_to_index: Optional[dict[str, int]] = None
321315

322-
# pcp/dcp parameters
323-
self.local_chunked_kv_lens: list[Optional[list[Optional[list[Optional[
324-
list[int]]]]]]] = [None] * max_num_reqs
325-
326316
@property
327317
def req_ids(self) -> list[str]:
328318
# None elements should only be present transiently
@@ -395,9 +385,6 @@ def add_request(
395385
self.num_computed_tokens_cpu[req_index] = request.num_computed_tokens
396386
self.block_table.add_row(request.block_ids, req_index)
397387

398-
# Add PCP/DCP tracking fields
399-
self.local_chunked_kv_lens[req_index] = request.local_chunked_kv_lens
400-
401388
if sampling_params := request.sampling_params:
402389
if (self.is_spec_decode
403390
and is_spec_decode_unsupported(sampling_params)):
@@ -693,8 +680,6 @@ def condense(self) -> None:
693680
last_req_index]
694681
self.num_computed_tokens_cpu[
695682
empty_index] = self.num_computed_tokens_cpu[last_req_index]
696-
self.local_chunked_kv_lens[
697-
empty_index] = self.local_chunked_kv_lens[last_req_index]
698683
self.block_table.move_row(last_req_index, empty_index)
699684
self.temperature_cpu[empty_index] = self.temperature_cpu[
700685
last_req_index]

0 commit comments

Comments
 (0)