Skip to content

Commit 48de24c

Browse files
committed
Fix scheduler: pass structured_output_request_ids and grammar_bitmask to SchedulerOutput on vLLM 0.11.0
1 parent 3cb2af9 commit 48de24c

File tree

2 files changed

+77
-33
lines changed

2 files changed

+77
-33
lines changed

vllm_ascend/core/scheduler.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -483,23 +483,42 @@ def skip_cur_request():
483483
num_scheduled_tokens, scheduled_spec_decode_tokens,
484484
req_to_new_blocks)
485485
scheduled_cached_reqs = cached_reqs_data
486-
487-
scheduler_output = SchedulerOutput(
488-
scheduled_new_reqs=new_reqs_data,
489-
scheduled_cached_reqs=scheduled_cached_reqs,
490-
num_scheduled_tokens=num_scheduled_tokens,
491-
total_num_scheduled_tokens=total_num_scheduled_tokens,
492-
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
493-
scheduled_encoder_inputs=scheduled_encoder_inputs,
494-
num_common_prefix_blocks=num_common_prefix_blocks,
495-
# finished_req_ids is an existing state in the scheduler,
496-
# instead of being newly scheduled in this step.
497-
# It contains the request IDs that are finished in between
498-
# the previous and the current steps.
499-
finished_req_ids=self.finished_req_ids, # type: ignore
500-
free_encoder_mm_hashes=self.encoder_cache_manager.
501-
get_freed_mm_hashes(),
502-
)
486+
if vllm_version_is("0.11.0"):
487+
scheduler_output = SchedulerOutput(
488+
scheduled_new_reqs=new_reqs_data,
489+
scheduled_cached_reqs=scheduled_cached_reqs,
490+
num_scheduled_tokens=num_scheduled_tokens,
491+
total_num_scheduled_tokens=total_num_scheduled_tokens,
492+
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
493+
scheduled_encoder_inputs=scheduled_encoder_inputs,
494+
num_common_prefix_blocks=num_common_prefix_blocks,
495+
# finished_req_ids is an existing state in the scheduler,
496+
# instead of being newly scheduled in this step.
497+
# It contains the request IDs that are finished in between
498+
# the previous and the current steps.
499+
finished_req_ids=self.finished_req_ids, # type: ignore
500+
free_encoder_mm_hashes=self.encoder_cache_manager.
501+
get_freed_mm_hashes(),
502+
structured_output_request_ids={},
503+
grammar_bitmask=None,
504+
)
505+
else:
506+
scheduler_output = SchedulerOutput(
507+
scheduled_new_reqs=new_reqs_data,
508+
scheduled_cached_reqs=scheduled_cached_reqs,
509+
num_scheduled_tokens=num_scheduled_tokens,
510+
total_num_scheduled_tokens=total_num_scheduled_tokens,
511+
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
512+
scheduled_encoder_inputs=scheduled_encoder_inputs,
513+
num_common_prefix_blocks=num_common_prefix_blocks,
514+
# finished_req_ids is an existing state in the scheduler,
515+
# instead of being newly scheduled in this step.
516+
# It contains the request IDs that are finished in between
517+
# the previous and the current steps.
518+
finished_req_ids=self.finished_req_ids, # type: ignore
519+
free_encoder_mm_hashes=self.encoder_cache_manager.
520+
get_freed_mm_hashes(),
521+
)
503522

504523
# NOTE(Kuntai): this function is designed for multiple purposes:
505524
# 1. Plan the KV cache store

vllm_ascend/core/scheduler_dynamic_batch.py

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -561,22 +561,47 @@ def schedule(self) -> SchedulerOutput:
561561
scheduled_spec_decode_tokens,
562562
req_to_new_blocks,
563563
)
564-
scheduler_output = SchedulerOutput(
565-
scheduled_new_reqs=new_reqs_data,
566-
scheduled_cached_reqs=cached_reqs_data,
567-
num_scheduled_tokens=num_scheduled_tokens,
568-
total_num_scheduled_tokens=total_num_scheduled_tokens,
569-
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
570-
scheduled_encoder_inputs=scheduled_encoder_inputs,
571-
num_common_prefix_blocks=num_common_prefix_blocks,
572-
# finished_req_ids is an existing state in the scheduler,
573-
# instead of being newly scheduled in this step.
574-
# It contains the request IDs that are finished in between
575-
# the previous and the current steps.
576-
finished_req_ids=self.finished_req_ids,
577-
free_encoder_mm_hashes=self.encoder_cache_manager.
578-
get_freed_mm_hashes(),
579-
)
564+
if vllm_version_is("0.11.0"):
565+
scheduled_requests = (scheduled_new_reqs + scheduled_running_reqs +
566+
scheduled_resumed_reqs)
567+
structured_output_request_ids, grammar_bitmask = (
568+
self.get_grammar_bitmask(scheduled_requests,
569+
scheduled_spec_decode_tokens))
570+
scheduler_output = SchedulerOutput(
571+
scheduled_new_reqs=new_reqs_data,
572+
scheduled_cached_reqs=cached_reqs_data,
573+
num_scheduled_tokens=num_scheduled_tokens,
574+
total_num_scheduled_tokens=total_num_scheduled_tokens,
575+
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
576+
scheduled_encoder_inputs=scheduled_encoder_inputs,
577+
num_common_prefix_blocks=num_common_prefix_blocks,
578+
# finished_req_ids is an existing state in the scheduler,
579+
# instead of being newly scheduled in this step.
580+
# It contains the request IDs that are finished in between
581+
# the previous and the current steps.
582+
finished_req_ids=self.finished_req_ids,
583+
free_encoder_mm_hashes=self.encoder_cache_manager.
584+
get_freed_mm_hashes(),
585+
structured_output_request_ids=structured_output_request_ids,
586+
grammar_bitmask=grammar_bitmask,
587+
)
588+
else:
589+
scheduler_output = SchedulerOutput(
590+
scheduled_new_reqs=new_reqs_data,
591+
scheduled_cached_reqs=cached_reqs_data,
592+
num_scheduled_tokens=num_scheduled_tokens,
593+
total_num_scheduled_tokens=total_num_scheduled_tokens,
594+
scheduled_spec_decode_tokens=scheduled_spec_decode_tokens,
595+
scheduled_encoder_inputs=scheduled_encoder_inputs,
596+
num_common_prefix_blocks=num_common_prefix_blocks,
597+
# finished_req_ids is an existing state in the scheduler,
598+
# instead of being newly scheduled in this step.
599+
# It contains the request IDs that are finished in between
600+
# the previous and the current steps.
601+
finished_req_ids=self.finished_req_ids,
602+
free_encoder_mm_hashes=self.encoder_cache_manager.
603+
get_freed_mm_hashes(),
604+
)
580605

581606
# NOTE(Kuntai): this function is designed for multiple purposes:
582607
# 1. Plan the KV cache store

0 commit comments

Comments
 (0)