Skip to content

[P/D] Prepare For Upstreaming #75

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 31 commits into
base: disagg_pd_dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7196adf
update to use separate scheduler
robertgshaw2-redhat May 5, 2025
493cda1
update to use separate scheduler
robertgshaw2-redhat May 5, 2025
2427918
update to use separate scheduler
robertgshaw2-redhat May 5, 2025
50cecf6
updated
robertgshaw2-redhat May 5, 2025
ba3e759
updated
robertgshaw2-redhat May 5, 2025
096de02
updated
robertgshaw2-redhat May 5, 2025
0fefd4a
updated
robertgshaw2-redhat May 5, 2025
eba6982
updated
robertgshaw2-redhat May 5, 2025
d97cbf9
updated
robertgshaw2-redhat May 5, 2025
590c213
updated
robertgshaw2-redhat May 5, 2025
be0407a
updated
robertgshaw2-redhat May 5, 2025
c905a48
updated
robertgshaw2-redhat May 5, 2025
0b2cc61
updated
robertgshaw2-redhat May 5, 2025
23c3a6f
updated
robertgshaw2-redhat May 5, 2025
000715d
updated
robertgshaw2-redhat May 5, 2025
37ae9ad
updated
robertgshaw2-redhat May 5, 2025
59280ea
updated
robertgshaw2-redhat May 5, 2025
70c766f
updated
robertgshaw2-redhat May 5, 2025
089b1d7
updated
robertgshaw2-redhat May 5, 2025
17e9085
updated
robertgshaw2-redhat May 5, 2025
e554759
updated
robertgshaw2-redhat May 5, 2025
1c24c66
updated
robertgshaw2-redhat May 5, 2025
8ac138e
updated
robertgshaw2-redhat May 5, 2025
f8239cf
updated
robertgshaw2-redhat May 6, 2025
4a39108
updated
robertgshaw2-redhat May 6, 2025
6f328a2
remove multi-connector
robertgshaw2-redhat May 6, 2025
da291ce
remove multi-connector
robertgshaw2-redhat May 6, 2025
b00157b
cleanup
robertgshaw2-redhat May 6, 2025
5af868e
cherry pick nixl
robertgshaw2-redhat May 6, 2025
baf2700
updated
robertgshaw2-redhat May 6, 2025
e22d44b
updated
robertgshaw2-redhat May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ steps:
- pytest -v -s v1/worker
- pytest -v -s v1/structured_output
- pytest -v -s v1/spec_decode
- pytest -v -s v1/kv_transfer
- pytest -v -s v1/kv_connector/unit
- pytest -v -s v1/test_serial_utils.py
- pytest -v -s v1/test_stats.py
- pytest -v -s v1/test_utils.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ run_tests_for_model() {
done

# Build the command for the proxy server with all the hosts and ports
PROXY_CMD="python ${GIT_ROOT}/tests/v1/kv_connector/toy_proxy_server.py --port 8192"
PROXY_CMD="python ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192"

# Add all prefill hosts and ports
PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[@]}"
Expand All @@ -157,7 +157,7 @@ run_tests_for_model() {

# Run lm eval for this model
echo "Running tests for $model_name"
TEST_MODEL=$model_name python -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/test_accuracy.py
TEST_MODEL=$model_name python -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_accuracy.py

# Clean up before running next model
cleanup_instances
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,37 @@ def test_no_spurious_prefix_caching():
for block in remote_blocks:
assert block.ref_cnt == 1
assert block._block_hash is None


def test_short_prompt_lifecycle():
    """Lifecycle test: Remote Decode request whose prompt is under one block."""

    config = create_vllm_config()
    scheduler = create_scheduler(config)

    # Prompt shorter than a full KV block.
    num_prompt_tokens = config.cache_config.block_size // 2
    req = create_request(request_id=1,
                         num_tokens=num_prompt_tokens,
                         do_remote_decode=True)

    scheduler.add_request(req)

    # --- Step 1: prefill ---
    # schedule()
    sched_out = scheduler.schedule()
    assert len(scheduler.running) == 1
    assert len(sched_out.scheduled_new_reqs) == 1

    # execute_model()
    runner_out = create_model_runner_output(reqs=[req])

    # update_from_output(): with fewer tokens than block_size there is no
    # KV transfer, so the request should be torn down immediately.
    _ = scheduler.update_from_output(sched_out, runner_out)

    # One extra schedule() call flushes the persistent-batch bookkeeping;
    # after that the scheduler must hold no leftover request state (no leaks).
    _ = scheduler.schedule()
    assert_scheduler_empty(scheduler)
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from vllm.config import (CacheConfig, DeviceConfig, KVTransferConfig,
ModelConfig, SchedulerConfig, VllmConfig)
from vllm.sampling_params import KVTransferParams, SamplingParams
from vllm.v1.core.sched.scheduler import Scheduler
from vllm.v1.core.sched.scheduler_disagg import DisaggregatedScheduler
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
KVCacheGroupSpec)
from vllm.v1.outputs import ModelRunnerOutput
Expand All @@ -16,7 +16,7 @@
EOS_TOKEN_ID = 50256


def assert_scheduler_empty(scheduler: Scheduler):
def assert_scheduler_empty(scheduler: DisaggregatedScheduler):
"""Confirm the scheduler is "empty" - i.e. no leaks."""
# Scheduler Metadata.
assert len(scheduler.requests) == 0
Expand Down Expand Up @@ -88,7 +88,7 @@ def create_vllm_config(
def create_scheduler(
vllm_config: VllmConfig,
num_blocks: int = 10000,
) -> Scheduler:
) -> DisaggregatedScheduler:
"""Initialize Scheduler For Testing."""
block_size = vllm_config.cache_config.block_size
kv_cache_config = KVCacheConfig(
Expand All @@ -101,7 +101,7 @@ def create_scheduler(
],
)
vllm_config.cache_config.num_gpu_blocks = num_blocks
return Scheduler(
return DisaggregatedScheduler(
vllm_config=vllm_config,
kv_cache_config=kv_cache_config,
log_stats=True,
Expand Down
239 changes: 0 additions & 239 deletions tests/v1/kv_transfer/test_multi_connector.py

This file was deleted.

2 changes: 0 additions & 2 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3402,8 +3402,6 @@ class KVTransferConfig(BaseModel):
kv_connector: Optional[str] = None

# Engine ID for the KV transfers.
# Note(tms): sticking this here so the engine_id is consistent between
# scheduler-side and worker-side of the KVConnector
engine_id: str = str(uuid.uuid4())

# The device used by kv connector to buffer the KV cache.
Expand Down
5 changes: 0 additions & 5 deletions vllm/distributed/kv_transfer/kv_connector/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,3 @@ def create_connector_v1(
"NixlConnector",
"vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector",
"NixlConnector")

KVConnectorFactory.register_connector(
"MultiConnector",
"vllm.distributed.kv_transfer.kv_connector.v1.multi_connector",
"MultiConnector")
2 changes: 2 additions & 0 deletions vllm/distributed/kv_transfer/kv_connector/v1/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import enum
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING

import torch
Expand All @@ -46,6 +47,7 @@ class KVConnectorRole(enum.Enum):
WORKER = 1


@dataclass
class KVConnectorMetadata:
    """Empty base dataclass for KV-connector metadata.

    Declares no fields of its own; concrete connectors presumably subclass
    it to carry their transfer-specific state — NOTE(review): confirm
    against the connector implementations, not visible from this diff.
    """
    pass

Expand Down
Loading