Skip to content

Commit 608909d

Browse files
jrplatin (Jacob Platin) and co-authors authored
[Bug fixes] Fix a few more vLLM imports + Dockerfile typo (#953)
Signed-off-by: Jacob Platin <[email protected]> Co-authored-by: Jacob Platin <[email protected]>
1 parent c11e5aa commit 608909d

File tree

7 files changed

+7
-7
lines changed

7 files changed

+7
-7
lines changed

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ RUN python3 -m pip install --no-cache-dir \
2727
pytest-asyncio \
2828
git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api] \
2929
pytest-cov \
30-
tblib \
30+
tblib
3131

3232
# Install tpu_inference
3333
WORKDIR /workspace/tpu_inference

examples/multi_modal_inference.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from vllm import LLM, EngineArgs, SamplingParams
2222
from vllm.assets.image import ImageAsset
2323
from vllm.multimodal.image import convert_image_mode
24-
from vllm.utils import FlexibleArgumentParser
24+
from vllm.utils.argparse_utils import FlexibleArgumentParser
2525

2626

2727
class ModelRequestData(NamedTuple):

examples/offline_lora_inference.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import vllm.envs as envs
88
from vllm import LLM, EngineArgs
99
from vllm.lora.request import LoRARequest
10-
from vllm.utils import FlexibleArgumentParser
10+
from vllm.utils.argparse_utils import FlexibleArgumentParser
1111

1212

1313
def create_parser():

examples/tpu_profiling.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from vllm import LLM, SamplingParams
2020
from vllm.engine.arg_utils import EngineArgs
2121
from vllm.inputs import PromptType
22-
from vllm.utils import FlexibleArgumentParser
22+
from vllm.utils.argparse_utils import FlexibleArgumentParser
2323

2424
DURATION_MS = int(os.getenv("VLLM_TPU_PROFILE_DURATION_MS", 3000))
2525
DELAY_MS = int(os.getenv("VLLM_TPU_PROFILE_DELAY_MS", 0))

scripts/vllm/benchmarking/benchmark_serving.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
from backend_request_func import get_tokenizer
4848

4949
try:
50-
from vllm.utils import FlexibleArgumentParser
50+
from vllm.utils.argparse_utils import FlexibleArgumentParser
5151
except ImportError:
5252
from argparse import ArgumentParser as FlexibleArgumentParser
5353

tests/e2e/benchmarking/mlperf.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ for model_name in $model_list; do
295295
--dataset-name "$dataset_name" \
296296
--dataset-path "$dataset_path" \
297297
--num-prompts "$num_prompts" \
298-
--run_eval 2>&1 | tee -a "$BENCHMARK_LOG_FILE"
298+
--run-eval 2>&1 | tee -a "$BENCHMARK_LOG_FILE"
299299

300300
# TODO (jacobplatin): probably want to add an option to skip this in the future
301301
if [ "$dataset_name" == "mlperf" ]; then

tpu_inference/core/disagg_executor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@
66
from vllm.logger import init_logger
77
from vllm.multimodal import MULTIMODAL_REGISTRY
88
from vllm.multimodal.cache import worker_receiver_cache_from_config
9-
from vllm.utils import run_method
109
from vllm.utils.network_utils import (get_distributed_init_method, get_ip,
1110
get_open_port)
1211
from vllm.v1.executor.abstract import Executor
1312
from vllm.v1.outputs import AsyncModelRunnerOutput
13+
from vllm.v1.serial_utils import run_method
1414
from vllm.v1.worker.worker_base import WorkerWrapperBase
1515

1616
logger = init_logger(__name__)

0 commit comments

Comments (0)