-
Notifications
You must be signed in to change notification settings - Fork 403
Description
Your current environment
直接下载的官方镜像
docker pull quay.io/ascend/vllm-ascend:v0.9.2rc1 --platform arm64
🐛 Describe the bug
严格按照官方文档的教程(https://vllm-ascend.readthedocs.io/en/latest/tutorials/single_npu_qwen3_embedding.html)操作,
运行如下命令后,模型加载完成时报错。
vllm serve Qwen/Qwen3-Embedding-8B --task embed
设备是 8 卡 910B。
报错信息如下:
ERROR 07-31 02:52:28 [core.py:586] EngineCore failed to start. ERROR 07-31 02:52:28 [core.py:586] Traceback (most recent call last): ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 577, in run_engine_core ERROR 07-31 02:52:28 [core.py:586] engine_core = EngineCoreProc(*args, **kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 404, in init ERROR 07-31 02:52:28 [core.py:586] super().init(vllm_config, executor_class, log_stats, ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 82, in init ERROR 07-31 02:52:28 [core.py:586] self._initialize_kv_caches(vllm_config) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 142, in _initialize_kv_caches ERROR 07-31 02:52:28 [core.py:586] available_gpu_memory = self.model_executor.determine_available_memory() ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/v 1/executor/abstract.py", line 76, in determine_available_memory ERROR 07-31 02:52:28 [core.py:586] output = self.collective_rpc("determine_available_memory") ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 57, in collective_rpc ERROR 07-31 02:52:28 [core.py:586] answer = run_method(self.driver_worker, method, args, kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/utils/init.py", line 2736, in run_method ERROR 07-31 02:52:28 [core.py:586] return func(*args, **kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 152, in determine_available_memory ERROR 07-31 02:52:28 [core.py:586] self.model_runner.profile_run() ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1683, in profile_run ERROR 07-31 02:52:28 [core.py:586] output = self._dummy_pooler_run(hidden_states) ERROR 07-31 
02:52:28 [core.py:586] File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context ERROR 07-31 02:52:28 [core.py:586] return func(*args, **kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1731, in _dummy_pooler_run ERROR 07-31 02:52:28 [core.py:586] pooler_output = self.model.pooler(hidden_states=hidden_states_list, ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/models/adapters.py", line 83, in pooler ERROR 07-31 02:52:28 [core.py:586] return self._pooler(hidden_states, pooling_metadata) ERROR 07-31 02:52:28 [core.py:586] File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl ERROR 07-31 02:52:28 [core.py:586] return self._call_impl(*args, **kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl ERROR 07-31 02:52:28 [core.py:586] return forward_call(*args, **kwargs) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 107, in forward ERROR 07-31 02:52:28 [core.py:586] pooled_data = self.extract_states(hidden_states, pooling_metadata) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 120, in extract_states ERROR 07-31 02:52:28 [core.py:586] prompt_lens = self.get_prompt_lens(hidden_states, pooling_metadata) ERROR 07-31 02:52:28 [core.py:586] File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 88, in get_prompt_lens ERROR 07-31 02:52:28 [core.py:586] assert isinstance(hidden_states, torch.Tensor) ERROR 07-31 02:52:28 [core.py:586] AssertionError
Process EngineCore_0: Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap self.run() File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 108, in run self._target(*self._args, **self._kwargs) File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 590, in run_engine_core raise e File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 577, in run_engine_core engine_core = EngineCoreProc(*args, **kwargs) File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 404, in init super().init(vllm_config, executor_class, log_stats, File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 82, in init self._initialize_kv_caches(vllm_config) File "/vllm-workspace/vllm/vllm/v 1/engine/core.py", line 142, in _initialize_kv_caches available_gpu_memory = self.model_executor.determine_available_memory() File "/vllm-workspace/vllm/vllm/v 1/executor/abstract.py", line 76, in determine_available_memory output = self.collective_rpc("determine_available_memory") File "/vllm-workspace/vllm/vllm/executor/uniproc_executor.py", line 57, in collective_rpc answer = run_method(self.driver_worker, method, args, kwargs) File "/vllm-workspace/vllm/vllm/utils/init.py", line 2736, in run_method return func(*args, **kwargs) File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 152, in determine_available_memory self.model_runner.profile_run() File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1683, in profile_run output = self._dummy_pooler_run(hidden_states) File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context return func(*args, **kwargs) File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1731, in _dummy_pooler_run pooler_output = self.model.pooler(hidden_states=hidden_states_list, File 
"/vllm-workspace/vllm/vllm/model_executor/models/adapters.py", line 83, in pooler return self._pooler(hidden_states, pooling_metadata) File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/usr/local/python3.10.17/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl return forward_call(*args, **kwargs) File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 107, in forward pooled_data = self.extract_states(hidden_states, pooling_metadata) File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 120, in extract_states prompt_lens = self.get_prompt_lens(hidden_states, pooling_metadata) File "/vllm-workspace/vllm/vllm/model_executor/layers/pooler.py", line 88, in get_prompt_lens assert isinstance(hidden_states, torch.Tensor)
AssertionError Traceback (most recent call last): File "/usr/local/python3.10.17/lib/python3.10/runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "/usr/local/python3.10.17/lib/python3.10/runpy.py", line 86, in _run_code exec(code, run_globals) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1495, in < module> uvloop.run(run_server(args)) File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/init.py", line 82, in run return loop.run_until_complete(wrapper()) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/init.py", line 61, in wrapper return await main File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1431, in run_server await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1451, in run_server_worker async with build_async_engine_client(args, client_config) as engine_client: File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in aenter return await anext(self.gen) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client async with build_async_engine_client_from_engine_args( File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in aenter return await anext(self.gen) File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args async_llm = AsyncLLM.from_vllm_config( File "/vllm-workspace/vllm/vllm/v 1/engine/async_llm.py", line 162, in from_vllm_config return cls( File "/vllm-workspace/vllm/vllm/v 1/engine/async_llm.py", line 124, in init self.engine_core = EngineCoreClient.make_async_mp_client( File "/vllm-workspace/vllm/vllm/v 1/engine/core_client.py", line 96, in make_async_mp_client return AsyncMPClient(*client_args) File 
"/vllm-workspace/vllm/vllm/v 1/engine/core_client.py", line 666, in init super().init( File "/vllm-workspace/vllm/vllm/v 1/engine/core_client.py", line 403, in init with launch_core_engines(vllm_config, executor_class, File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 142, in exit next(self.gen) File "/vllm-workspace/vllm/vllm/v 1/engine/utils.py", line 434, in launch_core_engines wait_for_engine_startup( File "/vllm-workspace/vllm/vllm/v 1/engine/utils.py", line 484, in wait_for_engine_startup raise RuntimeError("Engine core initialization failed. " RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} [ERROR] 2025-07-31-02:52:38 (PID:441, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception