-
Notifications
You must be signed in to change notification settings - Fork 347
Description
Your current environment
def profile_run(self) -> None:
    """Run a maximum-size dummy forward pass followed by the output stage.

    Calls ``self._dummy_run(self.max_num_tokens)`` and then, only when
    ``get_pp_group().is_last_rank`` is true, passes the resulting hidden
    states either to ``self._dummy_pooler_run`` (when
    ``self.is_pooling_model`` is set) or to ``self.model.compute_logits``
    using one hidden-state row per simulated request.

    NOTE(review): ``output`` is assigned but not consumed in the lines
    shown here; this looks like an excerpt of a longer method -- confirm
    against the full file before relying on this being the whole body.
    """
    # Trigger compilation for general shape.
    hidden_states = self._dummy_run(self.max_num_tokens)
    output = None
    if get_pp_group().is_last_rank:
        if self.is_pooling_model:
            output = self._dummy_pooler_run(hidden_states)
        else:
            # For profiling, simulate as many requests as possible that
            # collectively account for max_num_tokens. The request count is
            # capped by the token count: otherwise requests would be given
            # zero tokens and "cumsum - 1" below would produce -1 indices
            # that silently wrap around to the last row.
            num_reqs = min(self.max_num_tokens, self.max_num_reqs)
            tokens_per_req, leftover = divmod(self.max_num_tokens, num_reqs)
            num_scheduled_tokens = np.full(num_reqs,
                                           tokens_per_req,
                                           dtype=np.int32)
            # The last request absorbs whatever does not divide evenly.
            num_scheduled_tokens[-1] += leftover
            # Index of the final token of each simulated request.
            logit_indices = np.cumsum(num_scheduled_tokens) - 1
            # TODO: need to run a dummy sampler for generate task
            # NOTE(review): the traceback reported with this snippet points
            # at the indexing line below; whether the fault originates here
            # or in an earlier, asynchronously executed kernel cannot be
            # determined from this code alone.
            # Assumes hidden_states has one row per token -- TODO confirm.
            hidden_states = hidden_states[logit_indices]
            output = self.model.compute_logits(hidden_states, None)
🐛 Describe the bug
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] WorkerProc hit an exception.
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 152, in determine_available_memory
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1701, in profile_run
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] hidden_states = hidden_states[logit_indices]
(VllmWorker rank=3 pid=56447) ERROR 07-30 06:16:20 [multiproc_executor.py:522] RuntimeError: ACL stream synchronize failed, error code:507018
(VllmWorker rank=1 pid=56185) INFO 07-30 06:16:20 [monitor.py:34] torch.compile takes 4.12 s in total
[rank1]:[W730 06:16:20.361744142 compiler_depend.ts:57] Warning: E39999: Inner Error!
E39999: [PID: 56185] 2025-07-30-06:16:20.219.133 The error from device(chipId:1, dieId:0), serial number is 25, an exception occurred during AICPU execution, stream_id:2, task_id:285, errcode:21008, msg:inner error.[FUNC:ProcessStarsAicpuErrorInfo][FILE:device_error_proc.cc][LINE:1496]
TraceBack (most recent call last):
Kernel task happen error, retCode=0x2a, [aicpu exception].[FUNC:PreCheckTaskErr][FILE:davinci_kernel_task.cc][LINE:1366]
AICPU Kernel task happen error, retCode=0x2a.[FUNC:GetError][FILE:stream.cc][LINE:1119]
Aicpu kernel execute failed, device_id=1, stream_id=2, task_id=285, errorCode=2a.[FUNC:PrintAicpuErrorInfo][FILE:davinci_kernel_task.cc][LINE:1128]
Aicpu kernel execute failed, device_id=1, stream_id=2, task_id=285, fault op_name=[FUNC:GetError][FILE:stream.cc][LINE:1119]
rtStreamSynchronize execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
synchronize stream failed, runtime result = 507018[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
(function copy_between_host_and_device_opapi)
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] WorkerProc hit an exception.
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 152, in determine_available_memory
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1701, in profile_run
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] hidden_states = hidden_states[logit_indices]
(VllmWorker rank=1 pid=56185) ERROR 07-30 06:16:20 [multiproc_executor.py:522] RuntimeError: ACL stream synchronize failed, error code:507018
(VllmWorker rank=0 pid=56178) INFO 07-30 06:16:20 [monitor.py:34] torch.compile takes 4.42 s in total
[rank0]:[W730 06:16:20.372906864 compiler_depend.ts:57] Warning: E39999: Inner Error!
E39999: [PID: 56178] 2025-07-30-06:16:20.230.211 The error from device(chipId:0, dieId:0), serial number is 27, an exception occurred during AICPU execution, stream_id:2, task_id:286, errcode:21008, msg:inner error.[FUNC:ProcessStarsAicpuErrorInfo][FILE:device_error_proc.cc][LINE:1496]
TraceBack (most recent call last):
Kernel task happen error, retCode=0x2a, [aicpu exception].[FUNC:PreCheckTaskErr][FILE:davinci_kernel_task.cc][LINE:1366]
AICPU Kernel task happen error, retCode=0x2a.[FUNC:GetError][FILE:stream.cc][LINE:1119]
Aicpu kernel execute failed, device_id=0, stream_id=2, task_id=286, errorCode=2a.[FUNC:PrintAicpuErrorInfo][FILE:davinci_kernel_task.cc][LINE:1128]
Aicpu kernel execute failed, device_id=0, stream_id=2, task_id=286, fault op_name=[FUNC:GetError][FILE:stream.cc][LINE:1119]
rtStreamSynchronize execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
synchronize stream failed, runtime result = 507018[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
(function copy_between_host_and_device_opapi)
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] WorkerProc hit an exception.
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 152, in determine_available_memory
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 1701, in profile_run
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] hidden_states = hidden_states[logit_indices]
(VllmWorker rank=0 pid=56178) ERROR 07-30 06:16:20 [multiproc_executor.py:522] RuntimeError: ACL stream synchronize failed, error code:507018
ERROR 07-30 06:16:20 [core.py:586] EngineCore failed to start.
ERROR 07-30 06:16:20 [core.py:586] Traceback (most recent call last):
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 577, in run_engine_core
ERROR 07-30 06:16:20 [core.py:586] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 404, in __init__
ERROR 07-30 06:16:20 [core.py:586] super().__init__(vllm_config, executor_class, log_stats,
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 82, in __init__
ERROR 07-30 06:16:20 [core.py:586] self._initialize_kv_caches(vllm_config)
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 142, in _initialize_kv_caches
ERROR 07-30 06:16:20 [core.py:586] available_gpu_memory = self.model_executor.determine_available_memory()
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/executor/abstract.py", line 76, in determine_available_memory
ERROR 07-30 06:16:20 [core.py:586] output = self.collective_rpc("determine_available_memory")
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 215, in collective_rpc
ERROR 07-30 06:16:20 [core.py:586] result = get_response(w, dequeue_timeout)
ERROR 07-30 06:16:20 [core.py:586] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 202, in get_response
ERROR 07-30 06:16:20 [core.py:586] raise RuntimeError(
ERROR 07-30 06:16:20 [core.py:586] RuntimeError: Worker failed with error 'ACL stream synchronize failed, error code:507018', please check the stack trace above for the root cause
[W730 06:16:22.858261943 compiler_depend.ts:526] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.718.847 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeUsedDevices)
[W730 06:16:22.860542008 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.722.390 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.862563250 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.724.490 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.864553442 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.726.561 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.866398652 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.728.498 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.868223023 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.730.302 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.869118032 compiler_depend.ts:526] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.730.327 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeUsedDevices)
[W730 06:16:22.870843091 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.732.136 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.871217776 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.733.148 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.873211937 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.735.124 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.873954906 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.735.550 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.874991027 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.737.046 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.876054519 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.737.922 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.876856668 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.738.871 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.878056901 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.739.920 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.878726988 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.740.794 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.880341726 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.741.978 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.880558428 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.742.606 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.882374518 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.744.402 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.882392808 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.744.221 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.884241489 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.746.298 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.884581523 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.746.327 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.886622125 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.748.444 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.886880658 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.748.081 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.888663337 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.750.525 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.889824740 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.751.406 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.890626549 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.752.527 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.891996724 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.753.743 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.892680212 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56178] 2025-07-30-06:16:22.754.528 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.894178098 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.755.936 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.896279971 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.758.053 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.898495786 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.760.196 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.900581869 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.762.357 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.902467289 compiler_depend.ts:526] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.763.628 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeUsedDevices)
[W730 06:16:22.902736752 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56185] 2025-07-30-06:16:22.764.501 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.904542782 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.766.524 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.906262611 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.768.454 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.907835098 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.770.105 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.909448696 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.771.729 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.911005673 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.773.285 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.912721182 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.774.874 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.914327090 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.776.561 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.916014068 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.778.221 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.917713527 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.780.010 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.919269074 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.781.577 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.920812741 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.783.103 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.922378718 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.784.677 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.923923655 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.786.199 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.925511073 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.787.810 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
[W730 06:16:22.927078260 compiler_depend.ts:508] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.789.339 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function npuSynchronizeDevice)
[W730 06:16:22.928694818 compiler_depend.ts:227] Warning: NPU warning, error code is 507018[Error]:
[Error]: The aicpu execution is abnormal.
Rectify the fault based on the error information in the ascend log.
EH9999: Inner Error!
rtDeviceSynchronizeWithTimeout execute failed, reason=[aicpu exception][FUNC:FuncErrorReason][FILE:error_message_manage.cc][LINE:53]
EH9999: [PID: 56447] 2025-07-30-06:16:22.790.944 wait for compute device to finish failed, runtime result = 507018.[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
TraceBack (most recent call last):
(function empty_cache)
ERROR 07-30 06:16:32 [multiproc_executor.py:135] Worker proc VllmWorker-2 died unexpectedly, shutting down executor.
Process EngineCore_0:
Traceback (most recent call last):
File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/usr/local/python3.10.17/lib/python3.10/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 590, in run_engine_core
raise e
File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 577, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 404, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 82, in __init__
self._initialize_kv_caches(vllm_config)
File "/vllm-workspace/vllm/vllm/v1/engine/core.py", line 142, in _initialize_kv_caches
available_gpu_memory = self.model_executor.determine_available_memory()
File "/vllm-workspace/vllm/vllm/v1/executor/abstract.py", line 76, in determine_available_memory
output = self.collective_rpc("determine_available_memory")
File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 215, in collective_rpc
result = get_response(w, dequeue_timeout)
File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 202, in get_response
raise RuntimeError(
RuntimeError: Worker failed with error 'ACL stream synchronize failed, error code:507018', please check the stack trace above for the root cause
Traceback (most recent call last):
File "/usr/local/python3.10.17/bin/vllm", line 8, in
sys.exit(main())
File "/vllm-workspace/vllm/vllm/entrypoints/cli/main.py", line 65, in main
args.dispatch_function(args)
File "/vllm-workspace/vllm/vllm/entrypoints/cli/serve.py", line 55, in cmd
uvloop.run(run_server(args))
File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 82, in run
return loop.run_until_complete(wrapper())
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/python3.10.17/lib/python3.10/site-packages/uvloop/__init__.py", line 61, in wrapper
return await main
File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1431, in run_server
await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 1451, in run_server_worker
async with build_async_engine_client(args, client_config) as engine_client:
File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__
return await anext(self.gen)
File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 199, in __aenter__
return await anext(self.gen)
File "/vllm-workspace/vllm/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
File "/vllm-workspace/vllm/vllm/v1/engine/async_llm.py", line 162, in from_vllm_config
return cls(
File "/vllm-workspace/vllm/vllm/v1/engine/async_llm.py", line 124, in __init__
self.engine_core = EngineCoreClient.make_async_mp_client(
File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 96, in make_async_mp_client
return AsyncMPClient(*client_args)
File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 666, in __init__
super().__init__(
File "/vllm-workspace/vllm/vllm/v1/engine/core_client.py", line 403, in __init__
with launch_core_engines(vllm_config, executor_class,
File "/usr/local/python3.10.17/lib/python3.10/contextlib.py", line 142, in __exit__
next(self.gen)
File "/vllm-workspace/vllm/vllm/v1/engine/utils.py", line 434, in launch_core_engines
wait_for_engine_startup(
File "/vllm-workspace/vllm/vllm/v1/engine/utils.py", line 484, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
[ERROR] 2025-07-30-06:16:37 (PID:55707, Device:-1, RankID:-1) ERR99999 UNKNOWN applicaiton exception