diff --git a/.github/workflows/cuda12.8-whl-release.yml b/.github/workflows/cuda12.8_whl_release.yml similarity index 100% rename from .github/workflows/cuda12.8-whl-release.yml rename to .github/workflows/cuda12.8_whl_release.yml diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit_test.yml similarity index 100% rename from .github/workflows/unit-test.yml rename to .github/workflows/unit_test.yml diff --git a/.github/workflows/windows-x64-gpu.yml b/.github/workflows/windows_x64_gpu.yml similarity index 100% rename from .github/workflows/windows-x64-gpu.yml rename to .github/workflows/windows_x64_gpu.yml diff --git a/README.md b/README.md index f903b837b1..a9c57ac665 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
  • Qwen2-VL (2B, 7B, 72B)
  • Qwen2.5-VL (3B, 7B, 72B)
  • Qwen3-VL (2B - 235B)
  • - Qwen3.5
  • + Qwen3.5 (27B - 397B)
  • DeepSeek-VL (7B)
  • DeepSeek-VL2 (3B, 16B, 27B)
  • InternVL-Chat (v1.1-v1.5)
  • @@ -228,7 +228,7 @@ The default prebuilt package is compiled on **CUDA 12** since v0.3.0. For the GeForce RTX 50 series, please install the LMDeploy prebuilt package complied with **CUDA 12.8** ```shell -export LMDEPLOY_VERSION=0.12.1 +export LMDEPLOY_VERSION=0.12.2 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu128 ``` diff --git a/README_ja.md b/README_ja.md index 8537d4085c..ad6bf24361 100644 --- a/README_ja.md +++ b/README_ja.md @@ -155,7 +155,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
  • Qwen2-VL (2B, 7B, 72B)
  • Qwen2.5-VL (3B, 7B, 72B)
  • Qwen3-VL (2B - 235B)
  • - Qwen3.5
  • + Qwen3.5 (27B - 397B)
  • DeepSeek-VL (7B)
  • DeepSeek-VL2 (3B, 16B, 27B)
  • InternVL-Chat (v1.1-v1.5)
  • diff --git a/README_zh-CN.md b/README_zh-CN.md index 4c2dd759ba..bf23cb8509 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -178,7 +178,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
  • Qwen2-VL (2B, 7B, 72B)
  • Qwen2.5-VL (3B, 7B, 72B)
  • Qwen3-VL (2B - 235B)
  • - Qwen3.5
  • + Qwen3.5 (27B - 397B)
  • DeepSeek-VL (7B)
  • DeepSeek-VL2 (3B, 16B, 27B)
  • InternVL-Chat (v1.1-v1.5)
  • @@ -230,7 +230,7 @@ pip install lmdeploy 若使用 GeForce RTX 50 系列显卡,请安装基于 **CUDA 12.8** 编译的 LMDeploy 预编译包。 ```shell -export LMDEPLOY_VERSION=0.12.1 +export LMDEPLOY_VERSION=0.12.2 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu128 ``` diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md index 6503cc70f5..8ded44555d 100644 --- a/docs/en/get_started/installation.md +++ b/docs/en/get_started/installation.md @@ -23,7 +23,7 @@ pip install lmdeploy The default prebuilt package is compiled on **CUDA 12**. If CUDA 11+ (>=11.3) is required, you can install lmdeploy by: ```shell -export LMDEPLOY_VERSION=0.12.1 +export LMDEPLOY_VERSION=0.12.2 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118 ``` diff --git a/docs/en/quantization/llm_compressor.md b/docs/en/quantization/llm_compressor.md index 7522f8c5d7..2b6ab9bfc4 100644 --- a/docs/en/quantization/llm_compressor.md +++ b/docs/en/quantization/llm_compressor.md @@ -44,7 +44,7 @@ conda create -n lmdeploy python=3.10 -y conda activate lmdeploy # Install llm-compressor -pip install llm-compressor +pip install llmcompressor # Clone lmdeploy source code and run the quantization example git clone https://github.com/InternLM/lmdeploy diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md index 0ce854a24a..988252b028 100644 --- a/docs/zh_cn/get_started/installation.md +++ b/docs/zh_cn/get_started/installation.md @@ -23,7 +23,7 @@ pip install lmdeploy 默认的预构建包是在 **CUDA 12** 上编译的。如果需要 CUDA 11+ (>=11.3),你可以使用以下命令安装 lmdeploy: ```shell 
-export LMDEPLOY_VERSION=0.12.1 +export LMDEPLOY_VERSION=0.12.2 export PYTHON_VERSION=310 pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118 ``` diff --git a/docs/zh_cn/quantization/llm_compressor.md b/docs/zh_cn/quantization/llm_compressor.md index 10a90b4538..8fdcbba6a4 100644 --- a/docs/zh_cn/quantization/llm_compressor.md +++ b/docs/zh_cn/quantization/llm_compressor.md @@ -42,7 +42,7 @@ conda create -n lmdeploy python=3.10 -y conda activate lmdeploy # 安装 llm-compressor -pip install llm-compressor +pip install llmcompressor # 下载 lmdeploy 源码,运行量化用用例 git clone https://github.com/InternLM/lmdeploy diff --git a/lmdeploy/pytorch/engine/engine_loop.py b/lmdeploy/pytorch/engine/engine_loop.py index 1106f20c9c..d0b6a5e2d6 100644 --- a/lmdeploy/pytorch/engine/engine_loop.py +++ b/lmdeploy/pytorch/engine/engine_loop.py @@ -146,8 +146,7 @@ def _log_resps(outputs: List[InferOutput]): if logger.level <= logging.DEBUG: session_ids = [out.session_id for out in outputs] logger.debug(f'Response sessions: {session_ids}') - elif logger.level <= logging.INFO: - logger.info(f'Response: num_outputs={len(outputs)}.') + logger.debug(f'Response: num_outputs={len(outputs)}.') def _send_resp(self, out: InferOutput): """Send response.""" diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py index 2a68483ad4..3adce23d43 100644 --- a/lmdeploy/serve/core/async_engine.py +++ b/lmdeploy/serve/core/async_engine.py @@ -268,7 +268,7 @@ async def safe_run(self, handle, session, **kwargs): metrics_processor.increase_api_routed_requests() yield generator except (Exception, asyncio.CancelledError, GeneratorExit) as e: # noqa - logger.error(f'[safe_run] session {session.session_id} exception caught: {type(e).__name__} {e}') + logger.exception(f'[safe_run] session {session.session_id} exception caught: {e}') await session.async_abort() if self.backend == 'pytorch': await handle.async_end(session.session_id) diff --git a/lmdeploy/serve/processors/multimodal.py b/lmdeploy/serve/processors/multimodal.py index 2ea599e687..93f636e27e 100644 --- a/lmdeploy/serve/processors/multimodal.py +++ b/lmdeploy/serve/processors/multimodal.py @@ -99,8 +99,6 @@ async def async_convert_multimodal_data(messages: List[Dict]) -> List[Dict]: def _inner_call(i, in_messages, out_messages): role = in_messages[i]['role'] content = in_messages[i]['content'] - assert role in ['system', 'user', 'assistant'], \ - f'unsupported role "{role}"' if role != 'user' or isinstance(content, str): # the content is a user's prompt or an assistant's prompt, # returning it directly diff --git a/lmdeploy/version.py b/lmdeploy/version.py index 54a2e46ee5..6c412114c3 100644 --- a/lmdeploy/version.py +++ b/lmdeploy/version.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Tuple -__version__ = '0.12.1' +__version__ = '0.12.2' short_version = __version__ diff --git a/src/turbomind/models/llama/SequenceManager.cc b/src/turbomind/models/llama/SequenceManager.cc index 594fa78b6b..f0e9e4ae40 100644 --- a/src/turbomind/models/llama/SequenceManager.cc +++ b/src/turbomind/models/llama/SequenceManager.cc @@ -528,7 +528,7 @@ auto SequenceManager::Materialize(Sequences sequences, // release preempted blocks -> cached if (!schedule.victims.empty()) { - TM_LOG_WARNING("[SeqMgr] #victim: %d", (int)schedule.victims.size()); + TM_LOG_INFO("[SeqMgr] #victim: %d", (int)schedule.victims.size()); for (const auto& p : schedule.victims) { UpdateAndSetUnlock(*p); }