diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/README.md
index fee88c56a89..85ad84a17f4 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/README.md
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/README.md
@@ -4,7 +4,7 @@ This example provides an end-to-end workflow to quantize DeepSeek models to MXFP
 ```bash
 pip install neural-compressor-pt==3.7
 # auto-round
-pip install auto-round==0.9.2
+pip install auto-round==0.9.3
 # vLLM
 git clone -b fused-moe-ar --single-branch --quiet https://github.com/yiliu30/vllm-fork.git && cd vllm-fork
 VLLM_USE_PRECOMPILED=1 pip install --editable . -vvv
@@ -16,7 +16,7 @@ pip uninstall flash_attn
 ### Quantize Model
 - Export model path
 ```bash
-export MODEL=deepseek-ai/DeepSeek-R1
+export MODEL=unsloth/DeepSeek-R1-BF16
 ```
 
 - MXFP8
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/quantize.py
index 9becc2cecf9..496a9f26e68 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/quantize.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/quantize.py
@@ -40,11 +40,12 @@ def get_model_and_tokenizer(model_name):
     fp32_model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
-        trust_remote_code=True,
+        trust_remote_code=False,
+        dtype="auto",
     )
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
-        trust_remote_code=True,
+        trust_remote_code=False,
     )
 
     return fp32_model, tokenizer
@@ -68,6 +69,7 @@ def quant_model(args):
         fp_layers=config["fp_layers"],
         export_format=export_format,
         output_dir=output_dir,
+        reloading=False,
     )
 
     # quantizer execute
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_evaluation.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_evaluation.sh
index 1d805c7872b..d0039e5ecff 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_evaluation.sh
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/deepseek/run_evaluation.sh
@@ -114,7 +114,6 @@ lm_eval --model vllm \
     --tasks $TASK_NAME \
     --batch_size $BATCH_SIZE \
     --log_samples \
-    --limit 64 \
    --seed 42 \
     --output_path ${OUTPUT_DIR} \
     --show_config 2>&1 | tee ${OUTPUT_DIR}/log.txt
\ No newline at end of file
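Reviewer note: the deepseek/quantize.py hunks above all feed one AutoRoundConfig call (the qwen counterpart below passes a few extra flags). For context, here is a minimal sketch of the flow after this change, assuming the INC 3.x `prepare`/`convert` entry points; the values marked "placeholder" stand in for entries the example actually reads from its per-scheme config dict, and the comment on `reloading` reflects my reading of the new flag, not documented behavior.

```python
# Sketch only: consolidates the deepseek/quantize.py hunks above.
from transformers import AutoModelForCausalLM, AutoTokenizer
from neural_compressor.torch.quantization import AutoRoundConfig, prepare, convert

MODEL = "unsloth/DeepSeek-R1-BF16"  # from the README hunk above

# dtype="auto" keeps the checkpoint's stored dtype instead of upcasting;
# trust_remote_code=False assumes the architecture is available natively
# in transformers, so no remote modeling code is needed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL, device_map="cpu", trust_remote_code=False, dtype="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=False)

quant_config = AutoRoundConfig(
    tokenizer=tokenizer,
    scheme="MXFP8",              # placeholder
    iters=0,                     # placeholder
    fp_layers="lm_head",         # placeholder
    export_format="auto_round",  # placeholder
    output_dir="./qmodel",       # placeholder
    reloading=False,             # new in this PR; assumption: skips re-loading the exported checkpoint
)

# quantizer execute (INC 3.x style)
model = prepare(model, quant_config)
model = convert(model, quant_config)
```
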
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/README.md
index 14a04aa99e0..8f494b05a55 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/README.md
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/README.md
@@ -4,7 +4,7 @@ This example provides an end-to-end workflow to quantize Qwen models to MXFP4/MX
 ```bash
 pip install neural-compressor-pt==3.7
 # auto-round
-pip install auto-round==0.9.2
+pip install auto-round==0.9.3
 # vLLM
 git clone -b fused-moe-ar --single-branch --quiet https://github.com/yiliu30/vllm-fork.git && cd vllm-fork
 VLLM_USE_PRECOMPILED=1 pip install --editable . -vvv
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/quantize.py
index 28b7e59b75d..24b6a762ff2 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/quantize.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/quantize.py
@@ -62,11 +62,14 @@ def quant_model(args):
     quant_config = AutoRoundConfig(
         tokenizer=tokenizer,
         scheme=config["scheme"],
-        enable_torch_compile=args.enable_torch_compile,
+        enable_torch_compile=True,
         iters=config["iters"],
         fp_layers=config["fp_layers"],
         export_format=export_format,
+        disable_opt_rtn=True,
+        low_gpu_mem_usage=True,
         output_dir=output_dir,
+        reloading=False,
     )
 
     # quantizer execute
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_evaluation.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_evaluation.sh
index 1d805c7872b..d0039e5ecff 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_evaluation.sh
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/auto_round/qwen/run_evaluation.sh
@@ -114,7 +114,6 @@ lm_eval --model vllm \
     --tasks $TASK_NAME \
     --batch_size $BATCH_SIZE \
     --log_samples \
-    --limit 64 \
     --seed 42 \
     --output_path ${OUTPUT_DIR} \
     --show_config 2>&1 | tee ${OUTPUT_DIR}/log.txt
\ No newline at end of file
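
Reviewer note: dropping `--limit 64` from both run_evaluation.sh scripts means lm_eval now scores every example in each task rather than only the first 64, so the reported accuracy becomes comparable to full-run baselines (at the cost of longer wall time). Before kicking off a full evaluation, a quick smoke test of the exported checkpoint can catch loading problems early; a minimal sketch, assuming the patched vLLM fork from the READMEs is installed and using a placeholder model path:

```python
# Sketch only: sanity-check that the quantized export loads and generates.
from vllm import LLM, SamplingParams

llm = LLM(model="./qmodel")  # placeholder: the directory quantize.py wrote via output_dir
params = SamplingParams(temperature=0.0, max_tokens=64)

outputs = llm.generate(["The capital of France is"], params)
print(outputs[0].outputs[0].text)
```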