Commit 73fb19b

Update export script to handle SpinQuant models
1 parent fc72193

1 file changed: +14 -7 lines

llama_export/scripts/export_llama.sh

@@ -14,15 +14,22 @@ export_cmd="python -m examples.models.llama.export_llama \
   --output_name=/outputs/llama3_2.pte"
 
 # The quantized versions of Llama should cointain a quantization_args key in params.json
-if grep -q "lora_args" /model/params.json; then
+if grep -q "quantization_args" /model/params.json; then
   export_cmd="${export_cmd//-d bf16/-d fp32}"
   export_cmd+=" \
-  -qat \
-  -lora 16 \
-  --preq_mode 8da4w_output_8da8w \
-  --preq_group_size 32 \
-  --xnnpack-extended-ops \
-  --preq_embedding_quantize 8,0"
+  --preq_mode 8da4w_output_8da8w \
+  --preq_group_size 32 \
+  --xnnpack-extended-ops \
+  --preq_embedding_quantize 8,0"
+
+  if grep -q "lora_args" /model/params.json; then
+    export_cmd+=" \
+    -qat \
+    -lora 16"
+  else # SpinQuant
+    export_cmd+=" \
+    --use_spin_quant native"
+  fi
 fi
 
 if ! eval "$export_cmd"; then
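To make the new branching concrete, the sketch below shows roughly what the fully expanded command looks like for a SpinQuant checkpoint, i.e. a params.json that contains quantization_args but no lora_args. Only the bf16-to-fp32 substitution and the appended flags come from this diff; the checkpoint and params flags are hypothetical placeholders standing in for the unchanged top of the script.

# Sketch only: the --checkpoint/--params paths are assumptions, not taken from the commit.
python -m examples.models.llama.export_llama \
  --checkpoint /model/consolidated.00.pth \
  --params /model/params.json \
  -d fp32 \
  --output_name=/outputs/llama3_2.pte \
  --preq_mode 8da4w_output_8da8w \
  --preq_group_size 32 \
  --xnnpack-extended-ops \
  --preq_embedding_quantize 8,0 \
  --use_spin_quant native

A QAT + LoRA checkpoint (lora_args present) would end with -qat -lora 16 instead of --use_spin_quant native, while an unquantized model skips the whole block and keeps -d bf16.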
