Commit 73fb19b 1 parent fc72193 commit 73fb19b Copy full SHA for 73fb19b
File tree 1 file changed +14
-7
lines changed
1 file changed +14
-7
lines changed Original file line number Diff line number Diff line change @@ -14,15 +14,22 @@ export_cmd="python -m examples.models.llama.export_llama \
14
14
--output_name=/outputs/llama3_2.pte"
15
15
16
16
# The quantized versions of Llama should contain a quantization_args key in params.json
17
- if grep -q " lora_args " /model/params.json; then
17
+ if grep -q " quantization_args " /model/params.json; then
18
18
export_cmd=" ${export_cmd// -d bf16/ -d fp32} "
19
19
export_cmd+=" \
20
- -qat \
21
- -lora 16 \
22
- --preq_mode 8da4w_output_8da8w \
23
- --preq_group_size 32 \
24
- --xnnpack-extended-ops \
25
- --preq_embedding_quantize 8,0"
20
+ --preq_mode 8da4w_output_8da8w \
21
+ --preq_group_size 32 \
22
+ --xnnpack-extended-ops \
23
+ --preq_embedding_quantize 8,0"
24
+
25
+ if grep -q " lora_args" /model/params.json; then
26
+ export_cmd+=" \
27
+ -qat \
28
+ -lora 16"
29
+ else # SpinQuant
30
+ export_cmd+=" \
31
+ --use_spin_quant native"
32
+ fi
26
33
fi
27
34
28
35
if ! eval " $export_cmd " ; then
You can’t perform that action at this time.
0 commit comments