Skip to content

Commit 5994207

Browse files
committed
Fix export script to properly handle non-quantized models
1 parent ff66622 commit 5994207

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

llama_export/scripts/export_llama.sh

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,25 @@
22

33
set -eu

# Build the base (non-quantized) export command.
# Defaults: bf16 dtype, KV cache + SDPA, XNNPACK (-X), 2048-token context.
# NOTE(review): export_cmd is presumably executed later in this script
# (beyond the visible hunk) — confirm against the full file.
export_cmd="python -m examples.models.llama.export_llama \
--checkpoint /model/consolidated.00.pth \
--params /model/params.json \
-kv \
--use_sdpa_with_kv_cache \
-X \
-d bf16 \
--max_seq_length 2048 \
--metadata '{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}' \
--output_name=/outputs/llama3_2.pte"

# Quantized (QAT + LoRA) Llama checkpoints should contain a "lora_args" key
# in params.json; its presence gates the quantization flags below.
# NOTE(review): an earlier comment said "quantization_args", but the grep
# pattern — which is what actually decides the branch — is "lora_args".
if grep -q "lora_args" /model/params.json; then
  # Quantized export runs in fp32 instead of bf16 ...
  export_cmd="${export_cmd//-d bf16/-d fp32}"
  # ... and layers on the QAT/LoRA pre-quantization flags.
  export_cmd+=" \
-qat \
-lora 16 \
--preq_mode 8da4w_output_8da8w \
--preq_group_size 32 \
--xnnpack-extended-ops \
--preq_embedding_quantize 8,0"
fi

0 commit comments

Comments
 (0)