File tree 1 file changed +3
-5
lines changed
1 file changed +3
-5
lines changed Original file line number Diff line number Diff line change 2
2
3
3
set -eu

# Build the base export command for the Llama 3.2 model.
# Exports the checkpoint at /model to an ExecuTorch .pte file, using the KV
# cache + SDPA path, XNNPACK delegation (-X), and bf16 weights by default.
export_cmd="python -m examples.models.llama.export_llama \
  --checkpoint /model/consolidated.00.pth \
  --params /model/params.json \
  -kv \
  --use_sdpa_with_kv_cache \
  -X \
  -d bf16 \
  --max_seq_length 2048 \
  --metadata '{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}' \
  --output_name=/outputs/llama3_2.pte"

# The quantized versions of Llama should contain a quantization_args key in
# params.json.
# NOTE(review): the check below actually greps for "lora_args", not
# "quantization_args" as the comment claims — confirm which key really marks
# a quantized (QAT+LoRA) checkpoint.
if grep -q "lora_args" /model/params.json; then
  # Quantized checkpoints must be exported in fp32 and need the QAT/LoRA
  # pre-quantization flags.
  export_cmd="${export_cmd// -d bf16/ -d fp32}"
  export_cmd+=" \
    -qat \
    -lora 16 \
    --preq_mode 8da4w_output_8da8w \
    --preq_group_size 32 \
    --xnnpack-extended-ops \
    --preq_embedding_quantize 8,0"
fi
You can’t perform that action at this time.
0 commit comments