@@ -53,11 +53,7 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
5353HF_QWEN_PATH=$( python -c " from huggingface_hub import snapshot_download; print(snapshot_download('unsloth/Qwen3-0.6B'))" )
5454echo " Model downloaded to: $HF_QWEN_PATH "
5555
56- <<< <<< < HEAD
5756# ## BUILD LLAMA RUNNER.
58- =======
59- # Build llama runner.
60- >>>>>>> 3c0898753d (qwen lora test)
6157cmake_install_executorch_libraries
6258cmake_build_llama_runner
6359
@@ -112,17 +108,119 @@ cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math.pte --dat
112108NOW=$( date +" %H:%M:%S" )
113109echo " Finished at ${NOW} "
114110
115- RESULT2 =$( cat result2 .txt)
116- if [[ " ${RESULT2 } " == " ${EXPECTED_PREFIX} " * ]]; then
111+ RESULT =$( cat result .txt)
112+ if [[ " ${RESULT } " == " ${EXPECTED_PREFIX} " * ]]; then
117113 echo " Expected result prefix: ${EXPECTED_PREFIX} "
118- echo " Actual result: ${RESULT2 } "
114+ echo " Actual result: ${RESULT } "
119115 echo " Success"
120116else
121117 echo " Expected result prefix: ${EXPECTED_PREFIX} "
122- echo " Actual result: ${RESULT2} "
118+ echo " Actual result: ${RESULT} "
119+ echo " Failure; results not the same"
120+ cleanup_files
121+ exit 1
122+ fi
123+
124+ # Confirm the unquantized foundation weights file (qwen_foundation.ptd) is at most 2400000000 bytes (2.4GB); otherwise clean up and fail.
125+ FOUNDATION_SIZE=$( stat -c%s qwen_foundation.ptd)
126+ if [[ $FOUNDATION_SIZE -le " 2400000000" ]]; then
127+ echo " qwen_foundation.ptd size is: $FOUNDATION_SIZE "
128+ else
129+ echo " qwen_foundation.ptd size: $FOUNDATION_SIZE is greater than threshold 2.4GB"
130+ cleanup_files
131+ exit 1
132+ fi
133+
134+ # ## QUANTIZATION & PROGRAM DATA SEPARATION ###
135+ EXPECTED_QUANT_PREFIX="
136+ <|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant:
137+ <think>
138+ Okay, so I need to calculate 15% of 80.
139+ "
140+ EXPECTED_QUANT_LORA_PREFIX=" <|im_start|>user Calculate 15% of 80?<|im_end|><|im_start|>assistant
141+ To calculate 15% of 80, we can multiply 80 by 15/100.
142+ So, 15% of 80 is equal to (80 * 15) / 100 = 1200 / 100 = 12.
143+ #### 12
144+ The answer is: 12<|im_end|>
145+ "
146+
147+ # Export the quantized (qmode 8da4w, group size 32) model without LoRA: program to qwen_q.pte, foundation weights to qwen_foundation_q.ptd.
148+ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
149+ --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
150+ +export.output_name=" qwen_q.pte" \
151+ +export.foundation_weights_file=" qwen_foundation_q.ptd" \
152+ +quantization.qmode=" 8da4w" \
153+ +quantization.group_size=32
154+
155+ # Export the quantized (qmode 8da4w, group size 32) LoRA model using the adapter under HF_ADAPTER_PATH: program to qwen_lora_math_q.pte, LoRA weights to qwen_lora_math_q.ptd, foundation weights to qwen_foundation_lora_q.ptd.
156+ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
157+ --config examples/models/qwen3/config/qwen3_xnnpack.yaml \
158+ +base.adapter_checkpoint=" ${HF_ADAPTER_PATH} /adapter_model.safetensors" \
159+ +base.adapter_config=" ${HF_ADAPTER_PATH} /adapter_config.json" \
160+ +export.output_name=" qwen_lora_math_q.pte" \
161+ +export.foundation_weights_file=" qwen_foundation_lora_q.ptd" \
162+ +export.lora_weights_file=" qwen_lora_math_q.ptd" \
163+ +quantization.qmode=" 8da4w" \
164+ +quantization.group_size=32
165+
166+ # Confirm that qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are the same; on mismatch, clean up like every other failure path and fail.
167+ if diff -q qwen_foundation_lora_q.ptd qwen_foundation_q.ptd > /dev/null; then
168+ echo " qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are identical."
169+ else
170+ echo " qwen_foundation_lora_q.ptd and qwen_foundation_q.ptd are not identical."
171+ cleanup_files
172+ exit 1
173+ fi
174+
175+ # Run quantized qwen model (no adapter); its output must start with EXPECTED_QUANT_PREFIX.
176+ NOW=$( date +" %H:%M:%S" )
177+ echo " Starting to run llama runner at ${NOW} "
178+ # shellcheck source=/dev/null
179+ cmake-out/examples/models/llama/llama_main --model_path=qwen_q.pte --data_paths=" qwen_foundation_q.ptd" --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result.txt
180+ NOW=$( date +" %H:%M:%S" )
181+ echo " Finished at ${NOW} "
182+ RESULT=$( cat result.txt)
183+ if [[ " ${RESULT} " == " ${EXPECTED_QUANT_PREFIX} " * ]]; then
184+ echo " Expected result prefix: ${EXPECTED_QUANT_PREFIX} "
185+ echo " Actual result: ${RESULT} "
186+ echo " Success"
187+ else
188+ echo " Expected result prefix: ${EXPECTED_QUANT_PREFIX} "
189+ echo " Actual result: ${RESULT} "
123190 echo " Failure; results not the same"
124191 cleanup_files
125192 exit 1
126193fi
127194
195+ # Run quantized lora adapter; its output must start with EXPECTED_QUANT_LORA_PREFIX.
196+ NOW=$( date +" %H:%M:%S" )
197+ echo " Starting to run llama runner at ${NOW} "
198+ # shellcheck source=/dev/null
199+ cmake-out/examples/models/llama/llama_main --model_path=qwen_lora_math_q.pte --data_paths=" qwen_foundation_q.ptd,qwen_lora_math_q.ptd" --prompt=" ${PROMPT} " ${RUNTIME_ARGS} > result.txt
200+ NOW=$( date +" %H:%M:%S" )
201+ echo " Finished at ${NOW} "
202+
203+ RESULT=$( cat result.txt)
204+ if [[ " ${RESULT} " == " ${EXPECTED_QUANT_LORA_PREFIX} " * ]]; then
205+ echo " Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX} "
206+ echo " Actual result: ${RESULT} "
207+ echo " Success"
208+ else
209+ echo " Expected result prefix: ${EXPECTED_QUANT_LORA_PREFIX} "
210+ echo " Actual result: ${RESULT} "
211+ echo " Failure; results not the same"
212+ cleanup_files
213+ exit 1
214+ fi
215+
216+ # Confirm qwen_foundation_q.ptd is at most 1000000000 bytes (the 1GB threshold); otherwise clean up and fail.
217+ FOUNDATION_Q_SIZE=$( stat -c%s qwen_foundation_q.ptd)
218+ if [[ $FOUNDATION_Q_SIZE -le " 1000000000" ]]; then
219+ echo " qwen_foundation_q.ptd size is: $FOUNDATION_Q_SIZE "
220+ else
221+ echo " qwen_foundation_q.ptd size: $FOUNDATION_Q_SIZE is greater than threshold 1GB"
222+ cleanup_files
223+ exit 1
224+ fi
225+
128226cleanup_files
0 commit comments