Skip to content

Commit 9fe348c

Browse files
committed
Refactor CMake build for examples/models/llama
1 parent 74403e2 commit 9fe348c

File tree

12 files changed: 135 additions & 333 deletions

12 files changed: 135 additions & 333 deletions

.ci/scripts/build_llama_android.sh

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,12 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
1515
fi
1616
which "${PYTHON_EXECUTABLE}"
1717

18-
install_executorch_and_backend_lib() {
19-
echo "Installing executorch and xnnpack backend"
18+
build_llama_android() {
19+
echo "Building llama runner for Android..."
20+
pushd extension/llm/tokenizers
21+
echo "Updating tokenizers submodule"
22+
git submodule update --init
23+
popd
2024
clean_executorch_install_folders
2125
mkdir cmake-android-out
2226
ANDROID_NDK=${ANDROID_NDK:-/opt/ndk}
@@ -26,30 +30,10 @@ install_executorch_and_backend_lib() {
2630
-DBUCK2="${BUCK2}" \
2731
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
2832
-DANDROID_ABI="${ANDROID_ABI}" \
29-
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
3033
-DCMAKE_BUILD_TYPE=Release \
3134
-DXNNPACK_ENABLE_ARM_BF16=OFF \
3235
-Bcmake-android-out .
3336

34-
cmake --build cmake-android-out -j4 --target install --config Release
37+
cmake --build cmake-android-out -j4 --target llama_main --config Release
3538
}
36-
37-
build_llama_runner() {
38-
echo "Building llama runner for Android..."
39-
pushd extension/llm/tokenizers
40-
echo "Updating tokenizers submodule"
41-
git submodule update --init
42-
popd
43-
ANDROID_ABI=arm64-v8a
44-
cmake -DBUCK2="${BUCK2}" \
45-
-DBUILD_TESTING=OFF \
46-
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
47-
-DANDROID_ABI="${ANDROID_ABI}" \
48-
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
49-
-DCMAKE_BUILD_TYPE=Release \
50-
-Bcmake-android-out/examples/models/llama examples/models/llama
51-
52-
cmake --build cmake-android-out/examples/models/llama -j4 --config Release
53-
}
54-
install_executorch_and_backend_lib
55-
build_llama_runner
39+
build_llama_android

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -149,35 +149,21 @@ fi
149149

150150
which "${PYTHON_EXECUTABLE}"
151151

152-
cmake_install_executorch_libraries() {
153-
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
152+
cmake_build_llama() {
153+
echo "Building llama runner"
154+
pushd extension/llm/tokenizers
155+
echo "Updating tokenizers submodule"
156+
git submodule update --init
157+
popd
154158
rm -rf cmake-out
155159
retry cmake --preset llm \
156160
-DEXECUTORCH_BUILD_TESTS=ON \
157161
-DBUILD_TESTING=OFF \
158-
-DCMAKE_INSTALL_PREFIX=cmake-out \
159162
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
160163
-DEXECUTORCH_BUILD_QNN="$QNN" \
161164
-DEXECUTORCH_ENABLE_LOGGING=ON \
162165
-DQNN_SDK_ROOT="$QNN_SDK_ROOT"
163-
cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
164-
}
165-
166-
cmake_build_llama_runner() {
167-
echo "Building llama runner"
168-
pushd extension/llm/tokenizers
169-
echo "Updating tokenizers submodule"
170-
git submodule update --init
171-
popd
172-
dir="examples/models/llama"
173-
if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
174-
PRESET="llama-debug"
175-
else
176-
PRESET="llama-release"
177-
fi
178-
pushd "${dir}"
179-
cmake --workflow --preset "${PRESET}"
180-
popd
166+
cmake --build cmake-out -j9 --target llama_main --config "$CMAKE_BUILD_TYPE"
181167
}
182168

183169
cleanup_files() {
@@ -269,8 +255,7 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
269255
# shellcheck source=/dev/null
270256
$BUCK run examples/models/llama:main -- ${RUNTIME_ARGS} > result.txt
271257
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
272-
cmake_install_executorch_libraries
273-
cmake_build_llama_runner
258+
cmake_build_llama
274259
# Run llama runner
275260
NOW=$(date +"%H:%M:%S")
276261
echo "Starting to run llama runner at ${NOW}"

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,8 @@ echo "Update tokenizers submodule"
2323
git submodule update --init
2424
popd
2525

26-
# Install ET with CMake
26+
# Build llama runner with torchao
2727
cmake -DPYTHON_EXECUTABLE=python \
28-
-DCMAKE_INSTALL_PREFIX=cmake-out \
2928
-DEXECUTORCH_ENABLE_LOGGING=1 \
3029
-DCMAKE_BUILD_TYPE=Release \
3130
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -41,14 +40,7 @@ cmake -DPYTHON_EXECUTABLE=python \
4140
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
4241
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
4342
-Bcmake-out .
44-
cmake --build cmake-out -j16 --config Release --target install
45-
46-
# Install llama runner with torchao
47-
cmake -DPYTHON_EXECUTABLE=python \
48-
-DCMAKE_BUILD_TYPE=Release \
49-
-Bcmake-out/examples/models/llama \
50-
examples/models/llama
51-
cmake --build cmake-out/examples/models/llama -j16 --config Release
43+
cmake --build cmake-out -j16 --config Release --target llama_main
5244

5345
# Download stories llama110m artifacts
5446
download_stories_model_artifacts

.ci/scripts/test_lora.sh

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,15 @@ set -exu
99
# shellcheck source=/dev/null
1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12-
cmake_install_executorch_libraries() {
13-
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
14-
rm -rf cmake-out
15-
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
16-
cmake --build --preset llm-release-install
17-
}
18-
19-
cmake_build_llama_runner() {
12+
cmake_build_llama() {
2013
echo "Building llama runner"
2114
pushd extension/llm/tokenizers
2215
echo "Updating tokenizers submodule"
2316
git submodule update --init
2417
popd
25-
make llama-cpu
18+
rm -rf cmake-out
19+
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
20+
cmake --build cmake-out -j9 --target llama_main --config Release
2621
}
2722

2823
cleanup_files() {
@@ -57,8 +52,7 @@ HF_QWEN_PATH=$(python -c "from huggingface_hub import snapshot_download; print(s
5752
echo "Model downloaded to: $HF_QWEN_PATH"
5853

5954
### BUILD LLAMA RUNNER.
60-
cmake_install_executorch_libraries
61-
cmake_build_llama_runner
55+
cmake_build_llama
6256

6357
# Runner constants.
6458
RUNTIME_ARGS="--tokenizer_path=${HF_QWEN_PATH}/ --temperature=0 --seq_len=100 --warmup=1"

.ci/scripts/test_lora_multimethod.sh

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,20 +9,15 @@ set -exu
99
# shellcheck source=/dev/null
1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12-
cmake_install_executorch_libraries() {
13-
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
14-
rm -rf cmake-out
15-
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
16-
cmake --build --preset llm-release-install
17-
}
18-
19-
cmake_build_llama_runner() {
12+
cmake_build_llama() {
2013
echo "Building llama runner"
2114
pushd extension/llm/tokenizers
2215
echo "Updating tokenizers submodule"
2316
git submodule update --init
2417
popd
25-
make llama-cpu
18+
rm -rf cmake-out
19+
cmake --preset llm-release -DEXECUTORCH_ENABLE_LOGGING=ON
20+
cmake --build cmake-out -j9 --target llama_main --config Release
2621
}
2722

2823
cleanup_files() {
@@ -55,8 +50,7 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
5550
--config examples/models/qwen3/config/qwen3_multimethod.yaml
5651

5752
### BUILD LLAMA RUNNER ###
58-
cmake_install_executorch_libraries
59-
cmake_build_llama_runner
53+
cmake_build_llama
6054

6155
# Runner constants.
6256
RUNTIME_ARGS="--tokenizer_path=${HF_QWEN_PATH}/ --temperature=0 --seq_len=100 --warmup=1"

.ci/scripts/test_torchao_huggingface_checkpoints.sh

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,6 @@ fi
147147
if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
148148
echo "[runner] Building and testing llama_main ..."
149149
cmake -DPYTHON_EXECUTABLE=python \
150-
-DCMAKE_INSTALL_PREFIX=cmake-out \
151150
-DEXECUTORCH_ENABLE_LOGGING=1 \
152151
-DCMAKE_BUILD_TYPE=Release \
153152
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -163,15 +162,7 @@ if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
163162
-DEXECUTORCH_BUILD_KERNELS_LLM=ON \
164163
-DEXECUTORCH_BUILD_KERNELS_TORCHAO=${EXECUTORCH_BUILD_KERNELS_TORCHAO} \
165164
-Bcmake-out .
166-
cmake --build cmake-out -j16 --config Release --target install
167-
168-
169-
# Install llama runner
170-
cmake -DPYTHON_EXECUTABLE=python \
171-
-DCMAKE_BUILD_TYPE=Release \
172-
-Bcmake-out/examples/models/llama \
173-
examples/models/llama
174-
cmake --build cmake-out/examples/models/llama -j16 --config Release
165+
cmake --build cmake-out -j16 --config Release --target llama_main
175166

176167
# Run the model
177168
./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"

.github/workflows/mlx.yml

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -365,9 +365,7 @@ jobs:
365365
echo "::endgroup::"
366366
367367
echo "::group::Build Llama runner with MLX"
368-
pushd examples/models/llama
369-
${CONDA_RUN} cmake --workflow --preset llama-release
370-
popd
368+
${CONDA_RUN} cmake --build cmake-out --target llama_main
371369
echo "::endgroup::"
372370
373371
echo "::group::Download stories110M artifacts"

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1322,6 +1322,12 @@ if(EXECUTORCH_BUILD_ANDROID_JNI)
13221322
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
13231323
endif()
13241324

1325+
# Examples — EXCLUDE_FROM_ALL so they only build when explicitly requested
1326+
# (e.g., cmake --build cmake-out --target llama_main).
1327+
add_subdirectory(
1328+
${CMAKE_CURRENT_SOURCE_DIR}/examples/models/llama EXCLUDE_FROM_ALL
1329+
)
1330+
13251331
include(Test.cmake)
13261332

13271333
install(

CMakePresets.json

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -442,6 +442,35 @@
442442
"install"
443443
],
444444
"jobs": 0
445+
},
446+
{
447+
"name": "llama-release-build",
448+
"displayName": "Build llama_main (Release)",
449+
"configurePreset": "llm-release",
450+
"configuration": "Release",
451+
"targets": [
452+
"llama_main"
453+
],
454+
"jobs": 0
455+
},
456+
{
457+
"name": "llama-cuda-build",
458+
"displayName": "Build llama_main with CUDA (Release)",
459+
"configurePreset": "llm-release-cuda",
460+
"configuration": "Release",
461+
"targets": [
462+
"llama_main"
463+
],
464+
"jobs": 0
465+
},
466+
{
467+
"name": "llama-cuda-debug-build",
468+
"displayName": "Build llama_main with CUDA (Debug)",
469+
"configurePreset": "llm-debug-cuda",
470+
"targets": [
471+
"llama_main"
472+
],
473+
"jobs": 0
445474
}
446475
],
447476
"workflowPresets": [
@@ -584,6 +613,48 @@
584613
"name": "mlx-debug-install"
585614
}
586615
]
616+
},
617+
{
618+
"name": "llama-release",
619+
"displayName": "Configure and build llama_main (Release, CPU)",
620+
"steps": [
621+
{
622+
"type": "configure",
623+
"name": "llm-release"
624+
},
625+
{
626+
"type": "build",
627+
"name": "llama-release-build"
628+
}
629+
]
630+
},
631+
{
632+
"name": "llama-cuda",
633+
"displayName": "Configure and build llama_main (Release, CUDA)",
634+
"steps": [
635+
{
636+
"type": "configure",
637+
"name": "llm-release-cuda"
638+
},
639+
{
640+
"type": "build",
641+
"name": "llama-cuda-build"
642+
}
643+
]
644+
},
645+
{
646+
"name": "llama-cuda-debug",
647+
"displayName": "Configure and build llama_main (Debug, CUDA)",
648+
"steps": [
649+
{
650+
"type": "configure",
651+
"name": "llm-debug-cuda"
652+
},
653+
{
654+
"type": "build",
655+
"name": "llama-cuda-debug-build"
656+
}
657+
]
587658
}
588659
]
589660
}

Makefile

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -342,28 +342,22 @@ silero-vad-cpu:
342342
@echo " Binary: cmake-out/examples/models/silero_vad/silero_vad_stream_runner"
343343

344344
llama-cpu:
345-
@echo "==> Building and installing ExecuTorch..."
346-
cmake --workflow --preset llm-release
347345
@echo "==> Building Llama runner (CPU)..."
348-
cd examples/models/llama && cmake --workflow --preset llama-release
346+
cmake --workflow --preset llama-release
349347
@echo ""
350348
@echo "✓ Build complete!"
351349
@echo " Binary: cmake-out/examples/models/llama/llama_main"
352350

353351
llama-cuda:
354-
@echo "==> Building and installing ExecuTorch with CUDA..."
355-
cmake --workflow --preset llm-release-cuda
356352
@echo "==> Building Llama runner with CUDA..."
357-
cd examples/models/llama && cmake --workflow --preset llama-cuda
353+
cmake --workflow --preset llama-cuda
358354
@echo ""
359355
@echo "✓ Build complete!"
360356
@echo " Binary: cmake-out/examples/models/llama/llama_main"
361357

362358
llama-cuda-debug:
363-
@echo "==> Building and installing ExecuTorch with CUDA (debug mode)..."
364-
cmake --workflow --preset llm-debug-cuda
365359
@echo "==> Building Llama runner with CUDA (debug mode)..."
366-
cd examples/models/llama && cmake --workflow --preset llama-cuda-debug
360+
cmake --workflow --preset llama-cuda-debug
367361
@echo ""
368362
@echo "✓ Build complete!"
369363
@echo " Binary: cmake-out/examples/models/llama/llama_main"

0 commit comments

Comments
 (0)