@@ -4,8 +4,10 @@ set(BUFFERIZE_SIMPLE_OPTS "bufferize-function-boundaries")
# Pass pipeline handed to `mlir-opt -pass-pipeline` by the lowering rules in
# this file; it runs the four TOSA conversion passes on every func.func.
set(TOSA_PIPELINE "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))")
55
66add_custom_command (
7- OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /forward.mlir
8- ${CMAKE_CURRENT_BINARY_DIR} /subgraph0.mlir
7+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /forward_prefill.mlir
8+ ${CMAKE_CURRENT_BINARY_DIR} /subgraph0_prefill.mlir
9+ ${CMAKE_CURRENT_BINARY_DIR} /forward_decode.mlir
10+ ${CMAKE_CURRENT_BINARY_DIR} /subgraph0_decode.mlir
911 ${CMAKE_CURRENT_BINARY_DIR} /arg0.data
1012 COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR} /import-deepseek-r1.py
1113 --output -dir ${CMAKE_CURRENT_BINARY_DIR}
@@ -23,8 +25,8 @@ add_custom_command(
2325)
2426
2527add_custom_command (
26- OUTPUT forward .o
27- COMMAND ${BUDDY_BINARY_DIR} /buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /forward .mlir
28+ OUTPUT forward_prefill .o
29+ COMMAND ${BUDDY_BINARY_DIR} /buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /forward_prefill .mlir
2830 -simplify-tosa-reshape |
2931 ${LLVM_TOOLS_BINARY_DIR} /mlir-opt
3032 -pass-pipeline ${TOSA_PIPELINE} |
@@ -62,14 +64,14 @@ add_custom_command(
6264 ${LLVM_TOOLS_BINARY_DIR} /mlir-translate -mlir-to-llvmir |
6365 ${LLVM_TOOLS_BINARY_DIR} /llvm-as |
6466 ${LLVM_TOOLS_BINARY_DIR} /llc -filetype=obj -relocation-model=pic -O3
65- -o ${CMAKE_CURRENT_BINARY_DIR} /forward .o
66- DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /forward .mlir
67- COMMENT "Building forward .o "
67+ -o ${CMAKE_CURRENT_BINARY_DIR} /forward_prefill .o
68+ DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /forward_prefill .mlir
69+ COMMENT "Building forward_prefill .o "
6870 VERBATIM )
6971
7072add_custom_command (
71- OUTPUT subgraph .o
72- COMMAND ${BUDDY_BINARY_DIR} /buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /subgraph0 .mlir
73+ OUTPUT subgraph_prefill .o
74+ COMMAND ${BUDDY_BINARY_DIR} /buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /subgraph0_prefill .mlir
7375 -simplify-tosa-reshape |
7476 ${LLVM_TOOLS_BINARY_DIR} /mlir-opt
7577 -pass-pipeline ${TOSA_PIPELINE} |
@@ -109,9 +111,101 @@ add_custom_command(
109111 ${LLVM_TOOLS_BINARY_DIR} /mlir-translate -mlir-to-llvmir |
110112 ${LLVM_TOOLS_BINARY_DIR} /llvm-as |
111113 ${LLVM_TOOLS_BINARY_DIR} /llc -filetype=obj -relocation-model=pic -O3
112- -o ${CMAKE_CURRENT_BINARY_DIR} /subgraph.o
113- DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /subgraph0.mlir
114- COMMENT "Building subgraph.o "
114+ -o ${CMAKE_CURRENT_BINARY_DIR} /subgraph_prefill.o
115+ DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR} /subgraph0_prefill.mlir
116+ COMMENT "Building subgraph_prefill.o "
117+ VERBATIM )
118+
# Build rule: forward_decode.mlir -> forward_decode.o
#
# Pipes the decode-phase forward graph through
#   buddy-opt (TOSA reshape simplification)
#   -> mlir-opt (TOSA_PIPELINE)
#   -> buddy-opt (bufferization, vectorization, affine/OpenMP lowering and
#      conversion to the LLVM dialect)
#   -> mlir-translate -> llvm-as -> llc
# and writes a position-independent object file into the current binary dir.
#
# The rule re-runs when the buddy-opt target or forward_decode.mlir changes.
# -convert-arith-to-llvm appears twice in the pass list; both occurrences are
# kept exactly as in the original pipeline.
add_custom_command(
  OUTPUT forward_decode.o
  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward_decode.mlir
            -simplify-tosa-reshape |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
            -pass-pipeline ${TOSA_PIPELINE} |
          ${BUDDY_BINARY_DIR}/buddy-opt
            -eliminate-empty-tensors
            -empty-tensor-to-alloc-tensor
            -one-shot-bufferize=${BUFFERIZE_SIMPLE_OPTS}
            -expand-strided-metadata
            -ownership-based-buffer-deallocation
            -buffer-deallocation-simplification
            -bufferization-lower-deallocations
            -matmul-vectorization
            -batchmatmul-optimize
            -convert-linalg-to-affine-loops
            -affine-loop-fusion
            -affine-parallelize
            -convert-vector-to-scf
            -lower-affine
            -convert-scf-to-openmp
            -cse
            -memref-expand
            -arith-expand
            -convert-vector-to-llvm
            -convert-arith-to-llvm
            -finalize-memref-to-llvm
            -convert-scf-to-cf
            -convert-cf-to-llvm
            -llvm-request-c-wrappers
            -convert-openmp-to-llvm
            -convert-arith-to-llvm
            -convert-math-to-llvm
            -convert-math-to-libm
            -convert-func-to-llvm
            -reconcile-unrealized-casts |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
          ${LLVM_TOOLS_BINARY_DIR}/llvm-as |
          ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
            -o ${CMAKE_CURRENT_BINARY_DIR}/forward_decode.o
  DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/forward_decode.mlir
  COMMENT "Building forward_decode.o "
  VERBATIM)
163+
# Build rule: subgraph0_decode.mlir -> subgraph_decode.o
#
# Same tool chain as the forward_decode.o rule (buddy-opt -> mlir-opt ->
# buddy-opt -> mlir-translate -> llvm-as -> llc), with a pass list that differs
# in three places:
#   * -convert-elementwise-to-linalg runs before one-shot bufferization;
#   * -func-bufferize-dynamic-offset runs after -convert-scf-to-openmp;
#   * -batchmatmul-optimize is NOT applied (it was disabled in the original
#     pipeline; the reason is not recorded here -- TODO confirm before
#     re-enabling).
#
# The rule re-runs when the buddy-opt target or subgraph0_decode.mlir changes.
add_custom_command(
  OUTPUT subgraph_decode.o
  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph0_decode.mlir
            -simplify-tosa-reshape |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-opt
            -pass-pipeline ${TOSA_PIPELINE} |
          ${BUDDY_BINARY_DIR}/buddy-opt
            -eliminate-empty-tensors
            -empty-tensor-to-alloc-tensor
            -convert-elementwise-to-linalg
            -one-shot-bufferize=${BUFFERIZE_SIMPLE_OPTS}
            -expand-strided-metadata
            -ownership-based-buffer-deallocation
            -buffer-deallocation-simplification
            -bufferization-lower-deallocations
            -matmul-vectorization
            -convert-linalg-to-affine-loops
            -affine-loop-fusion
            -affine-parallelize
            -convert-vector-to-scf
            -lower-affine
            -convert-scf-to-openmp
            -func-bufferize-dynamic-offset
            -cse
            -memref-expand
            -arith-expand
            -convert-vector-to-llvm
            -convert-arith-to-llvm
            -finalize-memref-to-llvm
            -convert-scf-to-cf
            -convert-cf-to-llvm
            -llvm-request-c-wrappers
            -convert-openmp-to-llvm
            -convert-arith-to-llvm
            -convert-math-to-llvm
            -convert-math-to-libm
            -convert-func-to-llvm
            -reconcile-unrealized-casts |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
          ${LLVM_TOOLS_BINARY_DIR}/llvm-as |
          ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
            -o ${CMAKE_CURRENT_BINARY_DIR}/subgraph_decode.o
  DEPENDS buddy-opt ${CMAKE_CURRENT_BINARY_DIR}/subgraph0_decode.mlir
  COMMENT "Building subgraph_decode.o "
  VERBATIM)
116210
117211add_custom_command (
@@ -202,7 +296,7 @@ add_custom_command(
202296 COMMENT "Building subgraph.o "
203297 VERBATIM )
204298
# Static archives of the generated model objects.
# DEEPSEEKR1 bundles the prefill and decode objects (forward + subgraph for
# each phase); DEEPSEEKR1_F16 bundles the f16 forward/subgraph objects.
add_library(DEEPSEEKR1 STATIC
  forward_prefill.o
  subgraph_prefill.o
  forward_decode.o
  subgraph_decode.o
)
add_library(DEEPSEEKR1_F16 STATIC forward-f16.o subgraph-f16.o)
207301
208302SET_SOURCE_FILES_PROPERTIES (
@@ -228,12 +322,12 @@ set(DEEPSEEKR1_EXAMPLE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
set(DEEPSEEKR1_EXAMPLE_BUILD_PATH ${CMAKE_CURRENT_BINARY_DIR})

# Expose the example's source and build directories to both runner executables
# as string macros. Each value ends with "/" -- presumably so the C++ side can
# append a file name directly; verify against the runner sources.
target_compile_definitions(buddy-deepseek-r1-run PRIVATE
  DEEPSEEKR1_EXAMPLE_PATH="${DEEPSEEKR1_EXAMPLE_PATH}/"
  DEEPSEEKR1_EXAMPLE_BUILD_PATH="${DEEPSEEKR1_EXAMPLE_BUILD_PATH}/"
)
target_compile_definitions(buddy-deepseek-r1-f16-run PRIVATE
  DEEPSEEKR1_EXAMPLE_PATH="${DEEPSEEKR1_EXAMPLE_PATH}/"
  DEEPSEEKR1_EXAMPLE_BUILD_PATH="${DEEPSEEKR1_EXAMPLE_BUILD_PATH}/"
)

# Add the LLVM library directory to the linker search path for the runner.
target_link_directories(buddy-deepseek-r1-run PRIVATE ${LLVM_LIBRARY_DIR})
0 commit comments