Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions examples/BuddyNext/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,52 @@ MLIR_RUNNER_UTILS := ../../llvm/build/lib/libmlir_runner_utils.dylib
MLIR_C_RUNNER_UTILS := ../../llvm/build/lib/libmlir_c_runner_utils.dylib
MTRIPLE := x86_64-apple-darwin
endif
# next-silu-run: lower ./next-silu.mlir to the LLVM dialect and JIT-execute it.
#   Stage 1: convert the TOSA ops to linalg / tensor / arith.
#   Stage 2: bufferize, lower linalg to affine loops (with loop fusion), then
#            convert affine / scf / cf / vector / memref / math / arith / func
#            to the LLVM dialect.
#   Stage 3: run @main through ${MLIR_CPU_RUNNER} with the runner-utils shared
#            libraries loaded.
# The pass list mirrors the RUN lines at the top of next-silu.mlir; keep the
# two in sync when editing either one.
# Declared phony because the target names a command, not a file it produces.
.PHONY: next-silu-run
next-silu-run:
	@${MLIR_OPT} ./next-silu.mlir \
		-pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" | \
	${MLIR_OPT} \
		-arith-expand \
		-eliminate-empty-tensors \
		-empty-tensor-to-alloc-tensor \
		-one-shot-bufferize="bufferize-function-boundaries" \
		-convert-linalg-to-affine-loops \
		-affine-loop-fusion \
		-lower-affine \
		-convert-vector-to-scf \
		-expand-strided-metadata \
		-convert-vector-to-llvm \
		-memref-expand \
		-arith-expand \
		-convert-arith-to-llvm \
		-finalize-memref-to-llvm \
		-convert-scf-to-cf \
		-convert-cf-to-llvm \
		-convert-openmp-to-llvm \
		-convert-arith-to-llvm \
		-convert-math-to-llvm \
		-convert-math-to-libm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

# next-silu-silu-run: lower ./next-silu-silu.mlir (the hand-vectorized SiLU
# kernel) to the LLVM dialect and JIT-execute it.
#   Stage 1: run the TOSA conversion pipeline (kept identical to the RUN lines
#            in next-silu-silu.mlir).
#   Stage 2: lower linalg / affine / vector / scf / cf / memref / math /
#            arith / func to the LLVM dialect.
#   Stage 3: run @main through ${MLIR_CPU_RUNNER} with the runner-utils shared
#            libraries loaded.
# Declared phony because the target names a command, not a file it produces.
.PHONY: next-silu-silu-run
next-silu-silu-run:
	@${MLIR_OPT} ./next-silu-silu.mlir \
		-pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" | \
	${MLIR_OPT} \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-vector-to-scf \
		-convert-scf-to-cf \
		-convert-cf-to-llvm \
		-convert-vector-to-llvm \
		-finalize-memref-to-llvm \
		-convert-math-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void \
		-shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

next-attention-lower:
@${MLIR_OPT} ./next-attention.mlir \
Expand Down
77 changes: 77 additions & 0 deletions examples/BuddyNext/next-silu-silu.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// RUN: buddy-opt %s \
// RUN: -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" \
// RUN: | buddy-opt \
// RUN: -convert-linalg-to-loops \
// RUN: -lower-affine \
// RUN: -convert-vector-to-scf \
// RUN: -convert-scf-to-cf \
// RUN: -convert-cf-to-llvm \
// RUN: -convert-vector-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-math-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-func-to-llvm \
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s
// Identity map applied to the lower and upper bounds of the affine loops in
// @kernel.
#map = affine_map<(d0) -> (d0)>
// External functions declared without a body. The RUN pipeline above loads the
// mlir_runner_utils and mlir_c_runner_utils shared libraries, which are
// presumably where these symbols are resolved at JIT time (confirm).
// @rtclock returns an f64 timestamp; @kernel subtracts two of them.
func.func private @rtclock() -> f64
// Prints an unranked f32 memref; declared with the C-interface attribute.
func.func private @printMemrefF32(%ptr: memref<*xf32>) attributes {llvm.emit_c_interface}

// Vectorized SiLU: output[i, j, k] = x * sigmoid(x) = x / (1 + exp(-x)).
// Processes the innermost dimension 8 f32 lanes at a time, prints the result
// buffer, then prints the time elapsed between the two @rtclock calls.
func.func @kernel(%arg0: memref<1x40x8960xf32>) {
  // Timer starts before any other work, so the allocation below is timed too.
  %t_begin = call @rtclock() : () -> f64

  // Index constants: loop lower bound and the dimension selectors.
  %idx0 = arith.constant 0 : index
  %idx1 = arith.constant 1 : index
  %idx2 = arith.constant 2 : index

  // Loop extents read back from the input memref.
  %extent0 = memref.dim %arg0, %idx0 : memref<1x40x8960xf32>
  %extent1 = memref.dim %arg0, %idx1 : memref<1x40x8960xf32>
  %extent2 = memref.dim %arg0, %idx2 : memref<1x40x8960xf32>

  // Scalar constants: the padding value for the vector read and the 1.0 that
  // is splat across all 8 lanes for the sigmoid computation.
  %pad = arith.constant 0.0 : f32 // for padding
  %one = arith.constant 1.0 : f32
  %ones = vector.broadcast %one : f32 to vector<8xf32>

  // Destination buffer, same shape as the input.
  %result = memref.alloc() : memref<1x40x8960xf32>

  affine.for %b = #map(%idx0) to #map(%extent0) {
    affine.for %r = #map(%idx0) to #map(%extent1) {
      // 8960 is a multiple of 8, so every iteration covers a full vector.
      affine.for %c = #map(%idx0) to #map(%extent2) step 8 {
        %x = vector.transfer_read %arg0[%b, %r, %c], %pad : memref<1x40x8960xf32>, vector<8xf32>
        // sigmoid(x) = 1 / (1 + exp(-x))
        %minus_x = arith.negf %x : vector<8xf32>
        %e = math.exp %minus_x : vector<8xf32>
        %denom = arith.addf %ones, %e : vector<8xf32>
        %sig = arith.divf %ones, %denom : vector<8xf32>
        // silu(x) = x * sigmoid(x)
        %y = arith.mulf %x, %sig : vector<8xf32>
        vector.transfer_write %y, %result[%b, %r, %c] : vector<8xf32>, memref<1x40x8960xf32>
      }
    }
  }

  // Stop the timer once the loop nest is done; printing is not timed.
  %t_finish = call @rtclock() : () -> f64
  %elapsed = arith.subf %t_finish, %t_begin : f64

  // Print the result buffer.
  %printable = memref.cast %result : memref<1x40x8960xf32> to memref<*xf32>
  // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 8960] strides = [358400, 8960, 1] data =
  // CHECK-NEXT: [
  // CHECK-SAME: [2.85772{{(, 2.85772)*}}],
  call @printMemrefF32(%printable) : (memref<*xf32>) -> ()
  memref.dealloc %result : memref<1x40x8960xf32>

  // Print the elapsed time after the data.
  vector.print %elapsed : f64

  return
}

// Entry point: builds a 1x40x8960 f32 buffer with every element set to 3.0,
// runs @kernel on it, and frees the buffer.
func.func @main() {
  %fill_value = arith.constant 3.0 : f32
  %buffer = memref.alloc() : memref<1x40x8960xf32>
  linalg.fill ins(%fill_value : f32) outs(%buffer : memref<1x40x8960xf32>)

  call @kernel(%buffer) : (memref<1x40x8960xf32>) -> ()

  memref.dealloc %buffer : memref<1x40x8960xf32>
  return
}
64 changes: 64 additions & 0 deletions examples/BuddyNext/next-silu.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: buddy-opt %s \
// RUN: -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" \
// RUN: | buddy-opt \
// RUN: -arith-expand \
// RUN: -eliminate-empty-tensors \
// RUN: -empty-tensor-to-alloc-tensor \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
// RUN: -convert-linalg-to-affine-loops \
// RUN: -affine-loop-fusion \
// RUN: -lower-affine \
// RUN: -convert-vector-to-scf \
// RUN: -expand-strided-metadata \
// RUN: -convert-vector-to-llvm \
// RUN: -memref-expand \
// RUN: -arith-expand \
// RUN: -convert-arith-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-scf-to-cf \
// RUN: -convert-cf-to-llvm \
// RUN: -convert-openmp-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-math-to-llvm \
// RUN: -convert-math-to-libm \
// RUN: -convert-func-to-llvm \
// RUN: -reconcile-unrealized-casts \
// RUN: | mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s
// External timer; returns an f64 timestamp. @kernel subtracts two of them to
// obtain the elapsed time it prints.
func.func private @rtclock() -> f64

// SiLU(x) = x * sigmoid(x), expressed with TOSA ops on a 1x40x8960 f32 tensor.
// Prints the result tensor, then the time elapsed between the two @rtclock
// calls. (Previously misspelled `@kenerl`; the only call site in this file,
// in @main, is updated to match.)
func.func @kernel(%arg0: tensor<1x40x8960xf32>) {
  %t_start = call @rtclock() : () -> f64

  // sigmoid(x), element-wise.
  %sigmoid_x = tosa.sigmoid %arg0 : (tensor<1x40x8960xf32>) -> tensor<1x40x8960xf32>

  // x * sigmoid(x), element-wise, with a shift of 0.
  %silu_result = tosa.mul %arg0, %sigmoid_x {shift = 0 : i8} : (tensor<1x40x8960xf32>, tensor<1x40x8960xf32>) -> tensor<1x40x8960xf32>

  %t_end = call @rtclock() : () -> f64
  %time = arith.subf %t_end, %t_start : f64

  // The print helper takes an unranked tensor, so erase the static shape.
  %unranked_result = tensor.cast %silu_result : tensor<1x40x8960xf32> to tensor<*xf32>

  // All the elements of the MemRef are the same,
  // only check the first line to verify the correctness.
  // CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 8960] strides = [358400, 8960, 1] data =
  // CHECK-NEXT: [
  // CHECK-SAME: [2.85772{{(, 2.85772)*}}],

  // Print results.
  call @printMemrefF32(%unranked_result) : (tensor<*xf32>) -> ()
  // Print timings.
  vector.print %time : f64

  return
}

// Entry point: feeds a constant tensor with every element equal to 3.0 into
// @kernel.
func.func @main() {
  %input_tensor = arith.constant dense<3.0> : tensor<1x40x8960xf32>
  call @kernel(%input_tensor) : (tensor<1x40x8960xf32>) -> ()

  return
}

// External print helper, declared on an unranked f32 tensor.
func.func private @printMemrefF32(%ptr : tensor<*xf32>)
1 change: 1 addition & 0 deletions midend/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(LinkedLibs
BatchMatMulOptimization
MatMulParallelVectorization
TransposeOptimization
SiluOptimization
)


Expand Down
1 change: 1 addition & 0 deletions midend/lib/Conversion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ add_subdirectory(LowerLinalgToGemmini)
add_subdirectory(FuncBufferize)
add_subdirectory(DepthwiseConvOptimization)
add_subdirectory(MLIRGPU)
add_subdirectory(SiluOptimization)
5 changes: 5 additions & 0 deletions midend/lib/Conversion/SiluOptimization/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Declares the SiluOptimization MLIR library, built from SiluOptimization.cpp.
# BuddyUtils is linked PUBLIC, so it is also propagated to targets that link
# against SiluOptimization.
add_mlir_library(SiluOptimization
SiluOptimization.cpp
LINK_LIBS PUBLIC
BuddyUtils
)
Loading