Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/Gemmini/Ops/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Each operator benchmark lives in its own subdirectory with its own CMakeLists.txt.
add_subdirectory(MatMulOp)
add_subdirectory(ConvOp)
52 changes: 52 additions & 0 deletions benchmarks/Gemmini/Ops/ConvOp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@

# Buddy compiler tools and the frontend interface headers, all located relative
# to the buddy-mlir build tree given by BUDDY_MLIR_BUILD_DIR.
set(BUDDY_OPT ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt)
set(BUDDY_TRANSLATE ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-translate)
set(BUDDY_LLC ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-llc)
set(INTERFACES ${BUDDY_MLIR_BUILD_DIR}/../frontend/Interfaces)

# The C/C++ sources of this directory are built with the RISC-V GNU cross
# compilers, which must be reachable through PATH.
set(CMAKE_CXX_COMPILER riscv64-unknown-linux-gnu-g++)
set(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc)

# Header search paths shared by every target declared in this directory.
include_directories(
  ${BENCHMARKS_DIR}
  ${GEMMINI_INCLUDE_DIR}
  ${GEMMINI_INCLUDE_DIR}/../
  ${INTERFACES}
)

# Stop at configure time when the RISCV environment variable is not set.
if (NOT DEFINED ENV{RISCV})
  message(FATAL_ERROR "Can't find the RISCV environment variable (missing: RISCV)")
endif()

# CMAKE_C_FLAGS is set when configuring cmake.
# NOTE(review): CLANG_FLAGS_LIST is not referenced anywhere else in this file.
separate_arguments(CLANG_FLAGS_LIST UNIX_COMMAND "${CMAKE_C_FLAGS}")

# Build rule for buddy_conv.o from conv.mlir:
#   1. the first command writes the `-lower-gemmini` output of conv.mlir to
#      log.mlir;
#   2. the second command pipes conv.mlir through buddy-opt, buddy-translate
#      and buddy-llc to emit a riscv64 object file.
# DEPENDS lists conv.mlir so the object is regenerated whenever the MLIR
# source changes.
add_custom_command(
  OUTPUT buddy_conv.o
  COMMAND ${BUDDY_OPT} ${CMAKE_CURRENT_SOURCE_DIR}/conv.mlir
          -llvm-request-c-wrappers
          -lower-gemmini
          > log.mlir
  COMMAND ${BUDDY_OPT} ${CMAKE_CURRENT_SOURCE_DIR}/conv.mlir
          -llvm-request-c-wrappers
          -convert-linalg-to-gemmini
          -convert-linalg-to-loops
          -lower-gemmini |
          ${BUDDY_TRANSLATE} -buddy-to-llvmir |
          ${BUDDY_LLC} -filetype=obj -mtriple=riscv64
          -mattr=+buddyext,+D -float-abi=hard
          -o buddy_conv.o
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/conv.mlir
  VERBATIM)

# Static library wrapping the object file produced by the custom command.
# The target has no compilable sources, so CMake cannot infer a linker
# language; it is set explicitly.
add_library(BuddyConv STATIC buddy_conv.o)
set_target_properties(BuddyConv PROPERTIES LINKER_LANGUAGE C)

# Static library built from the Exo-generated C kernel.
add_library(ExoConv STATIC ExoConv.c)
set_target_properties(ExoConv PROPERTIES LINKER_LANGUAGE C)

# Benchmark driver, statically linked against both kernel implementations.
add_executable(dl-op-gemmini-conv-benchmark Main.cpp)
target_link_libraries(dl-op-gemmini-conv-benchmark
  PRIVATE
  -static
  ExoConv
  BuddyConv
)
140 changes: 140 additions & 0 deletions benchmarks/Gemmini/Ops/ConvOp/ExoConv.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
//===- ExoConv.c ----------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Exo-lang convolution kernel.
// The kernel is generated from an exo-lang Python script.
//
//===----------------------------------------------------------------------===//

#include "ExoUtils.h"
#include "gemmini.h"

// clang-format off
// conv_3(
// output : i8[4, 56, 56, 64] @DRAM,
// bias : i32[1, 64] @DRAM,
// inp : i8[4, 58, 58, 64] @DRAM,
// weights : i8[3, 3, 64, 64] @DRAM,
// act : bool,
// scale : f32 @DRAM
// )
// Exo-generated 3x3 convolution issued as a sequence of Gemmini instructions.
// Shapes are fixed (see the signature comment above): output is
// i8[4, 56, 56, 64], inp is i8[4, 58, 58, 64], weights is i8[3, 3, 64, 64] and
// bias is i32[1, 64]. The 56 output columns are processed as three tiles of 16
// columns (first loop nest) followed by one tile of 8 columns starting at
// column 48 (second loop nest).
//
// The values returned by gemm_malloc / gemm_acc_malloc are stored in pointer
// variables, but in this function they are only converted back to integers and
// passed to gemmini_* calls; they are never dereferenced here.
void _exo_conv_3( int8_t* output, const int32_t* bias, const int8_t* inp, const int8_t* weights, bool act, const float* scale ) {
// One-time store / execute / load configuration. The exact meaning of each
// argument is defined by gemmini.h, which is not visible here.
gemmini_extended_config_st((64), (act), (scale)[0]);
gemmini_extended_config_ex(WS, 0, 0, 1, 0, 0);
gemmini_extended3_config_ld((64), 1.0f, 0, 1);
gemmini_extended3_config_ld(0, 1.0f, 0, 0);
// Buffers for the 16-column tiles (i_s, w_s, res) and for the 8-column
// remainder tile (i_s_1, w_s_1, res_1).
int8_t *i_s = (int8_t*) ((uint64_t)gemm_malloc (16 * 16 * 4 * 3 * 30 * sizeof(int8_t)));
int8_t *i_s_1 = (int8_t*) ((uint64_t)gemm_malloc (16 * 8 * 4 * 3 * 30 * sizeof(int8_t)));
int8_t *w_s = (int8_t*) ((uint64_t)gemm_malloc (16 * 16 * 4 * 4 * 3 * 3 * sizeof(int8_t)));
int8_t *w_s_1 = (int8_t*) ((uint64_t)gemm_malloc (16 * 16 * 4 * 4 * 3 * 3 * sizeof(int8_t)));
int32_t *res = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 16 * 4 * sizeof(int32_t)));
int32_t *res_1 = (int32_t*) ((uint32_t)gemm_acc_malloc (16 * 9 * 4 * sizeof(int32_t)));
// b: batch index. Output row = orow_ii + 7 * orow_io + 28 * orow_o (0..55).
for (int_fast32_t b = 0; b < 4; b++) {
// ---- First nest: output columns [16 * ocol_o, 16 * ocol_o + 16) ----
for (int_fast32_t ocol_o = 0; ocol_o < 3; ocol_o++) {
for (int_fast32_t orow_o = 0; orow_o < 2; orow_o++) {
for (int_fast32_t orow_io = 0; orow_io < 4; orow_io++) {
for (int_fast32_t orow_ii = 0; orow_ii < 7; orow_ii++) {
// Load the bias into the four result tiles (one per group of 16 output
// channels) before accumulating.
gemmini_extended_mvin( ((uint64_t) &bias[0]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (0)/16))), 16, (16) );
gemmini_extended_mvin( ((uint64_t) &bias[16]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (256)/16))), 16, (16) );
gemmini_extended_mvin( ((uint64_t) &bias[32]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((2) * (256))/16))), 16, (16) );
gemmini_extended_mvin( ((uint64_t) &bias[48]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((3) * (256))/16))), 16, (16) );
// krow / kcol: position inside the 3x3 kernel window.
for (int_fast32_t krow = 0; krow < 3; krow++) {
for (int_fast32_t kcol = 0; kcol < 3; kcol++) {
// The weights are moved into w_s only on the very first iteration
// (ocol_o == 0, b == 0, output row 0) and reused afterwards.
if (ocol_o == 0) {
if (b == 0) {
if (orow_o == 0) {
if (orow_ii + 7 * orow_io == 0) {
for (int_fast32_t kch_o = 0; kch_o < 4; kch_o++) {
gemmini_extended_mvin2( &weights[(krow) * (12288) + (kcol) * (4096) + (16 * kch_o) * (64)], ((uint64_t) &*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024))/16))), 16*(4), (16) );
}
}
}
}
}
// Input rows are moved into i_s only for the first row of each 28-row block
// (all krow) or, on later rows, for krow == 2; the other kernel rows reuse
// rows loaded by earlier iterations.
if (orow_ii + 7 * orow_io == 0 || krow == 2) {
gemmini_extended4_config_ld(((struct exo_win_2i8c){ &inp[(b) * (215296) + (krow + orow_ii + 7 * orow_io + 28 * orow_o) * (3712) + (kcol + 16 * ocol_o) * (64)], { 64, 1 } }).strides[0]*1, 1.0f, 0, (16), 2);
gemmini_extended_mvin3( &inp[(b) * (215296) + (krow + orow_ii + 7 * orow_io + 28 * orow_o) * (3712) + (kcol + 16 * ocol_o) * (64)], ((uint64_t) &*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s)) + ((krow + orow_ii + 7 * orow_io) * (3072) + (kcol) * (1024))/16))), 16*(4), (16) );
}
// For each group of 16 input channels (kch_o), run one preload + compute
// pair per result tile. NOTE(review): 0x40000000 is presumably the
// "accumulate" flag and ~0 a "no address" sentinel - confirm in gemmini.h.
for (int_fast32_t kch_o = 0; kch_o < 4; kch_o++) {
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (0)/16))) | 0x40000000, (16), (16), (16), (16));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s)) + ((krow + orow_ii + 7 * orow_io) * (3072) + (kcol) * (1024) + (kch_o) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (256)/16))) | 0x40000000, (16), (16), (16), (16));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s)) + ((krow + orow_ii + 7 * orow_io) * (3072) + (kcol) * (1024) + (kch_o) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((2) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s)) + ((krow + orow_ii + 7 * orow_io) * (3072) + (kcol) * (1024) + (kch_o) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((3) * (256))/16))) | 0x40000000, (16), (16), (16), (16));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s)) + ((krow + orow_ii + 7 * orow_io) * (3072) + (kcol) * (1024) + (kch_o) * (256))/16))), ~((uint32_t)0), (16), (16), 16, 16);
}
}
}
// Write the four result tiles back to output channels 0-15, 16-31, 32-47
// and 48-63 of the current 16 output columns.
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (16 * ocol_o) * (64)]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (0)/16)), (16), (16) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (16 * ocol_o) * (64) + 16]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + (256)/16)), (16), (16) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (16 * ocol_o) * (64) + 32]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((2) * (256))/16)), (16), (16) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (16 * ocol_o) * (64) + 48]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res)) + ((3) * (256))/16)), (16), (16) );
}
}
}
}
// ---- Second nest: remaining output columns [48, 56), 8 rows per tile ----
// Same structure as the first nest, using i_s_1 / w_s_1 / res_1. The res_1
// tiles are spaced 144 apart while 8 rows are moved per tile.
for (int_fast32_t orow_o = 0; orow_o < 2; orow_o++) {
for (int_fast32_t orow_io = 0; orow_io < 4; orow_io++) {
for (int_fast32_t orow_ii = 0; orow_ii < 7; orow_ii++) {
gemmini_extended_mvin( ((uint64_t) &bias[0]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (0)/16))), 16, (8) );
gemmini_extended_mvin( ((uint64_t) &bias[16]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (144)/16))), 16, (8) );
gemmini_extended_mvin( ((uint64_t) &bias[32]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((2) * (144))/16))), 16, (8) );
gemmini_extended_mvin( ((uint64_t) &bias[48]), ((uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((3) * (144))/16))), 16, (8) );
for (int_fast32_t krow = 0; krow < 3; krow++) {
for (int_fast32_t kcol = 0; kcol < 3; kcol++) {
// Weights go into w_s_1 once, on the first batch and first output row.
if (b == 0) {
if (orow_o == 0) {
if (orow_ii + 7 * orow_io == 0) {
for (int_fast32_t kch_o = 0; kch_o < 4; kch_o++) {
gemmini_extended_mvin2( &weights[(krow) * (12288) + (kcol) * (4096) + (16 * kch_o) * (64)], ((uint64_t) &*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s_1)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024))/16))), 16*(4), (16) );
}
}
}
}
// Same input-row reuse rule as in the first nest, with 8 rows per move.
if (orow_ii + 7 * orow_io == 0 || krow == 2) {
gemmini_extended4_config_ld(((struct exo_win_2i8c){ &inp[(b) * (215296) + (krow + orow_ii + 7 * orow_io + 28 * orow_o) * (3712) + (48 + kcol) * (64)], { 64, 1 } }).strides[0]*1, 1.0f, 0, (8), 2);
gemmini_extended_mvin3( &inp[(b) * (215296) + (krow + orow_ii + 7 * orow_io + 28 * orow_o) * (3712) + (48 + kcol) * (64)], ((uint64_t) &*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s_1)) + ((krow + orow_ii + 7 * orow_io) * (1536) + (kcol) * (512))/16))), 16*(4), (8) );
}
for (int_fast32_t kch_o = 0; kch_o < 4; kch_o++) {
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s_1)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (0)/16))) | 0x40000000, (16), (16), (16), (8));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s_1)) + ((krow + orow_ii + 7 * orow_io) * (1536) + (kcol) * (512) + (kch_o) * (128))/16))), ~((uint32_t)0), (16), (8), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s_1)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + 256)/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (144)/16))) | 0x40000000, (16), (16), (16), (8));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s_1)) + ((krow + orow_ii + 7 * orow_io) * (1536) + (kcol) * (512) + (kch_o) * (128))/16))), ~((uint32_t)0), (16), (8), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s_1)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + (2) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((2) * (144))/16))) | 0x40000000, (16), (16), (16), (8));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s_1)) + ((krow + orow_ii + 7 * orow_io) * (1536) + (kcol) * (512) + (kch_o) * (128))/16))), ~((uint32_t)0), (16), (8), 16, 16);
gemmini_extended_preload((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)w_s_1)) + ((krow) * (12288) + (kcol) * (4096) + (kch_o) * (1024) + (3) * (256))/16))), (uint32_t)(&*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((3) * (144))/16))) | 0x40000000, (16), (16), (16), (8));
gemmini_extended_compute_preloaded((uint32_t)(&*(int8_t*)((uint64_t)( ((uint32_t)((uint64_t)i_s_1)) + ((krow + orow_ii + 7 * orow_io) * (1536) + (kcol) * (512) + (kch_o) * (128))/16))), ~((uint32_t)0), (16), (8), 16, 16);
}
}
}
// Write the four result tiles back to output columns 48..55.
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (48) * (64)]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (0)/16)), (16), (8) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (48) * (64) + 16]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + (144)/16)), (16), (8) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (48) * (64) + 32]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((2) * (144))/16)), (16), (8) );
gemmini_extended_mvout( ((uint64_t) &output[(b) * (200704) + (orow_ii + 7 * orow_io + 28 * orow_o) * (3584) + (48) * (64) + 48]), (uint32_t) &*(int32_t*)((uint64_t)( ((uint32_t)((uint64_t)res_1)) + ((3) * (144))/16)), (16), (8) );
}
}
}
}
// Release the buffers in the reverse order of their allocation.
gemm_acc_free((uint32_t)(res_1));
gemm_acc_free((uint32_t)(res));
gemm_free((uint64_t)(w_s_1));
gemm_free((uint64_t)(w_s));
gemm_free((uint64_t)(i_s_1));
gemm_free((uint64_t)(i_s));
}
// clang-format on
169 changes: 169 additions & 0 deletions benchmarks/Gemmini/Ops/ConvOp/ExoUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//===- ExoUtils.h ---------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Exo-lang helper functions.
// The functions are taken from the exo repository.
//
//===----------------------------------------------------------------------===//

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Byte budget of the block-metadata table below: BLOCKS holds
// GEMM_HEAP_SIZE / sizeof(NewBlock) entries. May be overridden at compile time.
#ifndef GEMM_HEAP_SIZE
#define GEMM_HEAP_SIZE 100000
#endif

// Allocation granularity: gemm_malloc divides the requested size by GEMM_DIM
// (rounding up) and tracks sizes and locations in those units.
#ifndef GEMM_DIM
#define GEMM_DIM 16
#endif

// Window descriptor used by Exo-generated code: a base pointer to int8 data
// plus two strides.
#ifndef EXO_WIN_2I8C
#define EXO_WIN_2I8C
struct exo_win_2i8c{
const int8_t * const data;
const int_fast32_t strides[2];
};
#endif

// Metadata for one allocation handed out by gemm_malloc. The struct is packed,
// so no padding is inserted between or after the fields.
typedef struct __attribute__((__packed__)) NewBlock {
// Block size in units of GEMM_DIM; 0 marks an entry that was never assigned.
uint32_t size;
// Start location of the block, in the same units.
uint32_t loc;
// Non-zero while the block is handed out to a caller.
uint8_t is_used;
} NewBlock;

// Table of all blocks ever created, in creation order.
NewBlock BLOCKS[GEMM_HEAP_SIZE / sizeof(NewBlock)];
// Location at which the next newly created block will start.
uint32_t gemm_last_ptr;

// Reset the allocator state: clear every byte of the BLOCKS table and rewind
// the location of the next new block to 0.
void gemm_init_mem() {
  uint8_t *cursor = (uint8_t *)BLOCKS;
  uint8_t *const end = cursor + sizeof(BLOCKS);
  while (cursor != end)
    *cursor++ = 0;
  gemm_last_ptr = 0;
}

// Allocate `size` bytes, rounded up to a whole number of GEMM_DIM units.
//
// The block table is scanned in order for the first free block that is large
// enough; if the scan reaches the first never-assigned entry instead, a new
// block is created at gemm_last_ptr.
//
// Returns the block location (in GEMM_DIM units), or (uint32_t)-1 when `size`
// is 0 or when every table entry is taken and none can be reused.
uint32_t gemm_malloc(long unsigned int size) {
  const uint32_t n_blocks = GEMM_HEAP_SIZE / sizeof(NewBlock);
  if (size == 0)
    return -1;
  size = (size + GEMM_DIM - 1) / GEMM_DIM;

  uint32_t i;
  for (i = 0; i < n_blocks && BLOCKS[i].size > 0; i++) {
    if (!BLOCKS[i].is_used && BLOCKS[i].size >= size)
      break;
  }

  // The table is exhausted: indexing BLOCKS[i] would be out of bounds.
  if (i == n_blocks)
    return -1;

  if (BLOCKS[i].size == 0) {
    // First never-assigned entry: create a new block at the end of the
    // region handed out so far.
    BLOCKS[i].loc = gemm_last_ptr;
    BLOCKS[i].size = size;
    BLOCKS[i].is_used = 1;
    gemm_last_ptr += size;
    return BLOCKS[i].loc;
  }

  // Reuse a previously freed block that is large enough.
  BLOCKS[i].is_used = 1;
  return BLOCKS[i].loc;
}

void gemm_free(uint32_t addr) {
for (int i = 0; BLOCKS[i].size > 0; i++) {
if (BLOCKS[i].is_used && BLOCKS[i].loc == addr) {
BLOCKS[i].is_used = 0;
return;
}
}
return;
}

// Byte budget of the accumulator block-metadata table (see N_ACC_BLOCKS).
// May be overridden at compile time.
#ifndef GEMM_ACC_HEAP_SIZE
#define GEMM_ACC_HEAP_SIZE 100000
#endif

// Allocation granularity: gemm_acc_malloc divides the requested size by
// GEMM_ACC_DIM (rounding up) and tracks sizes and locations in those units.
#ifndef GEMM_ACC_DIM
#define GEMM_ACC_DIM 16
#endif

// Metadata for one allocation handed out by gemm_acc_malloc (packed, so no
// padding is inserted).
typedef struct __attribute__((__packed__)) AccBlock {
// Block size in units of GEMM_ACC_DIM.
uint32_t size;
// Start location of the block, in the same units.
uint32_t loc;
// Non-zero while the block is handed out to a caller.
uint8_t is_used;
} AccBlock;

// The block table is used as a stack: gemm_acc_malloc pushes a new block on
// top and gemm_acc_free pops the trailing blocks that are no longer in use.
#define N_ACC_BLOCKS (GEMM_ACC_HEAP_SIZE / sizeof(AccBlock))
AccBlock ACC_BLOCKS[N_ACC_BLOCKS];
// Index of the first unused table entry, i.e. the current stack height.
uint32_t gemm_acc_free_block;

// Reset the accumulator allocator: clear every byte of the ACC_BLOCKS table
// and empty the block stack.
void gemm_acc_init_mem() {
  uint8_t *const bytes = (uint8_t *)ACC_BLOCKS;
  uint32_t remaining = sizeof(ACC_BLOCKS);
  while (remaining > 0)
    bytes[--remaining] = 0;
  gemm_acc_free_block = 0;
}

uint32_t gemm_acc_malloc(long unsigned int size) {
// must have two free metadata blocks and
// this allocation must have > 0 size
if (size == 0)
return -1;
if (gemm_acc_free_block >= N_ACC_BLOCKS)
return -1;

size = (size + GEMM_ACC_DIM - 1) / GEMM_ACC_DIM;
uint32_t i = gemm_acc_free_block;

uint32_t loc = 0;
if (i > 0) {
loc = ACC_BLOCKS[i - 1].loc + ACC_BLOCKS[i - 1].size;
}

ACC_BLOCKS[i].size = size;
ACC_BLOCKS[i].loc = loc;
ACC_BLOCKS[i].is_used = 1;
gemm_acc_free_block = i + 1;

return (ACC_BLOCKS[i].loc | ((uint32_t)0x80000000));
}

void gemm_acc_free(uint32_t addr) {
if (gemm_acc_free_block == 0)
return;
addr = addr & (uint32_t)(0x7FFFFFFF);
// first case: free-ing the top of the block-stack
if (ACC_BLOCKS[gemm_acc_free_block - 1].loc == addr) {
ACC_BLOCKS[gemm_acc_free_block - 1].is_used = 0;

// Then go through and release as many blocks
// as we can
for (int i = gemm_acc_free_block - 1; i >= 0; i--) {
if (ACC_BLOCKS[i].is_used)
break; // loop termination
// otherwise...
gemm_acc_free_block = i;
}
// second case: find the freed block and mark it
} else {
for (int i = gemm_acc_free_block - 1; i >= 0; i--) {
if (ACC_BLOCKS[i].loc == addr) {
ACC_BLOCKS[i].is_used = 0;
break;
}
}
}
return;
}
Loading