Skip to content

Commit d7b93b4

Browse files
authored
Finalized intrinsic benchmark. (#180)
1 parent 78e44f5 commit d7b93b4

File tree

185 files changed

+24910
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

185 files changed

+24910
-0
lines changed

.gitmodules

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,10 @@
1010
[submodule "thirdparty/kfr"]
1111
path = thirdparty/kfr
1212
url = https://github.com/kfrlib/kfr.git
13+
14+
[submodule "benchmarks/Intrinsic/3rdparty/benchmark"]
15+
path = benchmarks/Intrinsic/3rdparty/benchmark
16+
url = https://github.com/google/benchmark.git
17+
[submodule "benchmarks/Intrinsic/3rdparty/googletest"]
18+
path = benchmarks/Intrinsic/3rdparty/googletest
19+
url = https://github.com/google/googletest.git

benchmarks/Intrinsic/.gitignore

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
build*
2+
.vscode
3+
result
4+
output
5+
src/custom*
6+
opencv
7+
8+
# Prerequisites
9+
*.d
10+
11+
# Compiled Object files
12+
*.slo
13+
*.lo
14+
*.o
15+
*.obj
16+
17+
# Precompiled Headers
18+
*.gch
19+
*.pch
20+
21+
# Linker files
22+
*.ilk
23+
24+
# Debugger Files
25+
*.pdb
26+
27+
# Compiled Dynamic libraries
28+
*.so
29+
*.dylib
30+
*.dll
31+
32+
# Fortran module files
33+
*.mod
34+
*.smod
35+
36+
# Compiled Static libraries
37+
*.lai
38+
*.la
39+
*.a
40+
*.lib
41+
42+
# Executables
43+
*.exe
44+
*.out
45+
*.app
46+
47+
# debug information files
48+
*.dwo
49+
50+
run_test.py
Submodule benchmark added at 01deef5
Submodule googletest added at 50b8600
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
cmake_minimum_required(VERSION 3.10)
2+
project(Intrinsic)
3+
4+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
5+
6+
if(NOT CMAKE_BUILD_TYPE)
7+
message(STATUS "Build type not specified, defaulting to Release")
8+
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build type" FORCE)
9+
endif()
10+
11+
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
12+
13+
add_compile_options(
14+
$<$<CONFIG:Release>:-O3>
15+
$<$<CONFIG:Debug>:-Og>
16+
)
17+
add_subdirectory(3rdparty/googletest)
18+
add_subdirectory(3rdparty/benchmark)
19+
set(BUILD_VERSION "scalar" CACHE STRING "Build version (scalar, native, custom)")
20+
set_property(CACHE BUILD_VERSION PROPERTY STRINGS scalar native custom)
21+
if(NOT DEFINED TARGET_ARCH)
22+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
23+
set(TARGET_ARCH "riscv")
24+
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm")
25+
set(TARGET_ARCH "arm")
26+
# Fallback branch: every processor not matched above (x86_64 included) defaults to avx2.
else()
27+
set(TARGET_ARCH "avx2")
28+
endif()
29+
endif()
30+
31+
message(STATUS "Selected build version: ${TARGET_ARCH} - ${BUILD_VERSION}")
32+
set(BUILD_CASES "" CACHE STRING "Semicolon-separated list of cases to build (e.g., 'saxpy;merge'). Leave empty to build all available cases.")
33+
if(BUILD_CASES)
34+
message(STATUS "Building only specified cases: ${BUILD_CASES}")
35+
endif()
36+
37+
add_subdirectory(src/)
38+
add_subdirectory(test/)
39+
add_subdirectory(perf/)
40+
if(NOT DEFINED SRC_FILES_FOUND)
41+
message(FATAL_ERROR "SRC_FILES_FOUND variable not set by src/CMakeLists.txt")
42+
endif()
43+
set(SRC_BASENAMES "")
44+
foreach(src_file IN LISTS SRC_FILES_FOUND)
45+
get_filename_component(src_basename ${src_file} NAME_WE)
46+
list(APPEND SRC_BASENAMES ${src_basename})
47+
endforeach()
48+
set(FILTERED_TEST_SOURCES "")
49+
foreach(test_file IN LISTS TEST_SOURCES)
50+
get_filename_component(test_basename ${test_file} NAME_WE)
51+
# Quote the operand so an empty or variable-like basename is compared as a plain string.
if(NOT "${test_basename}" IN_LIST SRC_BASENAMES)
52+
message(STATUS "Excluding test: ${test_basename}")
53+
continue()
54+
endif()
55+
if(BUILD_CASES)
56+
# Quote the operand so an empty or variable-like basename is compared as a plain string.
if(NOT "${test_basename}" IN_LIST BUILD_CASES)
57+
message(STATUS "Excluding test: ${test_basename} (not in BUILD_CASES)")
58+
continue()
59+
endif()
60+
endif()
61+
62+
list(APPEND FILTERED_TEST_SOURCES ${test_file})
63+
message(STATUS "Including test: ${test_basename}")
64+
endforeach()
65+
set(FILTERED_PERF_SOURCES "")
66+
foreach(perf_file IN LISTS PERF_SOURCES)
67+
get_filename_component(perf_basename ${perf_file} NAME_WE)
68+
# Quote the operand so an empty or variable-like basename is compared as a plain string.
if(NOT "${perf_basename}" IN_LIST SRC_BASENAMES)
69+
message(STATUS "Excluding perf: ${perf_basename}")
70+
continue()
71+
endif()
72+
if(BUILD_CASES)
73+
# Quote the operand so an empty or variable-like basename is compared as a plain string.
if(NOT "${perf_basename}" IN_LIST BUILD_CASES)
74+
message(STATUS "Excluding perf: ${perf_basename} (not in BUILD_CASES)")
75+
continue()
76+
endif()
77+
endif()
78+
79+
list(APPEND FILTERED_PERF_SOURCES ${perf_file})
80+
message(STATUS "Including perf: ${perf_basename}")
81+
endforeach()
82+
83+
add_executable(intrinsicTest intrinsicTest.cpp ${FILTERED_TEST_SOURCES})
84+
target_link_libraries(intrinsicTest PRIVATE static_source)
85+
target_link_libraries(intrinsicTest PRIVATE gtest)
86+
87+
add_executable(intrinsicPerf intrinsicPerf.cpp ${FILTERED_PERF_SOURCES})
88+
target_link_libraries(intrinsicPerf PRIVATE static_source)
89+
target_link_libraries(intrinsicPerf PRIVATE benchmark::benchmark)

benchmarks/Intrinsic/Readme.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# IntrinsicBenchmark
2+
3+
## Build
4+
5+
``` bash
6+
git submodule update --init --recursive
7+
cmake -G Ninja -B build-native-rv . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=toolchain/riscv.cmake -DBUILD_VERSION=native
8+
cmake -G Ninja -B build-scalar-rv . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=toolchain/riscv.cmake -DBUILD_VERSION=scalar
9+
10+
cmake -G Ninja -B build-native-aarch64 . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=toolchain/aarch64.cmake -DBUILD_VERSION=native
11+
cmake -G Ninja -B build-scalar-aarch64 . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=toolchain/aarch64.cmake -DBUILD_VERSION=scalar
12+
13+
cmake -G Ninja -B build-native-sse . -DCMAKE_BUILD_TYPE=Release -DBUILD_VERSION=native -DTARGET_ARCH=sse
14+
cmake -G Ninja -B build-native-avx2 . -DCMAKE_BUILD_TYPE=Release -DBUILD_VERSION=native -DTARGET_ARCH=avx2
15+
cmake -G Ninja -B build-native-avx512 . -DCMAKE_BUILD_TYPE=Release -DBUILD_VERSION=native -DTARGET_ARCH=avx512
16+
cmake -G Ninja -B build-scalar-x86 . -DCMAKE_BUILD_TYPE=Release -DBUILD_VERSION=scalar
17+
18+
cmake -G Ninja -B build-custom . -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=toolchain/riscv.cmake -DBUILD_VERSION=custom -DCMAKE_CXX_FLAGS="-march=rv64gcv"
19+
```
20+
21+
Note: The commands above assume that GNU cross-compilation toolchains for RISC-V and ARM are installed in `/opt/riscv/bin` and `/opt/aarch64/bin`, respectively. Otherwise, specify the toolchain's bin directory via the CMake variable `TOOLCHAIN_COMPILER_LOCATION_HINT`.
22+
23+
## Run
24+
25+
Assuming the host machine can access the target device via SSH, run:
26+
27+
```bash
28+
# Test on RISC-V device
29+
python3 ./run.py --host <DEVICE_IP> --user <DEVICE_USER_NAME> --program ./build-native-rv/intrinsicPerf --output ./output/native-rv.json
30+
python3 ./run.py --host <DEVICE_IP> --user <DEVICE_USER_NAME> --program ./build-scalar-rv/intrinsicPerf --output ./output/scalar-rv.json
31+
python3 ./run.py --host <DEVICE_IP> --user <DEVICE_USER_NAME> --program ./build-custom/intrinsicPerf --output ./output/custom.json
32+
python3 ./report.py output/scalar-rv.json output/native-rv.json ./output/custom.json --output result/rv.csv
33+
34+
# Test on ARM device
35+
python3 ./run.py --host <DEVICE_IP> --user <DEVICE_USER_NAME> --program ./build-native-aarch64/intrinsicPerf --output ./output/native-aarch64.json
36+
python3 ./run.py --host <DEVICE_IP> --user <DEVICE_USER_NAME> --program ./build-scalar-aarch64/intrinsicPerf --output ./output/scalar-aarch64.json
37+
python3 ./report.py output/scalar-aarch64.json output/native-aarch64.json --output result/aarch64.csv
38+
39+
```
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#include <benchmark/benchmark.h>
2+
BENCHMARK_MAIN();
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <gtest/gtest.h>
2+
3+
int main(int argc, char **argv) {
4+
testing::InitGoogleTest(&argc, argv);
5+
return RUN_ALL_TESTS();
6+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
file(GLOB PERF_SOURCES "*.cpp")
2+
set(PERF_SOURCES ${PERF_SOURCES} PARENT_SCOPE)

benchmarks/Intrinsic/perf/atan.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include "perf_test.h"
2+
3+
extern void fastAtan32f(const float* y, const float* x, float* dst, size_t n, bool angle_in_deg);
4+
5+
static void BM_FastAtan32f(benchmark::State& state) {
6+
const int n = state.range(0);
7+
const bool deg = (state.range(1) == 0);
8+
std::vector<float> Y = sequence_vector<float>(n);
9+
std::vector<float> X = sequence_vector<float>(n);
10+
std::vector<float> angles(n);
11+
12+
for (auto _ : state) {
13+
fastAtan32f(Y.data(), X.data(), angles.data(), n, deg);
14+
benchmark::DoNotOptimize(angles.data());
15+
}
16+
}
17+
18+
BENCHMARK(BM_FastAtan32f)
19+
->Name("FastAtan32f")
20+
->ArgsProduct({
21+
benchmark::CreateRange(65535, 1 << 20, 2),
22+
{0, 1}
23+
})
24+
->Unit(benchmark::kMicrosecond)
25+
->Iterations(100)
26+
->Repetitions(3);

0 commit comments

Comments
 (0)