Skip to content

Commit ae54bc4

Browse files
authored
add avh picodet demo (PaddlePaddle#6266)
1 parent e25bbc7 commit ae54bc4

10 files changed

+973
-0
lines changed

deploy/third_engine/demo_avh/Makefile

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Makefile to build demo
19+
20+
# Setup build environment
21+
# Directory that receives every build product of this Makefile.
BUILD_DIR := build

# Target CPU name as used in the CMSIS device directory layout.
ARM_CPU = ARMCM55

# Install locations of the Arm(R) Ethos(TM)-U driver stack. CMSIS_PATH and
# ETHOSU_PLATFORM_PATH are only defaulted here, so they can be supplied from
# the environment or the command line.
ETHOSU_PATH = /opt/arm/ethosu
CMSIS_PATH ?= $(ETHOSU_PATH)/cmsis
ETHOSU_PLATFORM_PATH ?= $(ETHOSU_PATH)/core_platform
CORSTONE_300_PATH = $(ETHOSU_PLATFORM_PATH)/targets/corstone-300

# Location of the standalone CRT sources inside the build directory.
STANDALONE_CRT_PATH := $(abspath $(BUILD_DIR))/runtime

# Options shared by every C compilation of the demo.
PKG_COMPILE_OPTS = -g -Wall -O2 \
    -Wno-incompatible-pointer-types -Wno-format \
    -mcpu=cortex-m55 -mthumb -mfloat-abi=hard \
    -std=gnu99

# Host and cross tools.
CMAKE ?= cmake
CC = arm-none-eabi-gcc
AR = arm-none-eabi-ar
RANLIB = arm-none-eabi-ranlib
34+
# Compiler flags for every C file of the demo: the shared options followed by
# the include search path (standalone CRT headers, the demo's own include/
# directory, the Corstone-300 platform directory, CMSIS headers and the
# generated model headers).
# $(CURDIR) is used for the demo's include directory because make sets it to
# the directory it is actually running in (also under `make -C <dir>`), whereas
# the inherited PWD environment variable can be stale or unset.
PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
    -I${STANDALONE_CRT_PATH}/include \
    -I${STANDALONE_CRT_PATH}/src/runtime/crt/include \
    -I$(CURDIR)/include \
    -I${CORSTONE_300_PATH} \
    -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \
    -I${CMSIS_PATH}/CMSIS/Core/Include \
    -I${CMSIS_PATH}/CMSIS/NN/Include \
    -I${CMSIS_PATH}/CMSIS/DSP/Include \
    -I$(abspath $(BUILD_DIR))/codegen/host/include

# Arguments passed to cmake when configuring CMSIS-NN. The toolchain file is
# looked up in the parent directory of the build directory.
CMSIS_NN_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
    -DTARGET_CPU=cortex-m55 \
    -DBUILD_CMSIS_NN_FUNCTIONS=YES

# Linker flags for the demo image. The linker script name is relative, so it
# is resolved against the directory make runs in.
PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld
48+
49+
# Recipe echo control. Every recipe line is prefixed with $(QUIET):
#   make VERBOSE=1  -> QUIET is empty, so each command is echoed
#   make            -> QUIET is '@', so commands run silently
# The previous `$(ifeq VERBOSE,1)` / `$(else)` / `$(endif)` lines were plain
# (undefined) variable references rather than conditional directives, so both
# assignments ran and the first one always won, leaving QUIET empty.
ifeq ($(strip $(VERBOSE)),1)
QUIET ?=
else
QUIET ?= @
endif
54+
55+
# Entry point of the bare-metal demo application.
DEMO_MAIN = src/demo_bare_metal.c

# Generated model sources and the objects compiled from them. The object names
# are derived by replacing only the trailing ".c" of each word; $(subst .c,.o,…)
# would also rewrite a ".c" that occurs elsewhere in the absolute path (for
# example a ".cache" or ".config" directory component).
# The lists are expanded once (:=) so the globs are not re-run on every use.
CODEGEN_SRCS := $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c)
CODEGEN_OBJS := $(CODEGEN_SRCS:.c=.o)

# CMSIS device startup sources for the selected CPU and the C sources found in
# the Corstone-300 platform directory.
CMSIS_STARTUP_SRCS := $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c)
UART_SRCS := $(wildcard ${CORSTONE_300_PATH}/*.c)
60+
61+
# `demo` is a convenience alias for the linked image, not a file. Declaring it
# phony keeps a stray file named "demo" from making the target look up to date.
.PHONY: demo
demo: $(BUILD_DIR)/demo
62+
63+
# Compile the two standalone CRT sources the demo links against. Each rule has
# exactly one prerequisite, so $< names the single source file.
$(BUILD_DIR)/stack_allocator.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/memory/stack_allocator.c
	$(QUIET)mkdir -p $(dir $@)
	$(QUIET)$(CC) $(PKG_CFLAGS) -c $< -o $@

$(BUILD_DIR)/crt_backend_api.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/common/crt_backend_api.c
	$(QUIET)mkdir -p $(dir $@)
	$(QUIET)$(CC) $(PKG_CFLAGS) -c $< -o $@
70+
71+
# Build generated code
72+
# Archive the generated model code. The sources are compiled from inside their
# own directory, so the objects land next to them (the paths listed in
# CODEGEN_OBJS); the archive itself is addressed by absolute path.
$(BUILD_DIR)/libcodegen.a: $(CODEGEN_SRCS)
	$(QUIET)cd $(abspath $(BUILD_DIR))/codegen/host/src && $(CC) -c $(PKG_CFLAGS) $(CODEGEN_SRCS)
	$(QUIET)$(AR) -cr $(abspath $@) $(CODEGEN_OBJS)
	$(QUIET)$(RANLIB) $(abspath $@)
76+
77+
# Build CMSIS startup code
78+
# Archive the CMSIS startup code. Objects are compiled into a scratch directory
# under the build directory, and every *.o found there is added to the archive.
$(BUILD_DIR)/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS)
	$(QUIET)mkdir -p $(abspath $(BUILD_DIR))/libcmsis_startup
	$(QUIET)cd $(abspath $(BUILD_DIR))/libcmsis_startup && $(CC) -c $(PKG_CFLAGS) -D$(ARM_CPU) $^
	$(QUIET)$(AR) -cr $(abspath $@) $(abspath $(BUILD_DIR))/libcmsis_startup/*.o
	$(QUIET)$(RANLIB) $(abspath $@)
83+
84+
# Build CMSIS-NN
85+
# Configure CMSIS-NN with cmake into $(BUILD_DIR)/cmsis_nn and build its `all`
# target there.
# NOTE(review): this is the only CMSIS-NN rule visible in this Makefile, yet the
# demo lists further cmsis_nn libraries as prerequisites; presumably this one
# cmake/make run produces all of them as a side effect - confirm, since that
# relies on this rule running before the other libraries are checked.
$(BUILD_DIR)/cmsis_nn/Source/SoftmaxFunctions/libCMSISNNSoftmax.a:
	$(QUIET)mkdir -p $(dir $@)
	$(QUIET)cd $(CMSIS_PATH)/CMSIS/NN && $(CMAKE) -B $(abspath $(BUILD_DIR))/cmsis_nn $(CMSIS_NN_CMAKE_FLAGS)
	$(QUIET)cd $(abspath $(BUILD_DIR))/cmsis_nn && $(MAKE) all
89+
90+
# Build demo application
91+
# CMSIS-NN static libraries linked into the demo, in link order.
demo_cmsis_nn_libs = $(addprefix $(BUILD_DIR)/cmsis_nn/Source/, \
    SoftmaxFunctions/libCMSISNNSoftmax.a \
    FullyConnectedFunctions/libCMSISNNFullyConnected.a \
    SVDFunctions/libCMSISNNSVDF.a \
    ReshapeFunctions/libCMSISNNReshape.a \
    ActivationFunctions/libCMSISNNActivation.a \
    NNSupportFunctions/libCMSISNNSupport.a \
    ConcatenationFunctions/libCMSISNNConcatenation.a \
    BasicMathFunctions/libCMSISNNBasicMaths.a \
    ConvolutionFunctions/libCMSISNNConvolutions.a \
    PoolingFunctions/libCMSISNNPooling.a)

# Compile the demo and platform sources and link them with every prerequisite
# object and archive into the final image. All prerequisites are placed between
# --whole-archive / --no-whole-archive on the link line.
# FREERTOS_FLAGS is not defined anywhere in this Makefile, so it expands to
# nothing unless it is supplied from the environment or the command line.
$(BUILD_DIR)/demo: $(DEMO_MAIN) $(UART_SRCS) \
        $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \
        $(BUILD_DIR)/libcodegen.a $(BUILD_DIR)/libcmsis_startup.a \
        $(demo_cmsis_nn_libs)
	$(QUIET)mkdir -p $(dir $@)
	$(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS)
105+
106+
# Housekeeping targets are commands, not files; declare them phony so files
# named "clean" or "cleanall" cannot disable them.
.PHONY: clean cleanall

# Remove only the generated model code.
clean:
	$(QUIET)rm -rf $(BUILD_DIR)/codegen

# Remove the whole build directory.
cleanall:
	$(QUIET)rm -rf $(BUILD_DIR)

# Disable the legacy built-in suffix rules.
.SUFFIXES:

# Build the demo when make is invoked without a goal. (.DEFAULT is a different
# special target: it supplies a recipe for targets that have no rule, and
# listing prerequisites on it does not select a default goal.)
.DEFAULT_GOAL := demo
+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
2+
<!--- or more contributor license agreements. See the NOTICE file -->
3+
<!--- distributed with this work for additional information -->
4+
<!--- regarding copyright ownership. The ASF licenses this file -->
5+
<!--- to you under the Apache License, Version 2.0 (the -->
6+
<!--- "License"); you may not use this file except in compliance -->
7+
<!--- with the License. You may obtain a copy of the License at -->
8+
<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
9+
<!--- Unless required by applicable law or agreed to in writing, -->
10+
<!--- software distributed under the License is distributed on an -->
11+
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
12+
<!--- KIND, either express or implied. See the License for the -->
13+
<!--- specific language governing permissions and limitations -->
14+
<!--- under the License. -->
15+
Running PP-PicoDet via TVM on bare metal Arm(R) Cortex(R)-M55 CPU and CMSIS-NN
16+
===============================================================
17+
18+
This folder contains an example of how to use TVM to run a PP-PicoDet model
19+
on bare metal Cortex(R)-M55 CPU and CMSIS-NN.
20+
21+
Prerequisites
22+
-------------
23+
If the demo is run in the ci_cpu Docker container provided with TVM, then the following
24+
software will already be installed.
25+
26+
If the demo is not run in the ci_cpu Docker container, then you will need the following:
27+
- Software required to build and run the demo (These can all be installed by running
28+
tvm/docker/install/ubuntu_install_ethosu_driver_stack.sh.)
29+
- [Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software](https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps)
30+
- [cmake 3.19.5](https://github.com/Kitware/CMake/releases/)
31+
- [GCC toolchain from Arm(R)](https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2)
32+
- [Arm(R) Ethos(TM)-U NPU driver stack](https://review.mlplatform.org)
33+
- [CMSIS](https://github.com/ARM-software/CMSIS_5)
34+
- The python libraries listed in the requirements.txt of this directory
35+
- These can be installed by running the following from the current directory:
36+
```bash
37+
pip install -r ./requirements.txt
38+
```
39+
40+
You will also need TVM which can either be:
41+
- Built from source (see [Install from Source](https://tvm.apache.org/docs/install/from_source.html))
42+
- When building from source, the following need to be set in config.cmake:
43+
- set(USE_CMSISNN ON)
44+
- set(USE_MICRO ON)
45+
- set(USE_LLVM ON)
46+
- Installed from TLCPack (see [TLCPack](https://tlcpack.ai/))
47+
48+
You will need to update your PATH environment variable to include the path to cmake 3.19.5 and the FVP.
49+
For example if you've installed these in ```/opt/arm``` , then you would do the following:
50+
```bash
51+
export PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH
52+
```
53+
54+
Running the demo application
55+
----------------------------
56+
Type the following command to run the bare metal object detection application ([src/demo_bare_metal.c](./src/demo_bare_metal.c)):
57+
```bash
58+
./run_demo.sh
59+
```
60+
If the Ethos(TM)-U platform and/or CMSIS have not been installed in /opt/arm/ethosu then
61+
the locations for these can be specified as arguments to run_demo.sh, for example:
62+
63+
```bash
64+
./run_demo.sh --cmsis_path /home/tvm-user/cmsis \
65+
--ethosu_platform_path /home/tvm-user/ethosu/core_platform
66+
```
67+
68+
This will:
69+
- Download a PP-PicoDet object detection model
70+
- Use tvmc to compile the object detection model for Cortex(R)-M55 CPU and CMSIS-NN
71+
- Create a C header file inputs.h containing the image data as a C array
72+
- Create a C header file outputs.h containing the C arrays where the output of inference will be stored
73+
- Build the demo application
74+
- Run the demo application on a Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software
75+
- The application will report the objects detected in the image and the corresponding scores.
76+
77+
Using your own image
78+
--------------------
79+
The convert_image.py script takes a single argument on the command line which is the path of the
80+
image to be converted into an array of bytes for consumption by the model.
81+
82+
The demo can be modified to use an image of your choice by changing the following line in run_demo.sh
83+
84+
```bash
85+
python3 ./convert_image.py ../../demo/000000014439_640x640.jpg
86+
```
87+
88+
Model description
89+
-----------------
90+
The model used in this demo is based on [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet), which is released by [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection). Thanks to its excellent performance, PP-PicoDet is well suited to deployment on mobile devices and CPUs.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Include guard: the body of this toolchain file is processed only once.
if(__TOOLCHAIN_LOADED)
    return()
endif()
set(__TOOLCHAIN_LOADED TRUE)

# Cross-compilation target and compilers.
set(CMAKE_SYSTEM_NAME Generic)
set(CMAKE_C_COMPILER "arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
set(CMAKE_SYSTEM_PROCESSOR "cortex-m55" CACHE STRING "Select Arm(R) Cortex(R)-M architecture. (cortex-m0, cortex-m3, cortex-m33, cortex-m4, cortex-m55, cortex-m7, etc)")

# Compiler checks build a static library instead of linking an executable.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# Search modes for find_program / find_library / find_path.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

# Language standards.
set(CMAKE_C_STANDARD 99)
set(CMAKE_CXX_STANDARD 14)

# The system processor may carry feature suffixes, for example
# cortex-m33+nodsp+nofp. Keep the full string for -mcpu, then split it on "+"
# so that CMAKE_SYSTEM_PROCESSOR holds the bare CPU name and __CPU_FEATURES the
# remaining feature words.
set(__CPU_COMPILE_TARGET ${CMAKE_SYSTEM_PROCESSOR})
string(REPLACE "+" ";" __CPU_FEATURES ${__CPU_COMPILE_TARGET})
list(POP_FRONT __CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)

# CPU_FEATURES is the suffix starting at the first "+", when there is one.
string(FIND ${__CPU_COMPILE_TARGET} "+" __OFFSET)
if(__OFFSET GREATER_EQUAL 0)
    string(SUBSTRING ${__CPU_COMPILE_TARGET} ${__OFFSET} -1 CPU_FEATURES)
endif()

# Add -mcpu to the compile options to override the -mcpu the CMake toolchain adds
add_compile_options(-mcpu=${__CPU_COMPILE_TARGET})

# Floating point ABI: an explicit +fp / +nofp feature decides; otherwise
# cortex-m33 and cortex-m55 use the hard-float ABI and everything else soft.
set(FLOAT soft)
if("${__CPU_COMPILE_TARGET}" MATCHES "\\+fp")
    set(FLOAT hard)
elseif("${__CPU_COMPILE_TARGET}" MATCHES "\\+nofp")
    set(FLOAT soft)
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
       "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
    set(FLOAT hard)
endif()

add_compile_options(-mfloat-abi=${FLOAT})
add_link_options(-mfloat-abi=${FLOAT})

# Link with the same CPU selection and emit a linker map file.
add_link_options(-mcpu=${__CPU_COMPILE_TARGET})
add_link_options(-Xlinker -Map=output.map)

# Flags applied to C++ sources only (through the generator expression below).
set(cxx_flags "-fno-unwind-tables;-fno-rtti;-fno-exceptions")

# Warning configuration for all languages, followed by the C++-only flags.
add_compile_options(
    -Wall
    -Wextra
    -Wsign-compare
    -Wunused
    -Wswitch-default
    -Wdouble-promotion
    -Wredundant-decls
    -Wshadow
    -Wnull-dereference
    -Wno-format-extra-args
    -Wno-unused-function
    -Wno-unused-label
    -Wno-missing-field-initializers
    -Wno-return-type
    -Wno-format
    -Wno-int-conversion
    "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import os
19+
import pathlib
20+
import re
21+
import sys
22+
import cv2
23+
import math
24+
from PIL import Image
25+
import numpy as np
26+
27+
def resize_norm_img(img, image_shape, padding=True):
    """
    Resize an image and normalize it into a float32 array with a leading batch axis.

    img: image array as returned by cv2.imread (it is converted from BGR to RGB here).
    image_shape: sequence of three values (channels, height, width); only the
        height and width are used, as the resize target.
    padding: accepted but not used by this function.

    Returns the normalized data as float32 with shape (1, 3, height, width).
    """
    _, target_h, target_w = image_shape

    resized = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # Channels first, divide by 255, then prepend the batch axis.
    chw = np.transpose(rgb, [2, 0, 1]) / 255
    batched = np.expand_dims(chw, 0)

    # Per-channel mean/std, shaped to broadcast over height and width.
    channel_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    channel_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

    normalized = (batched - channel_mean) / channel_std
    return normalized.astype(np.float32)
39+
40+
41+
def create_header_file(name, tensor_name, tensor_data, output_path):
    """
    Append a C array definition holding the contents of a numpy array to a header file.

    name: base name of the header; the file written is <output_path>/<name> with
        its suffix set to ".h".
    tensor_name: C identifier used for the array and for the "<tensor_name>_len" constant.
    tensor_data: numpy array whose elements are written in C (row-major) order.
    output_path: directory in which the header is located.

    The file is opened in append mode, so repeated calls with the same name add
    further arrays to the same header.
    """
    header_path = pathlib.Path(f"{output_path}/{name}").resolve().with_suffix(".h").resolve()

    declaration = (
        "\n"
        + f"const size_t {tensor_name}_len = {tensor_data.size};\n"
        + f'__attribute__((section(".data.tvm"), aligned(16))) float {tensor_name}[] = '
    )
    # One "<value>, " entry per element, visiting the array in row-major order.
    elements = "".join(f"{value}, " for value in tensor_data.flat)

    with open(header_path, "a") as header_file:
        header_file.write(declaration)
        header_file.write("{" + elements + "};\n\n")
59+
60+
61+
def create_headers(image_name):
    """
    This function generates C header files for the input and output arrays required to run inferences

    image_name: path of the input image, relative to the current directory.

    Writes ./include/inputs.h (the preprocessed image as the array "input") and
    ./include/outputs.h (zero-filled arrays "output0" and "output1"). Existing
    copies of both headers are removed first because create_header_file appends.

    Raises FileNotFoundError when the image cannot be read.
    """
    img_path = os.path.join("./", f"{image_name}")

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None;
        # fail here with a clear message instead of inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Resize image to 32x320
    # NOTE(review): the output lengths below (8500 and 170000) look like they
    # belong to a 320x320 input - confirm that [3, 32, 320] is the shape the
    # compiled model expects.
    img = resize_norm_img(img, [3, 32, 320])
    img_data = img.astype("float32")

    # Add a leading axis. resize_norm_img already returns a batched array, so
    # this only adds one more axis of size 1; the number and order of the
    # elements written to the header are unaffected.
    img_data = np.expand_dims(img_data, axis=0)

    # Start from clean headers, tolerating a first run where they do not exist yet.
    for stale_header in ("./include/inputs.h", "./include/outputs.h"):
        if os.path.exists(stale_header):
            os.remove(stale_header)

    # Create input header file
    create_header_file("inputs", "input", img_data, "./include")

    # Create output header file: both output buffers go into outputs.h.
    # np.float32 replaces the np.float alias, which was removed from NumPy; the
    # arrays are declared as C float and zero-filled, so the text is unchanged.
    for tensor_name, length in (("output0", 8500), ("output1", 170000)):
        output_data = np.zeros([length], np.float32)
        create_header_file(
            "outputs",
            tensor_name,
            output_data,
            "./include",
        )
94+
95+
96+
if __name__ == "__main__":
    # Expect the image path as the first command-line argument; print a usage
    # message instead of failing with a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    create_headers(sys.argv[1])

0 commit comments

Comments
 (0)