Commit d7c1294

fastrt patch update
Summary: Move div255 to the GPU and add numpy ndarray read/write, which makes comparing the torch and TRT results easier. Reviewed By: l1aoxingyu
1 parent 0cc9fb9 commit d7c1294

14 files changed: +872 −15 lines

projects/FastRT/CMakeLists.txt (+6 −1)

````diff
@@ -29,8 +29,13 @@ endif()
 
 option(CUDA_USE_STATIC_CUDA_RUNTIME "Use Static CUDA" OFF)
 option(BUILD_FASTRT_ENGINE "Build FastRT Engine" ON)
-option(BUILD_DEMO "Build DEMO" OFF)
+option(BUILD_DEMO "Build DEMO" ON)
 option(BUILD_FP16 "Build Engine as FP16" OFF)
+option(USE_CNUMPY "Include CNPY libs" OFF)
+
+if(USE_CNUMPY)
+    add_definitions(-DUSE_CNUMPY)
+endif()
 
 if(BUILD_FP16)
     add_definitions(-DBUILD_FP16)
````
projects/FastRT/README.md (+26 −10)

````diff
@@ -13,17 +13,24 @@ So we don't use any parsers here.
 
 2. Config your model
 
-   See [Tensorrt Model Config](#ConfigSection)
+   See [Tensorrt Model Config](#ConfigSection)
 
-3. Build `fastrt` execute file
+3. (Optional) Build <a name="step3"></a>`third party` libs
+
+   See [Build third_party section](#third_party)
+
+4. Build <a name="step4"></a>`fastrt` execute file
 
   ```
   mkdir build
   cd build
-  cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON ..
+  cmake -DBUILD_FASTRT_ENGINE=ON \
+        -DBUILD_DEMO=ON \
+        -DUSE_CNUMPY=ON ..
   make
   ```
-4. Run <a name="step4"></a>`fastrt`
+
+5. Run <a name="step5"></a>`fastrt`
 
    put `model_best.wts` into `FastRT/`
 
@@ -35,20 +42,20 @@ So we don't use any parsers here.
   ./demo/fastrt -d // deserialize 'xxx.engine' file and run inference
   ```
 
-5. Verify the output with pytorch
+6. Verify the output with pytorch
 
 
-6. (Optional) Once you verify the result, you can set FP16 for speed up
+7. (Optional) Once you verify the result, you can set FP16 for speed up
   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_FP16=ON ..
   make
   ```
 
-   then go to [step 4](#step4)
+   then go to [step 5](#step5)
 
-7. (Optional) Build tensorrt model as shared libs
+8. (Optional) Build tensorrt model as shared libs
 
   ```
   mkdir build
@@ -65,7 +72,7 @@ So we don't use any parsers here.
   make
   ```
 
-   then go to [step 4](#step4)
+   then go to [step 5](#step5)
 
 ### <a name="ConfigSection"></a>`Tensorrt Model Config`
 
@@ -213,5 +220,14 @@ static const int EMBEDDING_DIM = 0;
 sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda102
 // then put the repo into `/home/YOURID/workspace/` before you getin container
 ```
-
+
 * [Installation reference](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
+
+### Build <a name="third_party"></a> third party
+
+* for read/write numpy
+
+```
+cd third_party/cnpy
+cmake -DCMAKE_INSTALL_PREFIX=../../libs/cnpy -DENABLE_STATIC=OFF . && make -j4 && make install
+```
````

projects/FastRT/demo/CMakeLists.txt (+9)

````diff
@@ -12,6 +12,14 @@ link_directories(/usr/lib/x86_64-linux-gnu/)
 include_directories(${SOLUTION_DIR}/include)
 add_executable(${APP_PROJECT_NAME} inference.cpp)
 
+# numpy
+if(USE_CNUMPY)
+    include_directories(${SOLUTION_DIR}/libs/cnpy/include)
+    SET(CNPY_LIB ${SOLUTION_DIR}/libs/cnpy/lib/libcnpy.so)
+else()
+    SET(CNPY_LIB)
+endif()
+
 # OpenCV
 find_package(OpenCV)
 target_include_directories(${APP_PROJECT_NAME}
@@ -33,4 +41,5 @@ target_link_libraries(${APP_PROJECT_NAME}
     PRIVATE
     ${FASTRTENGINE_LIB}
     nvinfer
+    ${CNPY_LIB}
 )
````

projects/FastRT/demo/inference.cpp (+9)

````diff
@@ -7,6 +7,10 @@
 using namespace fastrt;
 using namespace nvinfer1;
 
+#ifdef USE_CNUMPY
+#include "cnpy.h"
+#endif
+
 /* Ex1. sbs_R50-ibn */
 static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
 static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
@@ -92,6 +96,11 @@ int main(int argc, char** argv) {
     /* get output from cudaMallocHost */
     float* feat_embedding = baseline.getOutput();
 
+#ifdef USE_CNUMPY
+    /* save as numpy. shape = (OUTPUT_SIZE,) */
+    cnpy::npy_save("./feat_embedding.npy", feat_embedding, {OUTPUT_SIZE}, "w");
+#endif
+
     /* print output */
     TRTASSERT(feat_embedding);
     for (size_t img_idx = 0; img_idx < input.size(); ++img_idx) {
````
projects/FastRT/fastrt/meta_arch/baseline.cpp (+4 −4)

````diff
@@ -9,17 +9,17 @@ namespace fastrt {
 void Baseline::preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) {
     /* Normalization & BGR->RGB */
     for (std::size_t i = 0; i < stride; ++i) {
-        data[i] = img.at<cv::Vec3b>(i)[2] / 255.0;
-        data[i + stride] = img.at<cv::Vec3b>(i)[1] / 255.0;
-        data[i + (stride<<1)] = img.at<cv::Vec3b>(i)[0] / 255.0;
+        data[i] = img.at<cv::Vec3b>(i)[2];
+        data[i + stride] = img.at<cv::Vec3b>(i)[1];
+        data[i + (stride<<1)] = img.at<cv::Vec3b>(i)[0];
     }
 }
 
 ITensor* Baseline::preprocessing_gpu(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor* input) {
     /* Standardization */
     static const float mean[3] = {123.675, 116.28, 103.53};
     static const float std[3] = {58.395, 57.120000000000005, 57.375};
-    return addMeanStd(network, weightMap, input, "", mean, std, false); // true for div 255
+    return addMeanStd(network, weightMap, input, "", mean, std, true); // true for div 255
 }
 
 }
````
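`addMeanStd` is the project's own helper, but the idea maps directly onto TensorRT's `IScaleLayer`, which computes `(x * scale + shift) ^ power` per channel. Below is a hedged sketch of a fused `(x - mean) / std` layer under that assumption; the `div255` flag presumably folds an extra 1/255 into the same constants, which this sketch omits:

```c++
// Hypothetical sketch of a fused mean/std preprocessing layer;
// not the actual addMeanStd implementation.
#include "NvInfer.h"

nvinfer1::ITensor* addMeanStdSketch(nvinfer1::INetworkDefinition* network,
                                    nvinfer1::ITensor* input) {
    static const float mean[3] = {123.675f, 116.28f, 103.53f};
    static const float stdv[3] = {58.395f, 57.12f, 57.375f};
    // Weight buffers must outlive engine building, hence static storage here.
    static float scale_buf[3], shift_buf[3], power_buf[3];
    for (int c = 0; c < 3; ++c) {
        scale_buf[c] = 1.f / stdv[c];        // (x - mean)/std == x*(1/std) + (-mean/std)
        shift_buf[c] = -mean[c] / stdv[c];
        power_buf[c] = 1.f;                  // power of 1 keeps the affine result as-is
    }
    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, scale_buf, 3};
    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, shift_buf, 3};
    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, power_buf, 3};
    // kCHANNEL applies one (shift, scale, power) triple per channel.
    auto* layer = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL,
                                    shift, scale, power);
    return layer->getOutput(0);
}
```

Moving this arithmetic into the network is what lets `preprocessing_cpu` shrink to a plain BGR-to-RGB pixel copy, as the diff above shows.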
projects/FastRT/third_party/cnpy/CMakeLists.txt (+30)

````diff
@@ -0,0 +1,30 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
+if(COMMAND cmake_policy)
+    cmake_policy(SET CMP0003 NEW)
+endif(COMMAND cmake_policy)
+
+project(CNPY)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+option(ENABLE_STATIC "Build static (.a) library" ON)
+
+find_package(ZLIB REQUIRED)
+
+include_directories(${ZLIB_INCLUDE_DIRS})
+
+add_library(cnpy SHARED "cnpy.cpp")
+target_link_libraries(cnpy ${ZLIB_LIBRARIES})
+install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+
+if(ENABLE_STATIC)
+    add_library(cnpy-static STATIC "cnpy.cpp")
+    set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy")
+    install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib)
+endif(ENABLE_STATIC)
+
+install(FILES "cnpy.h" DESTINATION include)
+install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+
+add_executable(example1 example1.cpp)
+target_link_libraries(example1 cnpy)
````
projects/FastRT/third_party/cnpy/LICENSE (+21)

````diff
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) Carl Rogers, 2011
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
````
projects/FastRT/third_party/cnpy/README.md (+55)

````diff
@@ -0,0 +1,55 @@
+# Purpose:
+
+NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file.
+
+`cnpy` lets you read and write to these formats in C++.
+
+The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python.
+
+Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size.
+The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary.
+
+Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice.
+
+# Installation:
+
+Default installation directory is /usr/local.
+To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4.
+
+1. get [cmake](www.cmake.org)
+2. create a build directory, say $HOME/build
+3. cd $HOME/build
+4. cmake /path/to/cnpy
+5. make
+6. make install
+
+# Using:
+
+To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as
+
+```bash
+g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11
+```
+
+# Description:
+
+There are two functions for writing data: `npy_save` and `npz_save`.
+
+There are 3 functions for reading:
+- `npy_load` will load a .npy file.
+- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues.
+- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file.
+
+The data structure for loaded data is below.
+Data is accessed via the `data<T>()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data).
+The array shape and word size are read from the npy header.
+
+```c++
+struct NpyArray {
+    std::vector<size_t> shape;
+    size_t word_size;
+    template<typename T> T* data();
+};
+```
+
+See [example1.cpp](example1.cpp) for examples of how to use the library. example1 will also be build during cmake installation.
````
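A short save/load round trip makes the cnpy API above concrete (a sketch, assuming cnpy is installed as described in its README; file and variable names are arbitrary):

```c++
#include <cassert>
#include <vector>
#include "cnpy.h"

int main() {
    std::vector<float> out = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
    cnpy::npy_save("arr.npy", out.data(), {2, 3}, "w");      // write a 2x3 float array

    cnpy::NpyArray in = cnpy::npy_load("arr.npy");
    assert(in.shape[0] == 2 && in.shape[1] == 3);            // shape comes from the header
    assert(in.word_size == sizeof(float));                   // so does the element size
    assert(in.data<float>()[5] == 6.f);                      // cast must match the saved type

    cnpy::npz_save("out.npz", "feat", out.data(), {6}, "w"); // one named array in a .npz
    cnpy::NpyArray feat = cnpy::npz_load("out.npz", "feat"); // load that variable back
    return 0;
}
```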
