Commit d7c1294

fastrt patch update
Summary: Move div255 to the GPU and add numpy ndarray read/write, which makes comparing the torch and TRT results easier. Reviewed By: l1aoxingyu
1 parent 0cc9fb9 commit d7c1294

14 files changed: +872 −15 lines

projects/FastRT/CMakeLists.txt (+6 −1)

````diff
@@ -29,8 +29,13 @@ endif()
 
 option(CUDA_USE_STATIC_CUDA_RUNTIME "Use Static CUDA" OFF)
 option(BUILD_FASTRT_ENGINE "Build FastRT Engine" ON)
-option(BUILD_DEMO "Build DEMO" OFF)
+option(BUILD_DEMO "Build DEMO" ON)
 option(BUILD_FP16 "Build Engine as FP16" OFF)
+option(USE_CNUMPY "Include CNPY libs" OFF)
+
+if(USE_CNUMPY)
+    add_definitions(-DUSE_CNUMPY)
+endif()
 
 if(BUILD_FP16)
     add_definitions(-DBUILD_FP16)
````
projects/FastRT/README.md (+26 −10)

````diff
@@ -13,17 +13,24 @@ So we don't use any parsers here.
 
 2. Config your model
 
-   See [Tensorrt Model Config](#ConfigSection)
+   See [Tensorrt Model Config](#ConfigSection)
 
-3. Build `fastrt` execute file
+3. (Optional) Build <a name="step3"></a>`third party` libs
+
+   See [Build third_party section](#third_party)
+
+4. Build <a name="step4"></a>`fastrt` execute file
 
   ```
   mkdir build
   cd build
-  cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON ..
+  cmake -DBUILD_FASTRT_ENGINE=ON \
+        -DBUILD_DEMO=ON \
+        -DUSE_CNUMPY=ON ..
   make
   ```
-4. Run <a name="step4"></a>`fastrt`
+
+5. Run <a name="step5"></a>`fastrt`
 
    put `model_best.wts` into `FastRT/`
 
@@ -35,20 +42,20 @@ So we don't use any parsers here.
   ./demo/fastrt -d // deserialize 'xxx.engine' file and run inference
   ```
 
-5. Verify the output with pytorch
+6. Verify the output with pytorch
 
 
-6. (Optional) Once you verify the result, you can set FP16 for speed up
+7. (Optional) Once you verify the result, you can set FP16 for speed up
   ```
   mkdir build
   cd build
   cmake -DBUILD_FASTRT_ENGINE=ON -DBUILD_DEMO=ON -DBUILD_FP16=ON ..
   make
   ```
 
-   then go to [step 4](#step4)
+   then go to [step 5](#step5)
 
-7. (Optional) Build tensorrt model as shared libs
+8. (Optional) Build tensorrt model as shared libs
 
   ```
   mkdir build
@@ -65,7 +72,7 @@ So we don't use any parsers here.
   make
   ```
 
-   then go to [step 4](#step4)
+   then go to [step 5](#step5)
 
 ### <a name="ConfigSection"></a>`Tensorrt Model Config`
 
@@ -213,5 +220,14 @@ static const int EMBEDDING_DIM = 0;
 sudo docker run --gpus all -it --name fastrt -v /home/YOURID/workspace:/workspace -d trt7:cuda102
 // then put the repo into `/home/YOURID/workspace/` before you getin container
 ```
-
+
 * [Installation reference](https://github.com/wang-xinyu/tensorrtx/blob/master/tutorials/install.md)
+
+### Build <a name="third_party"></a> third party
+
+* for read/write numpy
+
+```
+cd third_party/cnpy
+cmake -DCMAKE_INSTALL_PREFIX=../../libs/cnpy -DENABLE_STATIC=OFF . && make -j4 && make install
+```
````

projects/FastRT/demo/CMakeLists.txt (+9)

````diff
@@ -12,6 +12,14 @@ link_directories(/usr/lib/x86_64-linux-gnu/)
 include_directories(${SOLUTION_DIR}/include)
 add_executable(${APP_PROJECT_NAME} inference.cpp)
 
+# numpy
+if(USE_CNUMPY)
+    include_directories(${SOLUTION_DIR}/libs/cnpy/include)
+    SET(CNPY_LIB ${SOLUTION_DIR}/libs/cnpy/lib/libcnpy.so)
+else()
+    SET(CNPY_LIB)
+endif()
+
 # OpenCV
 find_package(OpenCV)
 target_include_directories(${APP_PROJECT_NAME}
@@ -33,4 +41,5 @@ target_link_libraries(${APP_PROJECT_NAME}
     PRIVATE
     ${FASTRTENGINE_LIB}
     nvinfer
+    ${CNPY_LIB}
 )
````

projects/FastRT/demo/inference.cpp (+9)

````diff
@@ -7,6 +7,10 @@
 using namespace fastrt;
 using namespace nvinfer1;
 
+#ifdef USE_CNUMPY
+#include "cnpy.h"
+#endif
+
 /* Ex1. sbs_R50-ibn */
 static const std::string WEIGHTS_PATH = "../sbs_R50-ibn.wts";
 static const std::string ENGINE_PATH = "./sbs_R50-ibn.engine";
@@ -92,6 +96,11 @@ int main(int argc, char** argv) {
     /* get output from cudaMallocHost */
     float* feat_embedding = baseline.getOutput();
 
+#ifdef USE_CNUMPY
+    /* save as numpy. shape = (OUTPUT_SIZE,) */
+    cnpy::npy_save("./feat_embedding.npy", feat_embedding, {OUTPUT_SIZE}, "w");
+#endif
+
     /* print output */
     TRTASSERT(feat_embedding);
     for (size_t img_idx = 0; img_idx < input.size(); ++img_idx) {
````
projects/FastRT/fastrt/meta_arch/baseline.cpp (+4 −4)

````diff
@@ -9,17 +9,17 @@ namespace fastrt {
 void Baseline::preprocessing_cpu(const cv::Mat& img, float* const data, const std::size_t stride) {
     /* Normalization & BGR->RGB */
     for (std::size_t i = 0; i < stride; ++i) {
-        data[i] = img.at<cv::Vec3b>(i)[2] / 255.0;
-        data[i + stride] = img.at<cv::Vec3b>(i)[1] / 255.0;
-        data[i + (stride<<1)] = img.at<cv::Vec3b>(i)[0] / 255.0;
+        data[i] = img.at<cv::Vec3b>(i)[2];
+        data[i + stride] = img.at<cv::Vec3b>(i)[1];
+        data[i + (stride<<1)] = img.at<cv::Vec3b>(i)[0];
     }
 }
 
 ITensor* Baseline::preprocessing_gpu(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor* input) {
     /* Standardization */
     static const float mean[3] = {123.675, 116.28, 103.53};
     static const float std[3] = {58.395, 57.120000000000005, 57.375};
-    return addMeanStd(network, weightMap, input, "", mean, std, false); // true for div 255
+    return addMeanStd(network, weightMap, input, "", mean, std, true); // true for div 255
 }
 
 }
````
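`addMeanStd` is the project's own helper, but the idea maps directly onto TensorRT's `IScaleLayer`, which computes `(x * scale + shift) ^ power` per channel. Below is a hedged sketch of a fused `(x - mean) / std` layer under that assumption; the `div255` flag presumably folds an extra 1/255 into the same constants, which this sketch omits:

```c++
// Hypothetical sketch of a fused mean/std preprocessing layer;
// not the actual addMeanStd implementation.
#include "NvInfer.h"

nvinfer1::ITensor* addMeanStdSketch(nvinfer1::INetworkDefinition* network,
                                    nvinfer1::ITensor* input) {
    static const float mean[3] = {123.675f, 116.28f, 103.53f};
    static const float stdv[3] = {58.395f, 57.12f, 57.375f};
    // Weight buffers must outlive engine building, hence static storage here.
    static float scale_buf[3], shift_buf[3], power_buf[3];
    for (int c = 0; c < 3; ++c) {
        scale_buf[c] = 1.f / stdv[c];        // (x - mean)/std == x*(1/std) + (-mean/std)
        shift_buf[c] = -mean[c] / stdv[c];
        power_buf[c] = 1.f;                  // power of 1 keeps the affine result as-is
    }
    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, scale_buf, 3};
    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, shift_buf, 3};
    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, power_buf, 3};
    // kCHANNEL applies one (shift, scale, power) triple per channel.
    auto* layer = network->addScale(*input, nvinfer1::ScaleMode::kCHANNEL,
                                    shift, scale, power);
    return layer->getOutput(0);
}
```

Moving this arithmetic into the network is what lets `preprocessing_cpu` shrink to a plain BGR-to-RGB pixel copy, as the diff above shows.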
projects/FastRT/third_party/cnpy/CMakeLists.txt (+30)

````diff
@@ -0,0 +1,30 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
+if(COMMAND cmake_policy)
+    cmake_policy(SET CMP0003 NEW)
+endif(COMMAND cmake_policy)
+
+project(CNPY)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+option(ENABLE_STATIC "Build static (.a) library" ON)
+
+find_package(ZLIB REQUIRED)
+
+include_directories(${ZLIB_INCLUDE_DIRS})
+
+add_library(cnpy SHARED "cnpy.cpp")
+target_link_libraries(cnpy ${ZLIB_LIBRARIES})
+install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+
+if(ENABLE_STATIC)
+    add_library(cnpy-static STATIC "cnpy.cpp")
+    set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy")
+    install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib)
+endif(ENABLE_STATIC)
+
+install(FILES "cnpy.h" DESTINATION include)
+install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
+
+add_executable(example1 example1.cpp)
+target_link_libraries(example1 cnpy)
````
projects/FastRT/third_party/cnpy/LICENSE (+21)

````diff
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) Carl Rogers, 2011
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
````
projects/FastRT/third_party/cnpy/README.md (+55)

````diff
@@ -0,0 +1,55 @@
+# Purpose:
+
+NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file.
+
+`cnpy` lets you read and write to these formats in C++.
+
+The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python.
+
+Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size.
+The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary.
+
+Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice.
+
+# Installation:
+
+Default installation directory is /usr/local.
+To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4.
+
+1. get [cmake](www.cmake.org)
+2. create a build directory, say $HOME/build
+3. cd $HOME/build
+4. cmake /path/to/cnpy
+5. make
+6. make install
+
+# Using:
+
+To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as
+
+```bash
+g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11
+```
+
+# Description:
+
+There are two functions for writing data: `npy_save` and `npz_save`.
+
+There are 3 functions for reading:
+- `npy_load` will load a .npy file.
+- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues.
+- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file.
+
+The data structure for loaded data is below.
+Data is accessed via the `data<T>()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data).
+The array shape and word size are read from the npy header.
+
+```c++
+struct NpyArray {
+    std::vector<size_t> shape;
+    size_t word_size;
+    template<typename T> T* data();
+};
+```
+
+See [example1.cpp](example1.cpp) for examples of how to use the library. example1 will also be build during cmake installation.
````
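A short save/load round trip makes the cnpy API above concrete (a sketch, assuming cnpy is installed as described in its README; file and variable names are arbitrary):

```c++
#include <cassert>
#include <vector>
#include "cnpy.h"

int main() {
    std::vector<float> out = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
    cnpy::npy_save("arr.npy", out.data(), {2, 3}, "w");      // write a 2x3 float array

    cnpy::NpyArray in = cnpy::npy_load("arr.npy");
    assert(in.shape[0] == 2 && in.shape[1] == 3);            // shape comes from the header
    assert(in.word_size == sizeof(float));                   // so does the element size
    assert(in.data<float>()[5] == 6.f);                      // cast must match the saved type

    cnpy::npz_save("out.npz", "feat", out.data(), {6}, "w"); // one named array in a .npz
    cnpy::NpyArray feat = cnpy::npz_load("out.npz", "feat"); // load that variable back
    return 0;
}
```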
