Skip to content

Commit 0a751c3

Browse files
Support operators to execute on CANN backend
CANN (Compute Architecture for Neural Networks), developed by Huawei, is a heterogeneous computing architecture for AI. With this commit, executing operators on [Ascend](https://www.hiascend.com/) NPUs becomes available. It was tested with Ascend 310; other NPUs are theoretically supported but have not been tested. To use OpenCV operators with the CANN backend, please refer to the following [sections](https://gist.github.com/fengyuentau/083f7f339592545c1f1d2c1fde6a53dc#file-a_ocv_cann-md): 1. [Install dependencies](https://gist.github.com/fengyuentau/083f7f339592545c1f1d2c1fde6a53dc#install-dependencies) 2. [Install CANN](https://gist.github.com/fengyuentau/083f7f339592545c1f1d2c1fde6a53dc#install-cann) 3. [Compile OpenCV with CANN](https://gist.github.com/fengyuentau/083f7f339592545c1f1d2c1fde6a53dc#build-opencv-with-cann) The CANN backend is used in a similar way to CUDA: | Object | CANN | CUDA | | --------- | --------- | -------- | | Namespace | cv::cann | cv::cuda | | Matrix | AclMat | GpuMat | | Stream | AclStream | Stream | | Event | AclEvent | Event | Current operator support: - [x] add - [x] subtract - [x] multiply - [x] divide - [x] bitwise_and - [x] bitwise_or - [x] bitwise_xor Co-authored-by: CaoMengqing <[email protected]>
1 parent 9e13469 commit 0a751c3

20 files changed

+2275
-0
lines changed

modules/cannarithm/CMakeLists.txt

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Disable the module on iOS, WinRT, Android, Apple and Win32 targets, and whenever HAVE_CANN is not set.
if(IOS OR WINRT OR ANDROID OR APPLE OR WIN32 OR (NOT HAVE_CANN))
2+
ocv_module_disable(cannarithm)
3+
endif()
4+
5+
set(the_description "Ascend-accelerated Operations on Matrices")
6+
7+
# Declare the module with opencv_core as its dependency; "WRAP python" asks for Python bindings.
ocv_add_module(cannarithm opencv_core WRAP python)
8+
# Make the CANN headers visible to the module sources.
ocv_module_include_directories(${CANN_INCLUDE_DIRS})
9+
ocv_glob_module_sources()
10+
ocv_install_used_external_targets(${CANN_LIBRARIES})
11+
# Create the module target and pass the CANN libraries to it.
ocv_create_module(${CANN_LIBRARIES})
12+
13+
# Presumably needed so the accuracy tests can include the ts module headers -- confirm.
ocv_include_directories(${CMAKE_SOURCE_DIR}/modules/ts/include)
14+
15+
ocv_add_accuracy_tests(DEPENDS_ON opencv_cannarithm)
16+
# Performance tests are currently commented out.
#ocv_add_perf_tests(DEPENDS_ON opencv_cannarithm)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
#ifndef OPENCV_CANN_STREAM_ACCESSOR_HPP
6+
#define OPENCV_CANN_STREAM_ACCESSOR_HPP
7+
8+
#include <acl/acl.h>
9+
#include "opencv2/cann.hpp"
10+
11+
namespace cv
12+
{
13+
namespace cann
14+
{
15+
16+
//! @addtogroup cann_struct
17+
//! @{
18+
19+
/** @brief Class that enables getting aclrtStream from cann::AclStream
20+
* NOTE(review): opencv2/cann.hpp forward-declares and befriends this type with the `class` key,
* while it is defined here as a `struct`. This is legal C++ but triggers mismatched-tag warnings
* on some compilers; consider unifying the key.
*/
21+
struct AclStreamAccessor
22+
{
23+
//! takes a cann::AclStream and returns an aclrtStream for it
CV_EXPORTS static aclrtStream getStream(const AclStream& stream);
24+
//! takes an existing aclrtStream and returns a cann::AclStream for it
CV_EXPORTS static AclStream wrapStream(aclrtStream stream);
25+
};
26+
27+
/** @brief Class that enables getting aclrtEvent from cann::AclEvent
28+
* NOTE(review): opencv2/cann.hpp forward-declares and befriends this type with the `class` key,
* while it is defined here as a `struct`. This is legal C++ but triggers mismatched-tag warnings
* on some compilers; consider unifying the key.
*/
29+
struct AclEventAccessor
30+
{
31+
//! takes a cann::AclEvent and returns an aclrtEvent for it
CV_EXPORTS static aclrtEvent getEvent(const AclEvent& event);
32+
//! takes an existing aclrtEvent and returns a cann::AclEvent for it
CV_EXPORTS static AclEvent wrapEvent(aclrtEvent event);
33+
};
34+
35+
//! @} cann_struct
36+
37+
} // namespace cann
38+
} // namespace cv
39+
40+
#endif // OPENCV_CANN_STREAM_ACCESSOR_HPP
+335
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
#ifndef OPENCV_CANN_HPP
6+
#define OPENCV_CANN_HPP
7+
8+
#include "opencv2/core.hpp"
9+
10+
/**
11+
@defgroup cann Ascend-accelerated Computer Vision
12+
@{
13+
@defgroup canncore Core part
14+
@{
15+
@defgroup cann_struct Data Structures
16+
@defgroup cann_init Initialization and Information
17+
@}
18+
@}
19+
*/
20+
21+
namespace cv
22+
{
23+
namespace cann
24+
{
25+
class AclStream;
26+
27+
//! @addtogroup cann_struct
28+
//! @{
29+
30+
//===================================================================================
31+
// AclMat
32+
//===================================================================================
33+
34+
/** @brief Base storage class for NPU memory with reference counting.
35+
* AclMat class has an interface similar to Mat and cuda::GpuMat, and works on the [Ascend
36+
* NPU](https://www.hiascend.com/) backend.
37+
* @sa Mat cuda::GpuMat
38+
*/
39+
40+
class CV_EXPORTS_W AclMat
41+
{
42+
public:
43+
//! Abstract interface used by AclMat to obtain and release its storage.
class CV_EXPORTS_W Allocator
44+
{
45+
public:
46+
virtual ~Allocator() {}
47+
48+
// allocator must fill data, step and refcount fields
49+
virtual bool allocate(AclMat* mat, int rows, int cols, size_t elemSize) = 0;
50+
virtual void free(AclMat* mat) = 0;
51+
};
52+
53+
/**
54+
* @brief Create the default allocator for AclMat. This allocator allocates device memory of
55+
* the requested size.
56+
*/
57+
CV_WRAP static AclMat::Allocator* defaultAllocator();
58+
59+
/**
60+
* @brief Set allocator for AclMat.
61+
* @param allocator Allocator to install as the default one.
62+
*/
63+
CV_WRAP static void setDefaultAllocator(AclMat::Allocator* allocator);
64+
65+
//! default constructor
66+
CV_WRAP explicit AclMat(AclMat::Allocator* allocator_ = AclMat::defaultAllocator());
67+
68+
//! constructs AclMat of the specified size and type
69+
CV_WRAP AclMat(int rows, int cols, int type,
70+
AclMat::Allocator* allocator = AclMat::defaultAllocator());
71+
//! constructs AclMat of the specified size and type
72+
CV_WRAP AclMat(Size size, int type, AclMat::Allocator* allocator = AclMat::defaultAllocator());
73+
74+
//! constructs AclMat and fills it with the specified value s
//! NOTE(review): s is taken by non-const reference, so a temporary such as Scalar(1) cannot be
//! passed; consider const Scalar& (the out-of-line definition would have to change as well).
75+
CV_WRAP AclMat(int rows, int cols, int type, Scalar& s,
76+
AclMat::Allocator* allocator = AclMat::defaultAllocator());
77+
//! constructs AclMat and fills it with the specified value s
//! NOTE(review): same non-const Scalar& limitation as the (rows, cols) overload.
78+
CV_WRAP AclMat(Size size, int type, Scalar& s,
79+
AclMat::Allocator* allocator = AclMat::defaultAllocator());
80+
81+
//! copy constructor
82+
CV_WRAP AclMat(const AclMat& m);
83+
84+
//! constructor for AclMat headers pointing to user-allocated data
85+
AclMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
86+
//! constructor for AclMat headers pointing to user-allocated data
87+
AclMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
88+
89+
//! builds AclMat from host memory (Blocking call)
90+
CV_WRAP explicit AclMat(InputArray arr,
91+
AclMat::Allocator* allocator = AclMat::defaultAllocator());
92+
93+
//! assignment operators
94+
AclMat& operator=(const AclMat& m);
95+
96+
//! destructor - calls release()
97+
~AclMat();
98+
99+
//! sets some of the AclMat elements to s (Blocking call)
100+
CV_WRAP AclMat& setTo(Scalar s);
101+
//! sets some of the AclMat elements to s (Non-Blocking call)
102+
CV_WRAP AclMat& setTo(Scalar s, AclStream& stream);
103+
104+
//! swaps with other smart pointer
105+
CV_WRAP void swap(AclMat& mat);
106+
107+
//! allocates new AclMat data unless the AclMat already has specified size and type
108+
CV_WRAP void create(int rows, int cols, int type);
109+
110+
//! upload host memory data to AclMat (Blocking call)
111+
CV_WRAP void upload(InputArray arr);
112+
//! upload host memory data to AclMat (Non-Blocking call)
113+
CV_WRAP void upload(InputArray arr, AclStream& stream);
114+
115+
//! download data from AclMat to host (Blocking call)
116+
CV_WRAP void download(OutputArray dst) const;
117+
//! download data from AclMat to host (Non-Blocking call)
118+
CV_WRAP void download(OutputArray dst, AclStream& stream) const;
119+
120+
//! converts AclMat to another datatype (Blocking call)
121+
CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype) const;
122+
123+
//! converts AclMat to another datatype (Non-Blocking call)
124+
CV_WRAP void convertTo(CV_OUT AclMat& dst, int rtype, AclStream& stream) const;
125+
126+
//! decreases reference counter, deallocates the data when reference counter reaches 0
127+
CV_WRAP void release();
128+
129+
//! returns element size in bytes
130+
CV_WRAP size_t elemSize() const;
131+
132+
//! returns the size of element channel in bytes
133+
CV_WRAP size_t elemSize1() const;
134+
135+
//! returns element type
136+
CV_WRAP int type() const;
137+
138+
//! returns element depth
139+
CV_WRAP int depth() const;
140+
141+
//! returns number of channels
142+
CV_WRAP int channels() const;
143+
144+
//! returns step/elemSize1()
145+
CV_WRAP size_t step1() const;
146+
147+
//! returns AclMat size : width == number of columns, height == number of rows
148+
CV_WRAP Size size() const;
149+
150+
//! returns true if AclMat data is NULL
151+
CV_WRAP bool empty() const;
152+
153+
//! internal use method: updates the continuity flag
154+
CV_WRAP void updateContinuityFlag();
155+
156+
//! expand one channel mat to multi-channels (Blocking call)
157+
//! @note, source mat must only have one channel, copy value to all channels.
158+
CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels) const;
159+
160+
//! expand one channel mat to multi-channels (Non-Blocking call)
161+
//! @note, source mat must only have one channel, copy value to all channels.
162+
CV_WRAP void expandTo(CV_OUT AclMat& dst, int channels, AclStream& stream) const;
163+
164+
/*! includes several bit-fields:
165+
- the magic signature
166+
- continuity flag
167+
- depth
168+
- number of channels
169+
*/
170+
int flags;
171+
172+
//! the number of rows and columns
173+
int rows, cols;
174+
175+
//! a distance between successive rows in bytes; includes the gap if any
176+
CV_PROP size_t step;
177+
178+
//! pointer to the data
179+
uchar* data;
180+
181+
//! pointer to the reference counter;
182+
//! when AclMat points to user-allocated data, the pointer is NULL
183+
int* refcount;
184+
185+
//! helper fields used in locateROI and adjustROI
//! NOTE(review): AclMat declares no locateROI/adjustROI in this header; this comment looks
//! carried over from cuda::GpuMat -- confirm what these fields are used for here.
186+
uchar* datastart;
187+
const uchar* dataend;
188+
189+
//! allocator
190+
Allocator* allocator;
191+
};
192+
193+
// Forward declarations for the stream/event classes below and for the types they befriend.
// AclStream is already forward-declared at the top of this namespace; the repeat is harmless.
// DefaultDeviceInitializer is not defined anywhere in this header.
class AclStream;
194+
class AclStreamAccessor;
195+
class AclEvent;
196+
class AclEventAccessor;
197+
class DefaultDeviceInitializer;
198+
199+
//===================================================================================
200+
// AclStream
201+
//===================================================================================
202+
203+
/** @brief In AscendCL, a Stream (AclStream) is a task queue. A Stream is used to manage the
204+
* parallelism of tasks. The tasks inside a Stream are executed sequentially, that is, the Stream
205+
* executes them in the order in which they were submitted; the tasks in different Streams are
206+
* executed in parallel.
207+
*
208+
* All non-blocking functions take a stream parameter. These functions return immediately after
209+
* the task is submitted; the caller should wait on the stream until completion.
210+
*
211+
* Blocking functions implicitly use the default stream, and synchronize the stream before the
212+
* function returns.
213+
* @sa cuda::Stream
214+
*/
215+
216+
// TODO: Stream is defined in namespace cuda, and pybind code does not use a namespace of stream,
217+
// change stream name to AclStream to avoid conflict.
218+
class CV_EXPORTS_W AclStream
219+
{
220+
public:
221+
CV_WRAP AclStream();
222+
223+
//! blocks the current CPU thread until all operations in the stream are complete.
224+
CV_WRAP void waitForCompletion();
225+
226+
//! blocks the current CPU thread until the event is triggered.
227+
CV_WRAP void waitAclEvent(const cv::cann::AclEvent& event);
228+
229+
/**
230+
* @brief Return the default AclStream object, which represents the default ACL stream.
231+
*/
232+
CV_WRAP static AclStream& Null();
233+
234+
// ACL symbols CANNOT be used in any hpp file. An inner class is used to keep ACL symbols out
235+
// of the hpp.
236+
class Impl;
237+
238+
// add temporary mat for async release.
239+
void addToAsyncRelease(const AclMat& mat);
240+
241+
private:
242+
Ptr<Impl> impl_;
243+
AclStream(const Ptr<Impl>& impl);
244+
245+
friend class AclStreamAccessor;
246+
friend class DefaultDeviceInitializer;
247+
};
248+
249+
/**
250+
* @brief AclEvent to synchronize between different streams.
251+
*/
252+
class CV_EXPORTS_W AclEvent
253+
{
254+
public:
255+
CV_WRAP AclEvent();
256+
257+
//! records an event; when no stream is given, AclStream::Null() is used
258+
CV_WRAP void record(AclStream& stream = AclStream::Null());
259+
260+
//! waits for an event to complete
261+
CV_WRAP void waitForComplete() const;
262+
263+
//! implementation class, only declared in this header
class Impl;
264+
265+
private:
266+
Ptr<Impl> impl_;
267+
//! private constructor from an implementation object; AclEventAccessor is a friend and can use it
AclEvent(const Ptr<Impl>& impl);
268+
269+
friend class AclEventAccessor;
270+
};
271+
272+
/** @brief Bindings overload to create a Stream object from the address stored in an existing CANN
273+
* Runtime API stream pointer (aclrtStream).
274+
* @param aclStreamAddress Memory address stored in a CANN Runtime API stream pointer
275+
* (aclrtStream). The created Stream object does not perform any allocation or deallocation and simply
276+
* wraps existing raw CANN Runtime API stream pointer.
277+
* @note Overload for generation of bindings only, not intended for use internally from C++.
* NOTE(review): the original note also said "not exported", but the declaration carries
* CV_EXPORTS_W -- confirm which is intended.
278+
*/
279+
CV_EXPORTS_W AclStream wrapStream(size_t aclStreamAddress);
280+
281+
//! @} cann_struct
282+
283+
//===================================================================================
284+
// Initialization & Info
285+
//===================================================================================
286+
287+
//! @addtogroup cann_init
288+
//! @{
289+
290+
//! Get Ascend matrix object from Input array, upload matrix memory if needed. (Blocking call)
//! NOTE(review): unlike setDevice()/initAcl() in this header, the helpers in this group are
//! declared without CV_EXPORTS; confirm they are meant for use inside the library only.
291+
AclMat getInputMat(InputArray src);
292+
//! Get Ascend matrix object from Input array, upload matrix memory if needed. (Non-Blocking call)
293+
AclMat getInputMat(InputArray src, AclStream& stream);
294+
295+
//! Get Ascend matrix object from Output array, upload matrix memory if needed.
296+
AclMat getOutputMat(OutputArray dst, int rows, int cols, int type);
297+
298+
//! Sync output matrix to Output array, download matrix memory if needed.
299+
void syncOutput(const AclMat& dst, OutputArray _dst);
300+
301+
/**
302+
* @brief Choose the Ascend NPU device.
303+
*/
304+
CV_EXPORTS_W void setDevice(int device);
305+
306+
/**
307+
* @brief Clear all contexts created in the current Ascend device.
308+
*/
309+
CV_EXPORTS_W void resetDevice();
310+
311+
/**
312+
* @brief Get the current Ascend device.
313+
*/
314+
CV_EXPORTS_W int32_t getDevice();
315+
316+
/**
317+
* @brief Initialize AscendCL.
318+
*/
319+
CV_EXPORTS_W void initAcl();
320+
321+
/**
322+
* @brief Finalize AscendCL.
323+
* @note finalizeAcl can only be called once per process. Call this function after all AscendCL
324+
* operations have finished.
325+
*/
326+
CV_EXPORTS_W void finalizeAcl();
327+
328+
//! @} cann_init
329+
330+
} // namespace cann
331+
} // namespace cv
332+
333+
#include "opencv2/cann.inl.hpp"
334+
335+
#endif /* OPENCV_CANN_HPP */

0 commit comments

Comments
 (0)