-
Notifications
You must be signed in to change notification settings - Fork 5.8k
/
Copy pathextractchannel.cu
95 lines (87 loc) · 3.28 KB
/
extractchannel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <cstdint>

#include "opencv2/core/cuda_types.hpp"
#include "opencv2/cudev/common.hpp"
#include "opencv2/cudev/util/vec_traits.hpp"
namespace
{
/**
 * Reads one scalar component out of a packed multi-channel pixel.
 *
 * @tparam T   Scalar element type of the pixel.
 * @tparam cn  Channel count used to select cv::cudev::MakeVec<T, cn>::type.
 * @param index  Zero-based component to read. It is NOT range-checked here;
 *               the caller must guarantee 0 <= index < cn.
 * @param value  Pixel to read from.
 * @return Component `index` of `value`, returned by value.
 *
 * Intentionally not `constexpr`: the body needs a reinterpret_cast, which is
 * never permitted in a constant expression, so a `constexpr` specifier could
 * not be honoured for any instantiation of this template.
 */
template<typename T, int cn>
__CV_CUDA_HOST_DEVICE__ T get_channel(const int index, const typename cv::cudev::MakeVec<T, cn>::type& value) noexcept
{
    // View the pixel as an array of T and pick the requested element.
    // Assumes the vector type stores its T components back to back starting
    // at offset 0 (true for the CUDA built-in vector types).
    return reinterpret_cast<const T*>(&value)[index];
}
/**
 * Kernel: writes component `channel_index` of every pixel of `many_channel`
 * into the pixel at the same (row, column) of `single_channel`.
 *
 * Launch layout: 2-D grid of 2-D blocks, one thread per pixel. Threads whose
 * coordinates fall outside `many_channel` do nothing, so the grid may
 * overshoot the image. Uses no shared memory.
 *
 * Preconditions (not checked here):
 *  - `single_channel` is at least as large as `many_channel`; only the
 *    dimensions of `many_channel` are used for the bounds test.
 *  - 0 <= channel_index < cn.
 */
template<typename T, int cn>
__global__ void extract_channel_kernel(cv::cuda::PtrStepSz<typename cv::cudev::MakeVec<T, cn>::type> many_channel,
                                       cv::cuda::PtrStepSz<T> single_channel, const int channel_index)
{
    const int col = threadIdx.x + blockDim.x * blockIdx.x;
    const int row = threadIdx.y + blockDim.y * blockIdx.y;

    // Only threads that map onto a real pixel perform the copy.
    const bool inside_image = col < many_channel.cols && row < many_channel.rows;
    if (inside_image) {
        single_channel(row, col) = ::get_channel<T, cn>(channel_index, many_channel(row, col));
    }
}
/**
 * Launches extract_channel_kernel<T, cn> over the whole of `many_channel`.
 *
 * @param many_channel    Source image viewed as packed pixels of `cn` components of T.
 * @param single_channel  Destination image of T; the kernel writes one value per source pixel.
 * @param channel_index   Component to copy; forwarded unchecked to the kernel.
 * @param stream          Stream the kernel is enqueued on. This function does not
 *                        synchronize; the copy completes asynchronously.
 *
 * Raises an OpenCV error (via CV_CUDEV_SAFE_CALL) if the launch itself is rejected.
 */
template<typename T, int cn>
void extract_channel_impl(cv::cuda::PtrStepSz<typename cv::cudev::MakeVec<T, cn>::type> many_channel,
                          cv::cuda::PtrStepSz<T> single_channel, const int channel_index, cv::cuda::Stream& stream)
{
    // A zero-sized image would give a grid dimension of 0, which CUDA rejects
    // as an invalid launch configuration. There is nothing to copy anyway.
    if (many_channel.cols <= 0 || many_channel.rows <= 0) {
        return;
    }

    // 32x8 threads per block; the grid is rounded up so it covers every pixel.
    static constexpr dim3 block(32, 8);
    const dim3 grid(cv::cudev::divUp(many_channel.cols, block.x), cv::cudev::divUp(many_channel.rows, block.y));

    ::extract_channel_kernel<T, cn><<<grid, block, 0, cv::cuda::StreamAccessor::getStream(stream)>>>(many_channel, single_channel, channel_index);

    // Kernel launches return no status. Query it here so a launch failure is
    // reported at its source instead of surfacing in a later, unrelated CUDA call.
    CV_CUDEV_SAFE_CALL(cudaGetLastError());
}
/**
 * Extracts one channel of `input` into `output` for a fixed element type.
 *
 * @tparam T      Scalar C++ type matching `depth`.
 * @tparam depth  OpenCV depth constant; `output` gets type CV_MAKE_TYPE(depth, 1).
 * @param input          Source image with 1 to 4 channels.
 * @param output         Destination; (re)allocated to input.size() with a single
 *                       channel when its current size or type does not match.
 * @param channel_index  Zero-based channel to extract; must be < input.channels().
 * @param stream         Stream the extraction kernel is enqueued on.
 *
 * Raises cv::Error::StsUnsupportedFormat for channel counts outside 1..4 and
 * fails a CV_Assert for an out-of-range `channel_index`. Both checks run
 * before `output` is touched.
 */
template<typename T, int depth>
void extract_channel_t(const cv::cuda::GpuMat input, cv::cuda::GpuMat& output, const int channel_index, cv::cuda::Stream& stream)
{
    static constexpr auto output_type = CV_MAKE_TYPE(depth, 1);

    // Validate everything first so a bad request never reallocates `output`.
    const int channel_count = input.channels();
    if (channel_count < 1 || channel_count > 4) {
        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count");
    }
    // The kernel indexes into the pixel without any check of its own; an
    // out-of-range index would read outside the pixel on the device.
    CV_Assert(channel_index >= 0 && channel_index < channel_count);

    // create() keeps the existing buffer when size and type already match.
    output.create(input.size(), output_type);

    // Nothing to copy for an empty image, so skip the launch.
    if (input.empty()) {
        return;
    }

    switch (channel_count) {
    case 1:
        ::extract_channel_impl<T, 1>(input, output, channel_index, stream);
        break;
    case 2:
        ::extract_channel_impl<T, 2>(input, output, channel_index, stream);
        break;
    case 3:
        ::extract_channel_impl<T, 3>(input, output, channel_index, stream);
        break;
    case 4:
        ::extract_channel_impl<T, 4>(input, output, channel_index, stream);
        break;
    default:
        // Unreachable after the range check above; kept as a safety net.
        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count");
    }
}
} // namespace
namespace cv {
namespace cuda {
/**
 * Copies a single channel of `input` into the single-channel matrix `output`.
 *
 * Dispatches on the depth of `input` to the matching typed implementation.
 *
 * @param input          Source image. Supported depths: CV_8U, CV_8S, CV_16U,
 *                       CV_16S, CV_32S, CV_32F, CV_64F.
 * @param output         Destination; receives the same depth as `input` with one channel.
 * @param channel_index  Zero-based index of the channel to extract.
 * @param stream         Stream forwarded to the typed implementation.
 *
 * Raises cv::Error::StsUnsupportedFormat for CV_16F and any other depth.
 */
void extractChannel(const GpuMat input, GpuMat& output, const int channel_index, Stream& stream)
{
    switch (CV_MAT_DEPTH(input.type())) {
    case CV_8U:
        ::extract_channel_t<std::uint8_t, CV_8U>(input, output, channel_index, stream);
        break;
    case CV_8S:
        ::extract_channel_t<std::int8_t, CV_8S>(input, output, channel_index, stream);
        break;
    case CV_16S:
        ::extract_channel_t<std::int16_t, CV_16S>(input, output, channel_index, stream);
        break;
    case CV_16U:
        ::extract_channel_t<std::uint16_t, CV_16U>(input, output, channel_index, stream);
        break;
    case CV_32F:
        // Plain `float`, not std::float_t: the latter may be a wider type
        // depending on FLT_EVAL_METHOD and would then not match CV_32F storage.
        ::extract_channel_t<float, CV_32F>(input, output, channel_index, stream);
        break;
    case CV_32S:
        ::extract_channel_t<std::int32_t, CV_32S>(input, output, channel_index, stream);
        break;
    case CV_64F:
        // Plain `double` for the same reason: std::double_t may be long double.
        ::extract_channel_t<double, CV_64F>(input, output, channel_index, stream);
        break;
    case CV_16F:
        [[fallthrough]];
    default:
        CV_Error(Error::StsUnsupportedFormat, "Unsupported data type");
    }
}
} // namespace cuda
} // namespace cv