-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathroi_iou_op.cu
88 lines (65 loc) · 2.07 KB
/
roi_iou_op.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include <functional>
#include "caffe2/core/context_gpu.h"
#include "roi_iou_op.h"
namespace caffe2 {
namespace {
// Reference IoU ("overlaps") computation in NumPy for one box `bb` against a
// set of boxes `BBGT`. The snippet indexes coordinates at 0..3; the kernel
// below reads them at offsets 1..4 of each 5-value row.
//
//   # intersection
//   ixmin = np.maximum(BBGT[:, 0], bb[0])
//   iymin = np.maximum(BBGT[:, 1], bb[1])
//   ixmax = np.minimum(BBGT[:, 2], bb[2])
//   iymax = np.minimum(BBGT[:, 3], bb[3])
//   iw = np.maximum(ixmax - ixmin + 1., 0.)
//   ih = np.maximum(iymax - iymin + 1., 0.)
//   inters = iw * ih
//
//   # union
//   uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
//          (BBGT[:, 2] - BBGT[:, 0] + 1.) *
//          (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
//
//   overlaps = inters / uni
// Computes the pairwise intersection-over-union (IoU) of n boxes.
//
// Parameters:
//   nthreads - number of output elements to compute (the host wrapper in this
//              file passes n * n).
//   Rdata    - box data, 5 values per box; values 1..4 of each row are read as
//              xmin, ymin, xmax, ymax. Value 0 of each row is not read.
//   n        - number of boxes.
//   Jdata    - output; element idx receives the IoU of boxes (idx % n) and
//              (idx / n).
//
// Widths and heights use the inclusive "+1" convention (xmax - xmin + 1).
// Entries with i == j are written as 1 without evaluating the formula.
// All arithmetic is carried out in T, so fractional coordinates are not
// truncated and no double-precision math is emitted for T = float.
// There is no guard on the union: degenerate boxes (max < min) can make it
// zero or negative, in which case the quotient is whatever T division yields.
template <typename T>
__global__ void iou(const int nthreads, const T* Rdata, const int n, T* Jdata) {
  CUDA_1D_KERNEL_LOOP(idx, nthreads) {
    const int i = idx % n;
    const int j = idx / n;
    if (i == j) {
      Jdata[idx] = T(1);
      continue;
    }

    const T one = T(1);
    const T zero = T(0);

    // Each box occupies 5 consecutive values; coordinates start at offset 1.
    const T* box_i = Rdata + i * 5;
    const T* box_j = Rdata + j * 5;

    const T ixmin = box_i[1];
    const T iymin = box_i[2];
    const T ixmax = box_i[3];
    const T iymax = box_i[4];

    const T jxmin = box_j[1];
    const T jymin = box_j[2];
    const T jxmax = box_j[3];
    const T jymax = box_j[4];

    // Intersection rectangle; clamped to zero extent when the boxes are
    // disjoint.
    const T xmin = max(ixmin, jxmin);
    const T ymin = max(iymin, jymin);
    const T xmax = min(ixmax, jxmax);
    const T ymax = min(iymax, jymax);
    const T w = max(xmax - xmin + one, zero);
    const T h = max(ymax - ymin + one, zero);
    const T inters = w * h;

    // Union = area(i) + area(j) - intersection.
    const T area_i = (ixmax - ixmin + one) * (iymax - iymin + one);
    const T area_j = (jxmax - jxmin + one) * (jymax - jymin + one);
    const T uni = area_i + area_j - inters;

    Jdata[idx] = inters / uni;
  }
}
} // namespace
// CUDA implementation of RoIIoUOp for float data.
//
// Input(0):  2-D tensor with 5 columns (one row per box); enforced below.
// Output(0): resized to n x n, where n is the number of input rows, and filled
//            by the iou kernel on this context's CUDA stream.
//
// Returns true on success; the CAFFE_ENFORCE / CUDA_ENFORCE checks report
// invalid input shapes, an oversized n, or a failed kernel launch.
template <>
bool RoIIoUOp<float, CUDAContext>::RunOnDevice() {
  const auto& R = Input(0);
  CAFFE_ENFORCE_EQ(R.dim(), 2);
  CAFFE_ENFORCE_EQ(R.dim32(1), 5);
  const int n = R.dim32(0);

  // The kernel indexes the n * n outputs with a 32-bit int; 46340 is the
  // largest n whose square still fits (46340^2 = 2147395600 < 2^31 - 1).
  CAFFE_ENFORCE_LE(n, 46340, "Too many RoIs: n * n overflows int.");

  auto* J = Output(0);
  J->Resize(n, n);
  float* Jdata = J->mutable_data<float>();

  // Nothing to compute for an empty input; avoid launching an empty grid.
  if (n == 0) {
    return true;
  }

  const int count = n * n;
  iou<float><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS, 0,
               context_.cuda_stream()>>>(count, R.data<float>(), n, Jdata);
  // Kernel launches do not return a status; surface launch-configuration
  // errors here instead of at some later, unrelated CUDA call.
  CUDA_ENFORCE(cudaGetLastError());
  return true;
}
// Register the float specialization of RoIIoUOp as the CUDA implementation of
// the operator named RoIIoU.
REGISTER_CUDA_OPERATOR(RoIIoU, RoIIoUOp<float, CUDAContext>);
} // namespace caffe2