-
Notifications
You must be signed in to change notification settings - Fork 7k
/
Copy pathroi_pool_kernel.cpp
59 lines (49 loc) · 1.57 KB
/
roi_pool_kernel.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include "../roi_pool.h"
#include <ATen/autocast_mode.h>
#include <torch/library.h>
#include <torch/types.h>
namespace vision {
namespace ops {
namespace {
// Autocast wrapper around roi_pool.
//
// Template parameters:
//   autocast_key - dispatch key that is excluded while the wrapped call runs.
//   device_type  - device type forwarded to at::autocast::cached_cast.
//
// Both tensor arguments are cast to float32 before roi_pool is invoked, and
// both elements of the returned tuple are converted to the scalar type of the
// original `input` before being handed back to the caller. The scalar
// arguments are forwarded unchanged.
template <c10::DispatchKey autocast_key, c10::DeviceType device_type>
std::tuple<at::Tensor, at::Tensor> roi_pool_autocast(
    const at::Tensor& input,
    const at::Tensor& rois,
    double spatial_scale,
    int64_t pooled_height,
    int64_t pooled_width) {
  // Exclude autocast_key from dispatch for the rest of this scope so the
  // roi_pool call below does not dispatch back into this wrapper.
  c10::impl::ExcludeDispatchKeyGuard no_autocast(autocast_key);

  // Run the underlying op on float32 copies of the tensor arguments.
  const at::Tensor input_fp32 =
      at::autocast::cached_cast(at::kFloat, input, device_type);
  const at::Tensor rois_fp32 =
      at::autocast::cached_cast(at::kFloat, rois, device_type);
  const auto pooled = roi_pool(
      input_fp32, rois_fp32, spatial_scale, pooled_height, pooled_width);

  // Convert the results to the dtype the caller originally passed in.
  // NOTE(review): the second tuple element is converted as well; if it holds
  // integer indices (presumably roi_pool's argmax -- confirm in roi_pool.h),
  // this conversion may be lossy for reduced-precision input dtypes.
  const auto caller_dtype = input.scalar_type();
  return std::make_tuple(
      std::get<0>(pooled).to(caller_dtype),
      std::get<1>(pooled).to(caller_dtype));
}
} // namespace
// Registers the autocast wrapper as the implementation of
// torchvision::roi_pool for the Autocast dispatch key. The wrapper is
// instantiated with DispatchKey::Autocast and DeviceType::CUDA.
TORCH_LIBRARY_IMPL(torchvision, Autocast, m) {
m.impl(
TORCH_SELECTIVE_NAME("torchvision::roi_pool"),
// The extra parentheses keep the comma in the template argument list from
// being treated as a macro-argument separator.
TORCH_FN((roi_pool_autocast<
c10::DispatchKey::Autocast,
c10::DeviceType::CUDA>)));
}
// Registers the autocast wrapper as the implementation of
// torchvision::roi_pool for the AutocastCPU dispatch key. The wrapper is
// instantiated with DispatchKey::AutocastCPU and DeviceType::CPU.
TORCH_LIBRARY_IMPL(torchvision, AutocastCPU, m) {
m.impl(
TORCH_SELECTIVE_NAME("torchvision::roi_pool"),
// The extra parentheses keep the comma in the template argument list from
// being treated as a macro-argument separator.
TORCH_FN((roi_pool_autocast<
c10::DispatchKey::AutocastCPU,
c10::DeviceType::CPU>)));
}
// Registers the autocast wrapper as the implementation of
// torchvision::roi_pool for the AutocastXPU dispatch key. The wrapper is
// instantiated with DispatchKey::AutocastXPU and DeviceType::XPU.
TORCH_LIBRARY_IMPL(torchvision, AutocastXPU, m) {
m.impl(
TORCH_SELECTIVE_NAME("torchvision::roi_pool"),
// The extra parentheses keep the comma in the template argument list from
// being treated as a macro-argument separator.
TORCH_FN((roi_pool_autocast<
c10::DispatchKey::AutocastXPU,
c10::DeviceType::XPU>)));
}
} // namespace ops
} // namespace vision