@@ -81,13 +81,15 @@ torch::Tensor radius_cuda(const torch::Tensor x, const torch::Tensor y,
 
   auto stream = at::cuda::getCurrentCUDAStream();
   auto scalar_type = x.scalar_type();
-  AT_DISPATCH_FLOATING_TYPES_AND(at::ScalarType::Half, scalar_type, "_", [&] {
-    radius_kernel<scalar_t><<<BLOCKS, THREADS, 0, stream>>>(
-        x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
-        ptr_x.value().data_ptr<int64_t>(), ptr_y.value().data_ptr<int64_t>(),
-        row.data_ptr<int64_t>(), col.data_ptr<int64_t>(), r * r, x.size(0),
-        y.size(0), x.size(1), ptr_x.value().numel() - 1, max_num_neighbors);
-  });
+  AT_DISPATCH_FLOATING_TYPES_AND2(
+      at::ScalarType::Half, at::ScalarType::BFloat16, scalar_type, "_", [&] {
+        radius_kernel<scalar_t><<<BLOCKS, THREADS, 0, stream>>>(
+            x.data_ptr<scalar_t>(), y.data_ptr<scalar_t>(),
+            ptr_x.value().data_ptr<int64_t>(),
+            ptr_y.value().data_ptr<int64_t>(), row.data_ptr<int64_t>(),
+            col.data_ptr<int64_t>(), r * r, x.size(0), y.size(0), x.size(1),
+            ptr_x.value().numel() - 1, max_num_neighbors);
+      });
 
   auto mask = row != -1;
   return torch::stack({row.masked_select(mask), col.masked_select(mask)}, 0);
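
Note: the change swaps AT_DISPATCH_FLOATING_TYPES_AND (float, double, plus one extra type) for AT_DISPATCH_FLOATING_TYPES_AND2, which accepts two extra scalar types, so the radius kernel is now instantiated for at::BFloat16 in addition to float, double, and at::Half. The macro compiles the lambda body once per listed dtype and binds scalar_t to the matching C++ type inside the lambda. A minimal standalone sketch of that dispatch pattern, under the assumption of a program built against libtorch (print_first is a hypothetical helper used only for illustration; it is not part of this commit):

#include <ATen/Dispatch.h>
#include <torch/torch.h>

#include <iostream>

// Hypothetical helper: prints the first element of a typed buffer.
template <typename scalar_t>
void print_first(const scalar_t *data) {
  std::cout << static_cast<double>(data[0]) << std::endl;
}

int main() {
  auto x = torch::ones({4}, torch::kBFloat16);
  // Instantiates the lambda for float, double, Half, and BFloat16;
  // at runtime, the branch matching x.scalar_type() runs with
  // scalar_t bound to the concrete C++ type (here at::BFloat16).
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::ScalarType::Half, at::ScalarType::BFloat16, x.scalar_type(),
      "print_first", [&] { print_first<scalar_t>(x.data_ptr<scalar_t>()); });
  return 0;
}

Without the BFloat16 entry in the dispatch list, passing a kBFloat16 tensor would raise a "not implemented for 'BFloat16'" dispatch error rather than reaching the kernel.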