Skip to content

Commit b328b5b

Browse files
committed
Add bridging PTX code for cooperative_groups API
This appears to be a working solution. The PTX should be fairly portable as well. Overall, this system will work as follows: - Wrap a `cc::Builder` to compile the bridging code on demand as part of a `sys` crate. This will use nvcc under the hood and we will pass along any other needed flags. - Folks that need the bridging code will then use cust::link::Linker to link the bridging PTX code with their PTX. All of the above is tested and working, however it currently deadlocks invocations because we have not exposed the cooperative launch interface. This should be quite simple though, given that the generated cuda bindgen code already has this in place. LOTS TO DO STILL! - Update the new cuda_std_cg crate to use `cc` to compile the C++ bridging code on demand, and produce the PTX path as output. - Remove the Justfile. I was only using it for POC testing. - MAJOR: update cust to expose the `cuLaunchCooperativeKernel` in a nice interface.
1 parent 8a6cb73 commit b328b5b

File tree

10 files changed

+540
-14
lines changed

10 files changed

+540
-14
lines changed

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ members = [
1414
]
1515

1616
exclude = [
17-
"crates/optix/examples/common"
17+
"crates/optix/examples/common",
18+
"crates/cuda_std_cg",
1819
]
1920

2021
[profile.dev.package.rustc_codegen_nvvm]

Justfile

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
build_cuda_std_cg:
2+
#!/usr/bin/env bash
3+
set -euxo pipefail
4+
nvcc --ptx -arch=sm_75 \
5+
-I crates/cuda_std_cg/src -I${CUDA_ROOT}/include \
6+
--device-c crates/cuda_std_cg/src/cg_bridge.cu \
7+
-o crates/cuda_std_cg/cg_bridge.ptx

crates/cuda_std/src/cg.rs

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
use crate::gpu_only;
2+
3+
mod ffi {
4+
use core::ffi::c_void;
5+
6+
pub type GridGroup = *mut c_void;
7+
extern "C" {
8+
pub(super) fn this_grid() -> GridGroup;
9+
pub(super) fn GridGroup_destroy(gg: GridGroup);
10+
pub(super) fn GridGroup_is_valid(gg: GridGroup) -> bool;
11+
pub(super) fn GridGroup_sync(gg: GridGroup);
12+
pub(super) fn GridGroup_size(gg: GridGroup) -> u64;
13+
pub(super) fn GridGroup_thread_rank(gg: GridGroup) -> u64;
14+
pub(super) fn GridGroup_num_threads(gg: GridGroup) -> u64;
15+
pub(super) fn GridGroup_num_blocks(gg: GridGroup) -> u64;
16+
pub(super) fn GridGroup_block_rank(gg: GridGroup) -> u64;
17+
// dim3 GridGroup_group_dim(); // TODO: impl these.
18+
// dim3 GridGroup_dim_blocks(); // TODO: impl these.
19+
// dim3 GridGroup_block_index(); // TODO: impl these.
20+
}
21+
}
22+
23+
pub struct GridGroup(ffi::GridGroup);
24+
25+
impl Drop for GridGroup {
26+
fn drop(&mut self) {
27+
unsafe { ffi::GridGroup_destroy(self.0) }
28+
}
29+
}
30+
31+
impl GridGroup {
32+
#[gpu_only]
33+
pub fn this_grid() -> Self {
34+
let ptr = unsafe { ffi::this_grid() };
35+
GridGroup(ptr)
36+
}
37+
38+
#[gpu_only]
39+
pub fn is_valid(&mut self) -> bool {
40+
unsafe { ffi::GridGroup_is_valid(self.0) }
41+
}
42+
43+
#[gpu_only]
44+
pub fn sync(&mut self) {
45+
unsafe { ffi::GridGroup_sync(self.0) }
46+
}
47+
48+
#[gpu_only]
49+
pub fn size(&mut self) -> u64 {
50+
unsafe { ffi::GridGroup_size(self.0) }
51+
}
52+
53+
#[gpu_only]
54+
pub fn thread_rank(&mut self) -> u64 {
55+
unsafe { ffi::GridGroup_thread_rank(self.0) }
56+
}
57+
58+
#[gpu_only]
59+
pub fn num_threads(&mut self) -> u64 {
60+
unsafe { ffi::GridGroup_num_threads(self.0) }
61+
}
62+
63+
#[gpu_only]
64+
pub fn num_blocks(&mut self) -> u64 {
65+
unsafe { ffi::GridGroup_num_blocks(self.0) }
66+
}
67+
68+
#[gpu_only]
69+
pub fn block_rank(&mut self) -> u64 {
70+
unsafe { ffi::GridGroup_block_rank(self.0) }
71+
}
72+
}

crates/cuda_std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub mod misc;
4646
// pub mod rt;
4747
pub mod atomic;
4848
pub mod cfg;
49+
pub mod cg;
4950
pub mod ptr;
5051
pub mod shared;
5152
pub mod thread;

0 commit comments

Comments
 (0)