-
Notifications
You must be signed in to change notification settings - Fork 180
/
Copy pathcg.rs
174 lines (159 loc) · 7.41 KB
/
cg.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
//! Cooperative Groups compilation and linking.
use std::path::{Path, PathBuf};
use anyhow::Context;
use crate::{CudaBuilderError, CudaBuilderResult};
/// An artifact which may be linked together with the Cooperative Groups API bridge PTX code.
pub enum LinkableArtifact {
/// A PTX artifact.
Ptx(PathBuf),
/// A cubin artifact.
Cubin(PathBuf),
/// A fatbin artifact.
Fatbin(PathBuf),
}
impl LinkableArtifact {
/// Add this artifact to the given linker.
fn link_artifact(&self, linker: &mut cust::link::Linker) -> CudaBuilderResult<()> {
match &self {
LinkableArtifact::Ptx(path) => {
let mut data = std::fs::read_to_string(&path).with_context(|| {
format!("error reading PTX file for linking, file={:?}", path)
})?;
if !data.ends_with('\0') {
// If the PTX is not null-terminated, then linking will fail. Only required for PTX.
data.push('\0');
}
linker
.add_ptx(&data)
.with_context(|| format!("error linking PTX file={:?}", path))?;
}
LinkableArtifact::Cubin(path) => {
let data = std::fs::read(&path).with_context(|| {
format!("error reading cubin file for linking, file={:?}", path)
})?;
linker
.add_cubin(&data)
.with_context(|| format!("error linking cubin file={:?}", path))?;
}
LinkableArtifact::Fatbin(path) => {
let data = std::fs::read(&path).with_context(|| {
format!("error reading fatbin file for linking, file={:?}", path)
})?;
linker
.add_fatbin(&data)
.with_context(|| format!("error linking fatbin file={:?}", path))?;
}
}
Ok(())
}
}
/// A builder which will compile the Cooperative Groups API bridging code, and will then link it
/// together with any other artifacts provided to this builder.
///
/// The result of this process will be a `cubin` file containing the linked Cooperative Groups
/// PTX code along with any other linked artifacts provided to this builder. The output `cubin`
/// may then be loaded via `cust::module::Module::from_cubin(..)` and used as normal.
#[derive(Default)]
pub struct CooperativeGroups {
/// Artifacts to be linked together with the Cooperative Groups bridge code.
artifacts: Vec<LinkableArtifact>,
/// Flags to pass to nvcc for Cooperative Groups API bridge compilation.
nvcc_flags: Vec<String>,
}
impl CooperativeGroups {
/// Construct a new instance.
pub fn new() -> Self {
Self::default()
}
/// Add the artifact at the given path for linking.
///
/// This only applies to linking with the Cooperative Groups API bridge code. Typically,
/// this will be the PTX of your main program which has already been built via `CudaBuilder`.
pub fn link(mut self, artifact: LinkableArtifact) -> Self {
self.artifacts.push(artifact);
self
}
/// Add a flag to be passed along to `nvcc` during compilation of the Cooperative Groups API bridge code.
///
/// This provides maximum flexibility for code generation. If needed, multiple architectures
/// may be generated by adding the appropriate flags to the `nvcc` call.
///
/// By default, `nvcc` will generate code for `sm_52`. Override by specifying any of `--gpu-architecture`,
/// `--gpu-code`, or `--generate-code` flags.
///
/// Regardless of the flags added via this method, this builder will always added the following flags:
/// - `-I<cudaRoot>/include`: ensuring `cooperative_groups.h` can be found.
/// - `-Icg`: ensuring the bridging header can be found.
/// - `--ptx`: forces the compiled output to be in PTX form.
/// - `--device-c`: to compile the bridging code as relocatable device code.
/// - `src/cg_bridge.cu` will be added as the code to be compiled, which generates the
/// Cooperative Groups API bridge.
///
/// Docs: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#command-option-description
pub fn nvcc_flag(mut self, val: impl AsRef<str>) -> Self {
self.nvcc_flags.push(val.as_ref().to_string());
self
}
/// Compile the Cooperative Groups API bridging code, and then link it together
/// with any other artifacts provided to this builder.
///
/// - `cg_out` specifies the output location for the Cooperative Groups API bridge PTX.
/// - `cubin_out` specifies the output location for the fully linked `cubin`.
///
/// ## Errors
/// - At least one artifact must be provided to this builder for linking.
/// - Any errors which take place from the `nvcc` compilation of the Cooperative Groups briding
/// code, or any errors which take place during module linking.
pub fn compile(
mut self,
cg_out: impl AsRef<Path>,
cubin_out: impl AsRef<Path>,
) -> CudaBuilderResult<()> {
// Perform some initial validation.
if self.artifacts.is_empty() {
return Err(anyhow::anyhow!("must provide at least 1 ptx/cubin/fatbin artifact to be linked with the Cooperative Groups API bridge code").into());
}
// Find the cuda installation directory for compilation of CG API.
let cuda_root =
find_cuda_helper::find_cuda_root().ok_or(CudaBuilderError::CudaRootNotFound)?;
let cuda_include = cuda_root.join("include");
let cg_src = std::path::Path::new(std::file!())
.parent()
.context("error accessing parent dir cuda_builder/src")?
.parent()
.context("error accessing parent dir cuda_builder")?
.join("cg")
.canonicalize()
.context("error taking canonical path to cooperative groups API bridge code")?;
let cg_bridge_cu = cg_src.join("cg_bridge.cu");
// Build up the `nvcc` invocation and then build the bridging code.
let mut nvcc = std::process::Command::new("nvcc");
nvcc.arg(format!("-I{:?}", &cuda_include).as_str())
.arg(format!("-I{:?}", &cg_src).as_str())
.arg("--ptx")
.arg("-o")
.arg(cg_out.as_ref().to_string_lossy().as_ref())
.arg("--device-c")
.arg(cg_bridge_cu.to_string_lossy().as_ref());
for flag in self.nvcc_flags.iter() {
nvcc.arg(flag.as_str());
}
nvcc.status()
.context("error calling nvcc for Cooperative Groups API bridge compilation")?;
// Link together the briding code with any given PTX/cubin/fatbin artifacts.
let _ctx = cust::quick_init().context("error building cuda context")?;
let mut linker = cust::link::Linker::new().context("error building cust linker")?;
self.artifacts
.push(LinkableArtifact::Ptx(cg_out.as_ref().to_path_buf()));
for artifact in self.artifacts.iter() {
artifact.link_artifact(&mut linker)?;
}
let linked_cubin = linker
.complete()
.context("error linking artifacts with Cooperative Groups API bridge PTX")?;
// Write finalized cubin.
std::fs::write(&cubin_out, &linked_cubin)
.with_context(|| format!("error writing linked cubin to {:?}", cubin_out.as_ref()))?;
Ok(())
}
}