Skip to content

add nvptx_target_feature #138689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
// Filter out features that are not supported by the current LLVM version
("aarch64", "fpmr") => None, // only existed in 18
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
// NVPTX targets added in LLVM 20
("nvptx64", "sm_100") if get_version().0 < 20 => None,
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
("nvptx64", "sm_101") if get_version().0 < 20 => None,
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
("nvptx64", "sm_120") if get_version().0 < 20 => None,
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
("nvptx64", "ptx86") if get_version().0 < 20 => None,
("nvptx64", "ptx87") if get_version().0 < 20 => None,
// Filter out features that are not supported by the current LLVM version
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
if get_version().0 < 20 =>
Expand Down Expand Up @@ -337,11 +346,12 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
let target_machine = create_informational_target_machine(sess, true);
// Compute which of the known target features are enabled in the 'base' target machine. We only
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
let mut cpu_implied_features: Vec<(bool, Symbol)> = Vec::new();
let mut features: FxHashSet<Symbol> = sess
.target
.rust_target_features()
.iter()
.filter(|(feature, _, _)| {
.filter(|(feature, _, implied)| {
// skip checking special features, as LLVM may not understand them
if RUSTC_SPECIAL_FEATURES.contains(feature) {
return true;
Expand All @@ -356,6 +366,7 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
return false;
}
}
cpu_implied_features.extend(implied.iter().map(|f| (true, Symbol::intern(f))));
true
} else {
false
Expand All @@ -364,14 +375,15 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
.map(|(feature, _, _)| Symbol::intern(feature))
.collect();

// Add enabled and remove disabled features.
for (enabled, feature) in
// Parse -Ctarget-feature=+feature1,-feature2
let cg_target_features =
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
Some('+') => Some((true, Symbol::intern(&s[1..]))),
Some('-') => Some((false, Symbol::intern(&s[1..]))),
_ => None,
})
{
});
// Add features implied by -Ctarget-cpu followed by enabling/removing those specified by -Ctarget-feature
for (enabled, feature) in cpu_implied_features.into_iter().chain(cg_target_features) {
if enabled {
// Also add all transitively implied features.

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ declare_features! (
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
(unstable, mips_target_feature, "1.27.0", Some(44839)),
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
(unstable, riscv_target_feature, "1.45.0", Some(44839)),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1487,6 +1487,7 @@ symbols! {
not,
notable_trait,
note,
nvptx_target_feature,
object_safe_for_dispatch,
of,
off,
Expand Down
69 changes: 68 additions & 1 deletion compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-end
];

const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
// tidy-alphabetical-end
// tidy-alphabetical-start
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
// tidy-alphabetical-end
];

static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("a", Stable, &["zaamo", "zalrsc"]),
Expand Down Expand Up @@ -770,6 +835,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
.chain(HEXAGON_FEATURES.iter())
.chain(POWERPC_FEATURES.iter())
.chain(MIPS_FEATURES.iter())
.chain(NVPTX_FEATURES.iter())
.chain(RISCV_FEATURES.iter())
.chain(WASM_FEATURES.iter())
.chain(BPF_FEATURES.iter())
Expand Down Expand Up @@ -835,6 +901,7 @@ impl Target {
"x86" | "x86_64" => X86_FEATURES,
"hexagon" => HEXAGON_FEATURES,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
"nvptx64" => NVPTX_FEATURES,
"powerpc" | "powerpc64" => POWERPC_FEATURES,
"riscv32" | "riscv64" => RISCV_FEATURES,
"wasm32" | "wasm64" => WASM_FEATURES,
Expand All @@ -861,7 +928,7 @@ impl Target {
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
"bpf" | "m68k" => &[], // no vector ABI
"nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
// when passing args in vector registers.
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@
#![feature(keylocker_x86)]
#![feature(loongarch_target_feature)]
#![feature(mips_target_feature)]
#![feature(nvptx_target_feature)]
#![feature(powerpc_target_feature)]
#![feature(riscv_target_feature)]
#![feature(rtm_target_feature)]
Expand Down
40 changes: 40 additions & 0 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,46 @@ platform.
[@RDambrosio016](https://github.com/RDambrosio016)
[@kjetilkjeka](https://github.com/kjetilkjeka)

## Requirements

This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
The necessary components for this workflow are:

- `rustup toolchain add nightly`
- `rustup component add llvm-tools --toolchain nightly`
- `rustup component add llvm-bitcode-linker --toolchain nightly`

There are two options for using the core library:

- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`

### Target and features

It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
Later PTX versions may allow more efficient code generation.

Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via (either via `-Ctarget-cpu` and optionally `-Ctarget-feature`).
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

```console
$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
```

Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, thus it's necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:

```console
$ rustup component add rust-src --toolchain nightly
$ rustup component add llvm-tools --toolchain nightly
$ rustup component add llvm-bitcode-linker --toolchain nightly
```


<!-- FIXME: fill this out

## Requirements
Expand Down
56 changes: 56 additions & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
`ptx73`
`ptx74`
`ptx75`
`ptx76`
`ptx77`
`ptx78`
`ptx80`
`ptx81`
`ptx82`
`ptx83`
`ptx84`
`ptx85`
`ptx86`
`ptx87`
`quadword-atomics`
`rand`
`ras`
Expand All @@ -213,6 +242,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`simd128`
`sm3`
`sm4`
`sm_100`
`sm_100a`
`sm_101`
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
`sm_80`
`sm_86`
`sm_87`
`sm_89`
`sm_90`
`sm_90a`
`sme`
`sme-b16b16`
`sme-f16f16`
Expand Down
1 change: 1 addition & 0 deletions tests/ui/target-feature/gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// gate-test-arm_target_feature
// gate-test-hexagon_target_feature
// gate-test-mips_target_feature
// gate-test-nvptx_target_feature
// gate-test-wasm_target_feature
// gate-test-adx_target_feature
// gate-test-cmpxchg16b_target_feature
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/target-feature/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error[E0658]: the target feature `x87` is currently unstable
--> $DIR/gate.rs:29:18
--> $DIR/gate.rs:30:18
|
LL | #[target_feature(enable = "x87")]
| ^^^^^^^^^^^^^^
Expand Down
28 changes: 28 additions & 0 deletions tests/ui/target-feature/implied-features-nvptx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ build-pass
#![no_std]
#![allow(dead_code)]

#[panic_handler]
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}

// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70
#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))]
compile_error!("direct target features not enabled");

// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
#[cfg(not(all(
target_feature = "sm_60",
target_feature = "sm_70",
target_feature = "ptx50",
target_feature = "ptx60",
)))]
compile_error!("implied target features not enabled");

// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
#[cfg(target_feature = "ptx71")]
compile_error!("sm_80 requires only ptx70, but ptx71 enabled");
Loading