From bdb726b499c1eddcf9066b8f7786059a8d96e3fe Mon Sep 17 00:00:00 2001
From: Flakebi
Date: Thu, 11 Dec 2025 02:47:45 +0100
Subject: [PATCH] Add inline asm support for amdgpu
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for inline assembly for the amdgpu backend (the
amdgcn-amd-amdhsa target).

Add register classes for `vgpr` (vector general-purpose register) and
`sgpr` (scalar general-purpose register).

The LLVM backend supports two more classes. The third is `reg`, which is
either a VGPR or an SGPR, with the choice left to the compiler. As
instructions usually require a specific one of the two for the assembly
to be valid, `reg` does not seem that useful (I struggled to write
correct tests for it), so I did not add it. The fourth class is AGPRs,
which exist only on some hardware versions (not the consumer ones) and
can only be read and written in restricted ways, which makes it hard to
move a Rust variable into them. They can still be used inside assembly
blocks, but I did not add them as a Rust register class.

There is one change affecting general inline assembly code:
`InlineAsmReg::name()` now returns a `Cow<'static, str>` instead of a
`&'static str`. Because amdgpu has many registers (256 VGPRs plus
combinations of 2 or 4 adjacent VGPRs) and I did not want to list
hundreds of static strings, the amdgpu register stores the register
number(s) and generates a non-static `String` for the register name at
runtime.
---
 compiler/rustc_codegen_gcc/src/asm.rs   |   4 +
 compiler/rustc_codegen_llvm/src/asm.rs  |   4 +
 compiler/rustc_span/src/symbol.rs       |   2 +
 compiler/rustc_target/src/asm/amdgpu.rs | 234 +++++++++++++++++++++++
 compiler/rustc_target/src/asm/mod.rs    |  61 ++++--
 tests/assembly-llvm/asm/amdgpu-types.rs | 236 ++++++++++++++++++++++++
 6 files changed, 523 insertions(+), 18 deletions(-)
 create mode 100644 compiler/rustc_target/src/asm/amdgpu.rs
 create mode 100644 tests/assembly-llvm/asm/amdgpu-types.rs

diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
index f237861b1595a..1dd0f2ed58bd8 100644
--- a/compiler/rustc_codegen_gcc/src/asm.rs
+++ b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -665,6 +665,8 @@ fn reg_class_to_gcc(reg_class: InlineAsmRegClass) -> &'static str {
         InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
             unreachable!("clobber-only")
         }
+        InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::vgpr) => "v",
+        InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::sgpr) => "Sg",
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => "r",
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
         | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg_low16)
@@ -761,6 +763,7 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl
         InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
             unreachable!("clobber-only")
         }
+        InlineAsmRegClass::Amdgpu(_) => cx.type_i32(),
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(),
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
         | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => cx.type_f32(),
@@ -946,6 +949,7 @@ fn modifier_to_gcc(
         InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => {
             unreachable!("clobber-only")
         }
+        InlineAsmRegClass::Amdgpu(_) => None,
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => None,
         InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg)
         | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => None,
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 8cd4bdc372789..273f7a585bf4c 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -229,6 +229,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC | InlineAsmArch::Arm => {
                 constraints.push("~{cc}".to_string());
             }
+            InlineAsmArch::Amdgpu => {}
             InlineAsmArch::X86 | InlineAsmArch::X86_64 => {
                 constraints.extend_from_slice(&[
                     "~{dirflag}".to_string(),
@@ -645,6 +646,7 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) ->
             | Arm(ArmInlineAsmRegClass::dreg_low8)
             | Arm(ArmInlineAsmRegClass::qreg_low4) => "x",
             Arm(ArmInlineAsmRegClass::dreg) | Arm(ArmInlineAsmRegClass::qreg) => "w",
+            Amdgpu(class) => class.prefix(),
             Hexagon(HexagonInlineAsmRegClass::reg) => "r",
             Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
             LoongArch(LoongArchInlineAsmRegClass::reg) => "r",
@@ -745,6 +747,7 @@ fn modifier_to_llvm(
                 modifier
             }
         }
+        Amdgpu(_) => None,
         Hexagon(_) => None,
         LoongArch(_) => None,
         Mips(_) => None,
@@ -825,6 +828,7 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &'
         Arm(ArmInlineAsmRegClass::qreg)
         | Arm(ArmInlineAsmRegClass::qreg_low8)
         | Arm(ArmInlineAsmRegClass::qreg_low4) => cx.type_vector(cx.type_i64(), 2),
+        Amdgpu(_) => cx.type_i32(),
         Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(),
         Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"),
         LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(),
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index f2b13dad1fd90..800474d21ec20 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2028,6 +2028,7 @@ symbols! {
         self_struct_ctor,
         semiopaque,
         semitransparent,
+        sgpr,
         sha2,
         sha3,
         sha512_sm_x86,
@@ -2448,6 +2449,7 @@ symbols! {
         verbatim,
         version,
         vfp2,
+        vgpr,
         vis,
         visible_private_types,
         volatile,
diff --git a/compiler/rustc_target/src/asm/amdgpu.rs b/compiler/rustc_target/src/asm/amdgpu.rs
new file mode 100644
index 0000000000000..06ae68a46da8e
--- /dev/null
+++ b/compiler/rustc_target/src/asm/amdgpu.rs
@@ -0,0 +1,234 @@
+use std::fmt;
+
+use rustc_span::Symbol;
+
+use super::{InlineAsmArch, InlineAsmType, ModifierInfo};
+
+def_reg_class! {
+    Amdgpu AmdgpuInlineAsmRegClass {
+        sgpr,
+        vgpr,
+    }
+}
+
+// See https://llvm.org/docs/AMDGPUOperandSyntax.html
+impl AmdgpuInlineAsmRegClass {
+    pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] {
+        &[]
+    }
+
+    pub fn suggest_class(self, _arch: InlineAsmArch, _ty: InlineAsmType) -> Option<Self> {
+        None
+    }
+
+    pub fn suggest_modifier(
+        self,
+        _arch: InlineAsmArch,
+        _ty: InlineAsmType,
+    ) -> Option<ModifierInfo> {
+        None
+    }
+
+    pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<ModifierInfo> {
+        None
+    }
+
+    pub fn supported_types(
+        self,
+        _arch: InlineAsmArch,
+    ) -> &'static [(InlineAsmType, Option<Symbol>)] {
+        types! { _: I16, F16, I32, F32, I64, F64, I128; }
+    }
+
+    /// The number of supported registers in this class.
+    /// The returned number is the length, so supported register
+    /// indices are 0 to max_num()-1.
+    fn max_num(self) -> u32 {
+        match self {
+            Self::sgpr => 106,
+            Self::vgpr => 256,
+        }
+    }
+
+    /// Prefix when printed and register constraint in LLVM.
+    pub fn prefix(self) -> &'static str {
+        match self {
+            Self::sgpr => "s",
+            Self::vgpr => "v",
+        }
+    }
+
+    /// Get register class from prefix.
+    fn parse_prefix(prefix: char) -> Result<Self, &'static str> {
+        match prefix {
+            's' => Ok(Self::sgpr),
+            'v' => Ok(Self::vgpr),
+            _ => Err("unknown register prefix"),
+        }
+    }
+}
+
+#[derive(
+    Copy,
+    Clone,
+    rustc_macros::Encodable,
+    rustc_macros::Decodable,
+    Debug,
+    Eq,
+    PartialEq,
+    PartialOrd,
+    Hash,
+    rustc_macros::HashStable_Generic
+)]
+enum AmdgpuRegRange {
+    /// Low 16-bit of a register
+    Low(u32),
+    /// High 16-bit of a register
+    High(u32),
+    /// One or more 32-bit registers, in the inclusive range
+    Range { start: u32, end: u32 },
+}
+
+#[derive(
+    Copy,
+    Clone,
+    rustc_macros::Encodable,
+    rustc_macros::Decodable,
+    Debug,
+    Eq,
+    PartialEq,
+    PartialOrd,
+    Hash,
+    rustc_macros::HashStable_Generic
+)]
+#[allow(non_camel_case_types)]
+pub struct AmdgpuInlineAsmReg {
+    class: AmdgpuInlineAsmRegClass,
+    range: AmdgpuRegRange,
+}
+
+impl AmdgpuInlineAsmReg {
+    pub fn name(self) -> String {
+        let c = self.class.prefix();
+        match self.range {
+            AmdgpuRegRange::Low(n) => format!("{c}{n}.l"),
+            AmdgpuRegRange::High(n) => format!("{c}{n}.h"),
+            AmdgpuRegRange::Range { start, end } if start == end => format!("{c}{start}"),
+            AmdgpuRegRange::Range { start, end } => format!("{c}[{start}:{end}]"),
+        }
+    }
+
+    pub fn reg_class(self) -> AmdgpuInlineAsmRegClass {
+        self.class
+    }
+
+    pub fn parse(name: &str) -> Result<Self, &'static str> {
+        if name.is_empty() {
+            return Err("invalid empty register");
+        }
+        let class = AmdgpuInlineAsmRegClass::parse_prefix(name.chars().next().unwrap())?;
+        // Form with range, e.g. s[2:3]
+        let res;
+        if name[1..].starts_with('[') {
+            if !name.ends_with(']') {
+                return Err("invalid register, missing closing bracket");
+            }
+            if let Some((start, end)) = name[2..name.len() - 1].split_once(':') {
+                let Ok(start) = start.parse() else {
+                    return Err("invalid register range start");
+                };
+                let Ok(end) = end.parse() else {
+                    return Err("invalid register range end");
+                };
+
+                // Check range
+                if start > end {
+                    return Err("invalid reversed register range");
+                }
+
+                if end >= class.max_num() {
+                    return Err("too large register for this class");
+                }
+                res = Self { class, range: AmdgpuRegRange::Range { start, end } };
+            } else {
+                return Err("invalid register range");
+            }
+        } else {
+            let parse_num = |core: &str| {
+                let Ok(start) = core.parse() else {
+                    return Err("invalid register number");
+                };
+
+                if start >= class.max_num() {
+                    return Err("too large register for this class");
+                }
+
+                Ok(start)
+            };
+
+            let name = &name[1..];
+            let range = if let Some(name) = name.strip_suffix(".l") {
+                AmdgpuRegRange::Low(parse_num(name)?)
+            } else if let Some(name) = name.strip_suffix(".h") {
+                AmdgpuRegRange::High(parse_num(name)?)
+            } else {
+                let start = parse_num(name)?;
+                AmdgpuRegRange::Range { start, end: start }
+            };
+            res = Self { class, range };
+        }
+        Ok(res)
+    }
+
+    pub fn validate(
+        self,
+        _arch: super::InlineAsmArch,
+        _reloc_model: crate::spec::RelocModel,
+        _target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>,
+        _target: &crate::spec::Target,
+        _is_clobber: bool,
+    ) -> Result<(), &'static str> {
+        Ok(())
+    }
+}
+
+pub(super) fn fill_reg_map(
+    _arch: super::InlineAsmArch,
+    _reloc_model: crate::spec::RelocModel,
+    _target_features: &rustc_data_structures::fx::FxIndexSet<Symbol>,
+    _target: &crate::spec::Target,
+    map: &mut rustc_data_structures::fx::FxHashMap<
+        super::InlineAsmRegClass,
+        rustc_data_structures::fx::FxIndexSet<super::InlineAsmReg>,
+    >,
+) {
+    use super::{InlineAsmReg, InlineAsmRegClass};
+
+    // Add single registers of each class (no register ranges)
+    #[allow(rustc::potential_query_instability)]
+    for class in regclass_map().keys() {
+        let InlineAsmRegClass::Amdgpu(class) = *class else { unreachable!("Must be amdgpu class") };
+        if let Some(set) = map.get_mut(&InlineAsmRegClass::Amdgpu(class)) {
+            for i in 0..class.max_num() {
+                set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg {
+                    class,
+                    range: AmdgpuRegRange::Range { start: i, end: i },
+                }));
+            }
+        }
+    }
+}
+
+impl AmdgpuInlineAsmReg {
+    pub fn emit(
+        self,
+        out: &mut dyn fmt::Write,
+        _arch: InlineAsmArch,
+        _modifier: Option<char>,
+    ) -> fmt::Result {
+        out.write_str(&self.name())
+    }
+
+    // There are too many conflicts to list
+    pub fn overlapping_regs(self, mut _cb: impl FnMut(AmdgpuInlineAsmReg)) {}
+}
diff --git a/compiler/rustc_target/src/asm/mod.rs b/compiler/rustc_target/src/asm/mod.rs
index 57d9cdad454ac..cab800f553c75 100644
--- a/compiler/rustc_target/src/asm/mod.rs
+++ b/compiler/rustc_target/src/asm/mod.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::fmt;
 
 use rustc_abi::Size;
@@ -177,6 +178,7 @@ macro_rules! types {
 }
 
 mod aarch64;
+mod amdgpu;
 mod arm;
 mod avr;
 mod bpf;
@@ -196,6 +198,7 @@ mod wasm;
 mod x86;
 
 pub use aarch64::{AArch64InlineAsmReg, AArch64InlineAsmRegClass};
+pub use amdgpu::{AmdgpuInlineAsmReg, AmdgpuInlineAsmRegClass};
 pub use arm::{ArmInlineAsmReg, ArmInlineAsmRegClass};
 pub use avr::{AvrInlineAsmReg, AvrInlineAsmRegClass};
 pub use bpf::{BpfInlineAsmReg, BpfInlineAsmRegClass};
@@ -224,6 +227,7 @@ pub enum InlineAsmArch {
     RiscV32,
     RiscV64,
     Nvptx64,
+    Amdgpu,
     Hexagon,
     LoongArch32,
     LoongArch64,
@@ -252,6 +256,7 @@ impl InlineAsmArch {
             Arch::Arm => Some(Self::Arm),
             Arch::Arm64EC => Some(Self::Arm64EC),
             Arch::AArch64 => Some(Self::AArch64),
+            Arch::AmdGpu => Some(Self::Amdgpu),
             Arch::RiscV32 => Some(Self::RiscV32),
             Arch::RiscV64 => Some(Self::RiscV64),
             Arch::Nvptx64 => Some(Self::Nvptx64),
@@ -273,7 +278,7 @@ impl InlineAsmArch {
             Arch::Msp430 => Some(Self::Msp430),
             Arch::M68k => Some(Self::M68k),
             Arch::CSky => Some(Self::CSKY),
-            Arch::AmdGpu | Arch::Xtensa | Arch::Other(_) => None,
+            Arch::Xtensa | Arch::Other(_) => None,
         }
     }
 }
@@ -283,6 +288,7 @@ impl InlineAsmArch {
 pub enum InlineAsmReg {
     X86(X86InlineAsmReg),
     Arm(ArmInlineAsmReg),
+    Amdgpu(AmdgpuInlineAsmReg),
     AArch64(AArch64InlineAsmReg),
     RiscV(RiscVInlineAsmReg),
     Nvptx(NvptxInlineAsmReg),
@@ -304,24 +310,25 @@ pub enum InlineAsmReg {
 }
 
 impl InlineAsmReg {
-    pub fn name(self) -> &'static str {
+    pub fn name(self) -> Cow<'static, str> {
         match self {
-            Self::X86(r) => r.name(),
-            Self::Arm(r) => r.name(),
-            Self::AArch64(r) => r.name(),
-            Self::RiscV(r) => r.name(),
-            Self::PowerPC(r) => r.name(),
-            Self::Hexagon(r) => r.name(),
-            Self::LoongArch(r) => r.name(),
-            Self::Mips(r) => r.name(),
-            Self::S390x(r) => r.name(),
-            Self::Sparc(r) => r.name(),
-            Self::Bpf(r) => r.name(),
-            Self::Avr(r) => r.name(),
-            Self::Msp430(r) => r.name(),
-            Self::M68k(r) => r.name(),
-            Self::CSKY(r) => r.name(),
-            Self::Err => "",
+            Self::X86(r) => r.name().into(),
+            Self::Arm(r) => r.name().into(),
+            Self::AArch64(r) => r.name().into(),
+            Self::Amdgpu(r) => r.name().into(),
+            Self::RiscV(r) => r.name().into(),
+            Self::PowerPC(r) => r.name().into(),
+            Self::Hexagon(r) => r.name().into(),
+            Self::LoongArch(r) => r.name().into(),
+            Self::Mips(r) => r.name().into(),
+            Self::S390x(r) => r.name().into(),
+            Self::Sparc(r) => r.name().into(),
+            Self::Bpf(r) => r.name().into(),
+            Self::Avr(r) => r.name().into(),
+            Self::Msp430(r) => r.name().into(),
+            Self::M68k(r) => r.name().into(),
+            Self::CSKY(r) => r.name().into(),
+            Self::Err => "".into(),
         }
     }
 
@@ -330,6 +337,7 @@ impl InlineAsmReg {
         Self::X86(r) => InlineAsmRegClass::X86(r.reg_class()),
         Self::Arm(r) => InlineAsmRegClass::Arm(r.reg_class()),
         Self::AArch64(r) => InlineAsmRegClass::AArch64(r.reg_class()),
+        Self::Amdgpu(r) => InlineAsmRegClass::Amdgpu(r.reg_class()),
         Self::RiscV(r) => InlineAsmRegClass::RiscV(r.reg_class()),
         Self::PowerPC(r) => InlineAsmRegClass::PowerPC(r.reg_class()),
         Self::Hexagon(r) => InlineAsmRegClass::Hexagon(r.reg_class()),
@@ -356,6 +364,7 @@ impl InlineAsmReg {
             InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC => {
                 Self::AArch64(AArch64InlineAsmReg::parse(name)?)
             }
+            InlineAsmArch::Amdgpu => Self::Amdgpu(AmdgpuInlineAsmReg::parse(name)?),
             InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
                 Self::RiscV(RiscVInlineAsmReg::parse(name)?)
             }
@@ -398,6 +407,7 @@ impl InlineAsmReg {
         Self::X86(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
         Self::Arm(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
         Self::AArch64(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
+        Self::Amdgpu(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
         Self::RiscV(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
         Self::PowerPC(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
         Self::Hexagon(r) => r.validate(arch, reloc_model, target_features, target, is_clobber),
@@ -428,6 +438,7 @@ impl InlineAsmReg {
             Self::X86(r) => r.emit(out, arch, modifier),
             Self::Arm(r) => r.emit(out, arch, modifier),
             Self::AArch64(r) => r.emit(out, arch, modifier),
+            Self::Amdgpu(r) => r.emit(out, arch, modifier),
             Self::RiscV(r) => r.emit(out, arch, modifier),
             Self::PowerPC(r) => r.emit(out, arch, modifier),
             Self::Hexagon(r) => r.emit(out, arch, modifier),
@@ -449,6 +460,7 @@ impl InlineAsmReg {
             Self::X86(r) => r.overlapping_regs(|r| cb(Self::X86(r))),
             Self::Arm(r) => r.overlapping_regs(|r| cb(Self::Arm(r))),
             Self::AArch64(_) => cb(self),
+            Self::Amdgpu(_) => cb(self),
             Self::RiscV(_) => cb(self),
             Self::PowerPC(r) => r.overlapping_regs(|r| cb(Self::PowerPC(r))),
             Self::Hexagon(r) => r.overlapping_regs(|r| cb(Self::Hexagon(r))),
@@ -472,6 +484,7 @@ pub enum InlineAsmRegClass {
     X86(X86InlineAsmRegClass),
     Arm(ArmInlineAsmRegClass),
     AArch64(AArch64InlineAsmRegClass),
+    Amdgpu(AmdgpuInlineAsmRegClass),
     RiscV(RiscVInlineAsmRegClass),
     Nvptx(NvptxInlineAsmRegClass),
     PowerPC(PowerPCInlineAsmRegClass),
@@ -497,6 +510,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.name(),
             Self::Arm(r) => r.name(),
             Self::AArch64(r) => r.name(),
+            Self::Amdgpu(r) => r.name(),
             Self::RiscV(r) => r.name(),
             Self::Nvptx(r) => r.name(),
             Self::PowerPC(r) => r.name(),
@@ -524,6 +538,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::X86),
             Self::Arm(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Arm),
             Self::AArch64(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::AArch64),
+            Self::Amdgpu(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Amdgpu),
             Self::RiscV(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::RiscV),
             Self::Nvptx(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Nvptx),
             Self::PowerPC(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::PowerPC),
@@ -554,6 +569,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.suggest_modifier(arch, ty),
             Self::Arm(r) => r.suggest_modifier(arch, ty),
             Self::AArch64(r) => r.suggest_modifier(arch, ty),
+            Self::Amdgpu(r) => r.suggest_modifier(arch, ty),
             Self::RiscV(r) => r.suggest_modifier(arch, ty),
             Self::Nvptx(r) => r.suggest_modifier(arch, ty),
             Self::PowerPC(r) => r.suggest_modifier(arch, ty),
@@ -584,6 +600,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.default_modifier(arch),
             Self::Arm(r) => r.default_modifier(arch),
             Self::AArch64(r) => r.default_modifier(arch),
+            Self::Amdgpu(r) => r.default_modifier(arch),
             Self::RiscV(r) => r.default_modifier(arch),
             Self::Nvptx(r) => r.default_modifier(arch),
             Self::PowerPC(r) => r.default_modifier(arch),
@@ -617,6 +634,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.supported_types(arch),
             Self::Arm(r) => r.supported_types(arch),
             Self::AArch64(r) => r.supported_types(arch),
+            Self::Amdgpu(r) => r.supported_types(arch),
             Self::RiscV(r) => r.supported_types(arch),
             Self::Nvptx(r) => r.supported_types(arch),
             Self::PowerPC(r) => r.supported_types(arch),
@@ -645,6 +663,7 @@ impl InlineAsmRegClass {
             InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC => {
                 Self::AArch64(AArch64InlineAsmRegClass::parse(name)?)
             }
+            InlineAsmArch::Amdgpu => Self::Amdgpu(AmdgpuInlineAsmRegClass::parse(name)?),
             InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
                 Self::RiscV(RiscVInlineAsmRegClass::parse(name)?)
             }
@@ -682,6 +701,7 @@ impl InlineAsmRegClass {
             Self::X86(r) => r.valid_modifiers(arch),
             Self::Arm(r) => r.valid_modifiers(arch),
             Self::AArch64(r) => r.valid_modifiers(arch),
+            Self::Amdgpu(r) => r.valid_modifiers(arch),
             Self::RiscV(r) => r.valid_modifiers(arch),
             Self::Nvptx(r) => r.valid_modifiers(arch),
             Self::PowerPC(r) => r.valid_modifiers(arch),
@@ -843,6 +863,11 @@ pub fn allocatable_registers(
             aarch64::fill_reg_map(arch, reloc_model, target_features, target, &mut map);
             map
         }
+        InlineAsmArch::Amdgpu => {
+            let mut map = amdgpu::regclass_map();
+            amdgpu::fill_reg_map(arch, reloc_model, target_features, target, &mut map);
+            map
+        }
         InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
             let mut map = riscv::regclass_map();
             riscv::fill_reg_map(arch, reloc_model, target_features, target, &mut map);
diff --git a/tests/assembly-llvm/asm/amdgpu-types.rs b/tests/assembly-llvm/asm/amdgpu-types.rs
new file mode 100644
index 0000000000000..3728ae0cf7d09
--- /dev/null
+++ b/tests/assembly-llvm/asm/amdgpu-types.rs
@@ -0,0 +1,236 @@
+//@ add-minicore
+//@ revisions: gfx11 gfx12
+//@ assembly-output: emit-asm
+//@ compile-flags: --target amdgcn-amd-amdhsa
+//@[gfx11] compile-flags: -Ctarget-cpu=gfx1100
+//@[gfx12] compile-flags: -Ctarget-cpu=gfx1200
+//@ needs-llvm-components: amdgpu
+//@ needs-rust-lld
+
+#![feature(abi_gpu_kernel, no_core, asm_experimental_arch, f16)]
+#![crate_type = "rlib"]
+#![no_core]
+#![allow(asm_sub_register, non_camel_case_types)]
+
+extern crate minicore;
+use minicore::*;
+
+type ptr = *mut u8;
+
+macro_rules! check {
+    ($func:ident $ty:ident $class:ident $mov:literal) => {
+        #[no_mangle]
+        pub unsafe extern "gpu-kernel" fn $func(x: $ty) -> $ty {
+            let y;
+            asm!(concat!($mov, " {}, {}"), out($class) y, in($class) x);
+            y
+        }
+    };
+
+    ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)*
+     $mov:literal) => {
+        #[no_mangle]
+        pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) -> $ret_ty {
+            let result;
+            asm!(concat!($mov, " {}", $(", {", stringify!($arg_name), "}",)*),
+                out($ret_class) result, $($arg_name = in($arg_class) $arg_name,)*);
+            result
+        }
+    };
+}
+
+macro_rules! check_reg {
+    ($func:ident $ty:ident $reg:tt $mov:literal) => {
+        #[no_mangle]
+        pub unsafe extern "gpu-kernel" fn $func(x: $ty) -> $ty {
+            let y;
+            asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
+            y
+        }
+    };
+
+    ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)*
+     $mov:literal) => {
+        #[no_mangle]
+        pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) -> $ret_ty {
+            let result;
+            asm!(concat!($mov, " ", $ret_reg, $(", ", $arg_reg,)*), lateout($ret_reg) result,
+                $(in($arg_reg) $arg_name,)*);
+            result
+        }
+    };
+}
+
+// CHECK-LABEL: sgpr_i16:
+// CHECK: #ASMSTART
+// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}
+// CHECK: #ASMEND
+check!(sgpr_i16 i32 sgpr x: i16 sgpr, y: i16 sgpr, "s_pack_ll_b32_b16");
+
+// gfx11-LABEL: vgpr_i16:
+// gfx11: #ASMSTART
+// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}
+// gfx11: #ASMEND
+#[cfg(gfx11)]
+check!(vgpr_i16 i16 vgpr "v_mov_b16");
+
+// gfx12-LABEL: sgpr_f16:
+// gfx12: #ASMSTART
+// gfx12: s_add_f16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}
+// gfx12: #ASMEND
+#[cfg(gfx12)]
+check!(sgpr_f16 f16 sgpr x: f16 sgpr, y: f16 sgpr, "s_add_f16");
+
+// gfx11-LABEL: vgpr_f16:
+// gfx11: #ASMSTART
+// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}
+// gfx11: #ASMEND
+#[cfg(gfx11)]
+check!(vgpr_f16 f16 vgpr "v_mov_b16");
+
+// CHECK-LABEL: sgpr_i32:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check!(sgpr_i32 i32 sgpr "s_mov_b32");
+
+// CHECK-LABEL: vgpr_i32:
+// CHECK: #ASMSTART
+// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}}
+// CHECK: #ASMEND
+check!(vgpr_i32 i32 vgpr "v_mov_b32");
+
+// CHECK-LABEL: sgpr_f32:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check!(sgpr_f32 f32 sgpr "s_mov_b32");
+
+// CHECK-LABEL: vgpr_f32:
+// CHECK: #ASMSTART
+// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}}
+// CHECK: #ASMEND
+check!(vgpr_f32 f32 vgpr "v_mov_b32");
+
+// CHECK-LABEL: sgpr_i64:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check!(sgpr_i64 i64 sgpr "s_mov_b64");
+
+// CHECK-LABEL: vgpr_i64:
+// CHECK: #ASMSTART
+// CHECK: v_lshlrev_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check!(vgpr_i64 i64 vgpr x: i32 vgpr, y: i64 vgpr, "v_lshlrev_b64");
+
+// CHECK-LABEL: sgpr_f64:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check!(sgpr_f64 f64 sgpr "s_mov_b64");
+
+// CHECK-LABEL: vgpr_f64:
+// CHECK: #ASMSTART
+// CHECK: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check!(vgpr_f64 f64 vgpr x: f64 vgpr, y: f64 vgpr, "v_add_f64");
+
+// CHECK-LABEL: sgpr_i128:
+// CHECK: #ASMSTART
+// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check!(sgpr_i128 i128 sgpr x: ptr sgpr, y: i32 sgpr, "s_load_b128");
+
+// CHECK-LABEL: vgpr_i128:
+// CHECK: #ASMSTART
+// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check!(vgpr_i128 i128 vgpr x: i32 vgpr, y: ptr sgpr, "global_load_b128");
+
+// CHECK-LABEL: s0_i16:
+// CHECK: #ASMSTART
+// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}
+// CHECK: #ASMEND
+check_reg!(s0_i16 i32 "s0" x: i16 "s1", y: i16 "s2", "s_pack_ll_b32_b16");
+
+// gfx11-LABEL: v0_i16:
+// gfx11: #ASMSTART
+// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}
+// gfx11: #ASMEND
+#[cfg(gfx11)]
+check_reg!(v0_i16 i16 "v0.l" "v_mov_b16");
+
+// gfx12-LABEL: s0_f16:
+// gfx12: #ASMSTART
+// gfx12: s_add_f16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}
+// gfx12: #ASMEND
+#[cfg(gfx12)]
+check_reg!(s0_f16 f16 "s0" x: f16 "s1", y: f16 "s2", "s_add_f16");
+
+// gfx11-LABEL: v0_f16:
+// gfx11: #ASMSTART
+// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}
+// gfx11: #ASMEND
+#[cfg(gfx11)]
+check_reg!(v0_f16 f16 "v0.l" "v_mov_b16");
+
+// CHECK-LABEL: s0_i32:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check_reg!(s0_i32 i32 "s0" "s_mov_b32");
+
+// CHECK-LABEL: v0_i32:
+// CHECK: #ASMSTART
+// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}}
+// CHECK: #ASMEND
+check_reg!(v0_i32 i32 "v0" "v_mov_b32");
+
+// CHECK-LABEL: s0_f32:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check_reg!(s0_f32 f32 "s0" "s_mov_b32");
+
+// CHECK-LABEL: v0_f32:
+// CHECK: #ASMSTART
+// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}}
+// CHECK: #ASMEND
+check_reg!(v0_f32 f32 "v0" "v_mov_b32");
+
+// CHECK-LABEL: s0_i64:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check_reg!(s0_i64 i64 "s[0:1]" "s_mov_b64");
+
+// CHECK-LABEL: v0_i64:
+// CHECK: #ASMSTART
+// CHECK: v_lshlrev_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check_reg!(v0_i64 i64 "v[0:1]" x: i32 "v0", y: i64 "v[0:1]", "v_lshlrev_b64");
+
+// CHECK-LABEL: s0_f64:
+// CHECK: #ASMSTART
+// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check_reg!(s0_f64 f64 "s[0:1]" "s_mov_b64");
+
+// CHECK-LABEL: v0_f64:
+// CHECK: #ASMSTART
+// CHECK: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check_reg!(v0_f64 f64 "v[0:1]" x: f64 "v[0:1]", y: f64 "v[2:3]", "v_add_f64");
+
+// CHECK-LABEL: s0_i128:
+// CHECK: #ASMSTART
+// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
+// CHECK: #ASMEND
+check_reg!(s0_i128 i128 "s[0:3]" x: ptr "s[0:1]", y: i32 "s0", "s_load_b128");
+
+// CHECK-LABEL: v0_i128:
+// CHECK: #ASMSTART
+// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
+// CHECK: #ASMEND
+check_reg!(v0_i128 i128 "v[0:3]" x: i32 "v0", y: ptr "s[0:1]", "global_load_b128");
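
Usage sketch (reviewer note, not part of the patch): the snippet below shows
how the new operand kinds would be written from user code on
amdgcn-amd-amdhsa, mirroring the minicore setup of the test above. The
function names and instruction choices are illustrative only. Explicit
register names follow `AmdgpuInlineAsmReg::parse`: "s0"/"v0" for single
registers, "v0.l"/"v0.h" for the 16-bit halves, and "s[0:1]"/"v[0:3]" for
ranges of adjacent registers.

#![feature(no_core, asm_experimental_arch)]
#![crate_type = "rlib"]
#![no_core]

extern crate minicore;
use minicore::*;

// Register-class operand: the compiler picks a concrete SGPR for each
// `sgpr` operand, as with `out(vgpr)`/`in(vgpr)` for VGPRs.
#[no_mangle]
pub unsafe fn class_operand(x: u32) -> u32 {
    let y;
    asm!("s_mov_b32 {}, {}", out(sgpr) y, in(sgpr) x);
    y
}

// Explicit registers, including register pairs holding a 64-bit value;
// the pair names are formatted at runtime by `AmdgpuInlineAsmReg::name()`.
#[no_mangle]
pub unsafe fn explicit_pair(x: u64) -> u64 {
    let y;
    asm!("v_mov_b32 v2, v0", "v_mov_b32 v3, v1", in("v[0:1]") x, lateout("v[2:3]") y);
    y
}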