diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index e035f0809d685..0ac068787ff2a 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -387,6 +387,27 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 let pair = self.insert_value(pair, high, 1);
                 pair
             }
+
+            // FIXME: move into the branch below when LLVM 22 is the lowest version we support.
+            sym::carryless_mul if crate::llvm_util::get_version() >= (22, 0, 0) => {
+                let ty = args[0].layout.ty;
+                if !ty.is_integral() {
+                    tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
+                        span,
+                        name,
+                        ty,
+                    });
+                    return Ok(());
+                }
+                let (size, _) = ty.int_size_and_signed(self.tcx);
+                let width = size.bits();
+                let llty = self.type_ix(width);
+
+                let lhs = args[0].immediate();
+                let rhs = args[1].immediate();
+                self.call_intrinsic("llvm.clmul", &[llty], &[lhs, rhs])
+            }
+
             sym::ctlz
             | sym::ctlz_nonzero
             | sym::cttz
@@ -2763,6 +2784,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         | sym::simd_ctlz
        | sym::simd_ctpop
        | sym::simd_cttz
+        | sym::simd_carryless_mul
        | sym::simd_funnel_shl
        | sym::simd_funnel_shr
    ) {
@@ -2787,6 +2809,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
            sym::simd_cttz => "llvm.cttz",
            sym::simd_funnel_shl => "llvm.fshl",
            sym::simd_funnel_shr => "llvm.fshr",
+            sym::simd_carryless_mul => "llvm.clmul",
            _ => unreachable!(),
        };
        let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
@@ -2812,6 +2835,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                &[vec_ty],
                &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
            )),
+            sym::simd_carryless_mul => {
+                if crate::llvm_util::get_version() >= (22, 0, 0) {
+                    Ok(bx.call_intrinsic(
+                        llvm_intrinsic,
+                        &[vec_ty],
+                        &[args[0].immediate(), args[1].immediate()],
+                    ))
+                } else {
+                    span_bug!(span, "`simd_carryless_mul` needs LLVM 22 or higher");
+                }
+            }
            _ => unreachable!(),
        };
    }
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index bf3ec1f393302..eea4dfc08b7c7 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -345,7 +345,14 @@ impl CodegenBackend for LlvmCodegenBackend {
     }
 
     fn replaced_intrinsics(&self) -> Vec<Symbol> {
-        vec![sym::unchecked_funnel_shl, sym::unchecked_funnel_shr, sym::carrying_mul_add]
+        let mut will_not_use_fallback =
+            vec![sym::unchecked_funnel_shl, sym::unchecked_funnel_shr, sym::carrying_mul_add];
+
+        if llvm_util::get_version() >= (22, 0, 0) {
+            will_not_use_fallback.push(sym::carryless_mul);
+        }
+
+        will_not_use_fallback
     }
 
     fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Box<dyn Any> {
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
index e526f6120689a..09922d401657d 100644
--- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs
+++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -733,6 +733,33 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             sym::fmuladdf128 => {
                 self.float_muladd_intrinsic::<Quad>(args, dest, MulAddType::Nondeterministic)?
             }
+            sym::carryless_mul => {
+                let size = dest.layout.size;
+
+                let left = self.read_scalar(&args[0])?.to_bits(size)?;
+                let right = self.read_scalar(&args[1])?.to_bits(size)?;
+
+                // Perform carry-less multiplication.
+                //
+                // This works like long multiplication, except that the carries are discarded.
+                // Discarding carries is exactly what XOR does, which is why the loop below uses it.
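+                // For example, carryless_mul(0b11, 0b11) == 0b101, whereas an ordinary
+                // multiplication gives 3 * 3 == 9 == 0b1001.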
+                //
+                // Wikipedia has an example: https://en.wikipedia.org/wiki/carry-less_product#example
+                let mut result: u128 = 0;
+
+                for i in 0..size.bits() {
+                    // If the i-th bit in `right` is set...
+                    if (right >> i) & 1 != 0 {
+                        // ...XOR `result` with `left` shifted left by `i` positions.
+                        result ^= left << i;
+                    }
+                }
+
+                // Only return the lower bits.
+                result &= u128::MAX >> (128 - size.bits());
+
+                self.write_scalar(Scalar::from_uint(result, dest.layout.size), dest)?;
+            }
 
             // Unsupported intrinsic: skip the return_to_block below.
             _ => return interp_ok(false),
diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
index 22ee490b81a7b..6946d1a70040d 100644
--- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs
+++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -82,6 +82,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
         | sym::bswap
         | sym::caller_location
         | sym::carrying_mul_add
+        | sym::carryless_mul
         | sym::ceilf16
         | sym::ceilf32
         | sym::ceilf64
@@ -564,6 +565,7 @@ pub(crate) fn check_intrinsic_type(
             (1, 0, vec![param(0), param(0)], param(0))
         }
         sym::saturating_add | sym::saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
+        sym::carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
         sym::fadd_fast | sym::fsub_fast | sym::fmul_fast | sym::fdiv_fast | sym::frem_fast => {
             (1, 0, vec![param(0), param(0)], param(0))
         }
@@ -711,7 +713,8 @@ pub(crate) fn check_intrinsic_type(
             | sym::simd_fmin
             | sym::simd_fmax
             | sym::simd_saturating_add
-            | sym::simd_saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
+            | sym::simd_saturating_sub
+            | sym::simd_carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
             sym::simd_arith_offset => (2, 0, vec![param(0), param(1)], param(0)),
             sym::simd_neg
             | sym::simd_bswap
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index aac4cf1de8c2b..4ffe813440b82 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -642,6 +642,7 @@ symbols! {
         caller_location,
         capture_disjoint_fields,
         carrying_mul_add,
+        carryless_mul,
         catch_unwind,
         cause,
         cdylib,
@@ -2083,6 +2084,7 @@ symbols! {
         simd_bitmask,
         simd_bitreverse,
         simd_bswap,
+        simd_carryless_mul,
         simd_cast,
         simd_cast_ptr,
         simd_ceil,
diff --git a/library/core/src/intrinsics/fallback.rs b/library/core/src/intrinsics/fallback.rs
index 932537f2581f8..3244ccb8989be 100644
--- a/library/core/src/intrinsics/fallback.rs
+++ b/library/core/src/intrinsics/fallback.rs
@@ -218,3 +218,101 @@ macro_rules! impl_funnel_shifts {
 impl_funnel_shifts! {
     u8, u16, u32, u64, u128, usize
 }
+
+#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+pub const trait CarrylessMul: Copy + 'static {
+    /// See [`super::carryless_mul`]; we just need the trait indirection to handle
+    /// different types since calling intrinsics with generics doesn't work.
+    fn carryless_mul(self, rhs: Self) -> Self;
+}
+
+macro_rules! impl_carryless_mul {
+    ($($type:ident),*) => {$(
+        /// This approach uses a bitmask of the form `0b100010001...0001` to avoid carry spilling.
+        /// When carries do occur, they wind up in a "hole" of zeros and are subsequently masked
+        /// out of the result.
+        #[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+        impl const CarrylessMul for $type {
+            #[inline]
+            fn carryless_mul(self, rhs: Self) -> Self {
+                use crate::num::Wrapping;
+
+                // i.e. 0b100010001...0001 in binary.
+                const MASK: u64 = 0x1111_1111_1111_1111u64;
+
+                const M0: $type = MASK as $type;
+                const M1: $type = M0 << 1;
+                const M2: $type = M1 << 1;
+                const M3: $type = M2 << 1;
+
+                let x = self;
+                let y = rhs;
+
+                let x0 = Wrapping(x & M0);
+                let x1 = Wrapping(x & M1);
+                let x2 = Wrapping(x & M2);
+                let x3 = Wrapping(x & M3);
+
+                let y0 = Wrapping(y & M0);
+                let y1 = Wrapping(y & M1);
+                let y2 = Wrapping(y & M2);
+                let y3 = Wrapping(y & M3);
+
+                let z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
+                let z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
+                let z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
+                let z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
+
+                (z0.0 & M0) | (z1.0 & M1) | (z2.0 & M2) | (z3.0 & M3)
+            }
+        }
+    )*};
+}
+
+impl_carryless_mul! {
+    u8, u16, u32, u64, usize
+}
+
+#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+impl const CarrylessMul for u128 {
+    #[inline]
+    fn carryless_mul(self, rhs: Self) -> Self {
+        let l = u64::carryless_mul(self as u64, rhs as u64);
+        let lh = u64::carryless_mul(self as u64, (rhs >> 64) as u64);
+        let hl = u64::carryless_mul((self >> 64) as u64, rhs as u64);
+        let h = lh ^ hl ^ carryless_mul_high(self as u64, rhs as u64);
+        ((h as u128) << 64) | l as u128
+    }
+}
+
+#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
+#[inline]
+const fn carryless_mul_high(x: u64, y: u64) -> u64 {
+    // i.e. 0b100010001...0001 in binary.
+    const MASK: u64 = 0x1111_1111_1111_1111u64;
+
+    const M0: u64 = MASK;
+    const M1: u64 = M0 << 1;
+    const M2: u64 = M1 << 1;
+    const M3: u64 = M2 << 1;
+
+    macro_rules! mul {
+        ($x_mask_shift:literal, $y_mask_shift:literal) => {{
+            let x = x & (MASK << $x_mask_shift);
+            let y = y & (MASK << $y_mask_shift);
+            crate::hint::select_unpredictable(
+                x == MASK << $x_mask_shift && y == MASK << $y_mask_shift,
+                // only case where the multiply overflows the 4-bit parts
+                0x0101_0101_0101_0101u64 << ($x_mask_shift + $y_mask_shift),
+                x.carrying_mul(y, 0).1,
+            )
+        }};
+    }
+
+    let z0 = mul!(0, 0) ^ mul!(1, 3) ^ mul!(2, 2) ^ mul!(3, 1);
+    let z1 = mul!(0, 1) ^ mul!(1, 0) ^ mul!(2, 3) ^ mul!(3, 2);
+    let z2 = mul!(0, 2) ^ mul!(1, 1) ^ mul!(2, 0) ^ mul!(3, 3);
+    let z3 = mul!(0, 3) ^ mul!(1, 2) ^ mul!(2, 1) ^ mul!(3, 0);
+
+    (z0 & M0) | (z1 & M1) | (z2 & M2) | (z3 & M3)
+}
diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs
index 051dda731881f..7c6dbfdb7ab70 100644
--- a/library/core/src/intrinsics/mod.rs
+++ b/library/core/src/intrinsics/mod.rs
@@ -2179,6 +2179,20 @@ pub const unsafe fn unchecked_funnel_shr(
     unsafe { a.unchecked_funnel_shr(b, shift) }
 }
 
+/// Carryless multiply.
+///
+/// Safe versions of this intrinsic are available on the integer primitives
+/// via the `carryless_mul` method. For example, [`u32::carryless_mul`].
+#[rustc_intrinsic]
+#[rustc_nounwind]
+#[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
+#[unstable(feature = "uint_carryless_mul", issue = "152080")]
+pub const fn carryless_mul<T: [const] fallback::CarrylessMul>(a: T, b: T) -> T {
+    // NOTE: this implementation could serve as the specification, but rustc_const_eval
+    // actually implements a simpler (though less efficient) variant as the specification.
+    a.carryless_mul(b)
+}
+
 /// This is an implementation detail of [`crate::ptr::read`] and should
 /// not be used anywhere else. See its comments for why this exists.
 ///
diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs
index f70262c38ae50..5fb2102c319e2 100644
--- a/library/core/src/intrinsics/simd.rs
+++ b/library/core/src/intrinsics/simd.rs
@@ -162,6 +162,18 @@ pub const unsafe fn simd_funnel_shl<T>(a: T, b: T, shift: T) -> T;
 #[rustc_nounwind]
 pub const unsafe fn simd_funnel_shr<T>(a: T, b: T, shift: T) -> T;
 
+/// Computes the carry-less product.
+///
+/// This is similar to long multiplication except that the carry is discarded.
+///
+/// This operation can be used to model multiplication in `GF(2)[X]`, the polynomial
+/// ring over `GF(2)`.
+///
+/// `T` must be a vector of integers.
+#[rustc_intrinsic]
+#[rustc_nounwind]
+pub unsafe fn simd_carryless_mul<T>(a: T, b: T) -> T;
+
 /// "And"s vectors elementwise.
 ///
 /// `T` must be a vector of integers.
diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index 432ca50b33613..c1568b58b5fd3 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -188,6 +188,7 @@
 #![feature(trait_alias)]
 #![feature(transparent_unions)]
 #![feature(try_blocks)]
+#![feature(uint_carryless_mul)]
 #![feature(unboxed_closures)]
 #![feature(unsized_fn_params)]
 #![feature(with_negative_coherence)]
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
index 558426c94e5dc..839a6fbdc9b7e 100644
--- a/library/core/src/num/mod.rs
+++ b/library/core/src/num/mod.rs
@@ -244,6 +244,104 @@ macro_rules! midpoint_impl {
     };
 }
 
+macro_rules! widening_carryless_mul_impl {
+    ($SelfT:ty, $WideT:ty) => {
+        /// Performs a widening carry-less multiplication.
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// #![feature(uint_carryless_mul)]
+        ///
+        #[doc = concat!("assert_eq!(", stringify!($SelfT), "::MAX.widening_carryless_mul(",
+            stringify!($SelfT), "::MAX), ", stringify!($WideT), "::MAX / 3);")]
+        /// ```
+        #[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[doc(alias = "clmul")]
+        #[unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline]
+        pub const fn widening_carryless_mul(self, rhs: $SelfT) -> $WideT {
+            (self as $WideT).carryless_mul(rhs as $WideT)
+        }
+    }
+}
+
+macro_rules! carrying_carryless_mul_impl {
+    (u128, u256) => {
+        carrying_carryless_mul_impl! { @internal u128 =>
+            pub const fn carrying_carryless_mul(self, rhs: Self, carry: Self) -> (Self, Self) {
+                let x0 = self as u64;
+                let x1 = (self >> 64) as u64;
+                let y0 = rhs as u64;
+                let y1 = (rhs >> 64) as u64;
+
+                let z0 = u64::widening_carryless_mul(x0, y0);
+                let z2 = u64::widening_carryless_mul(x1, y1);
+
+                // The grade school algorithm would compute:
+                // z1 = x0y1 ^ x1y0
+
+                // Instead, Karatsuba first computes:
+                let z3 = u64::widening_carryless_mul(x0 ^ x1, y0 ^ y1);
+                // Since carry-less multiplication distributes over XOR,
+                // z3 == x0y0 ^ x0y1 ^ x1y0 ^ x1y1
+                //       |--|   |---------|   |--|
+                //    ==  z0  ^      z1     ^  z2
+                // so we can compute z1 as
+                let z1 = z3 ^ z0 ^ z2;
+
+                let lo = z0 ^ (z1 << 64);
+                let hi = z2 ^ (z1 >> 64);
+
+                (lo ^ carry, hi)
+            }
+        }
+    };
+    ($SelfT:ty, $WideT:ty) => {
+        carrying_carryless_mul_impl! { @internal $SelfT =>
+            pub const fn carrying_carryless_mul(self, rhs: Self, carry: Self) -> (Self, Self) {
+                // Can't use widening_carryless_mul because it's not implemented for usize.
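+                // Instead, widen by hand: do a truncating carryless_mul in the wider type
+                // and split the result into its low and high halves below.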
+                let p = (self as $WideT).carryless_mul(rhs as $WideT);
+
+                let lo = (p as $SelfT);
+                let hi = (p >> Self::BITS) as $SelfT;
+
+                (lo ^ carry, hi)
+            }
+        }
+    };
+    (@internal $SelfT:ty => $($fn:tt)*) => {
+        /// Calculates the "full carryless multiplication" without the possibility to overflow.
+        ///
+        /// This returns the low-order (wrapping) bits and the high-order (overflow) bits
+        /// of the result as two separate values, in that order.
+        ///
+        /// # Examples
+        ///
+        /// Please note that this example is shared among integer types, which is why `u8` is used.
+        ///
+        /// ```
+        /// #![feature(uint_carryless_mul)]
+        ///
+        /// assert_eq!(0b1000_0000u8.carrying_carryless_mul(0b1000_0000, 0b0000), (0, 0b0100_0000));
+        /// assert_eq!(0b1000_0000u8.carrying_carryless_mul(0b1000_0000, 0b1111), (0b1111, 0b0100_0000));
+        #[doc = concat!("assert_eq!(",
+            stringify!($SelfT), "::MAX.carrying_carryless_mul(", stringify!($SelfT), "::MAX, ", stringify!($SelfT), "::MAX), ",
+            "(!(", stringify!($SelfT), "::MAX / 3), ", stringify!($SelfT), "::MAX / 3));"
+        )]
+        /// ```
+        #[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[doc(alias = "clmul")]
+        #[unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline]
+        $($fn)*
+    }
+}
+
 impl i8 {
     int_impl! {
         Self = i8,
@@ -458,6 +556,9 @@ impl u8 {
         fsh_op = "0x36",
         fshl_result = "0x8",
         fshr_result = "0x8d",
+        clmul_lhs = "0x12",
+        clmul_rhs = "0x34",
+        clmul_result = "0x28",
         swap_op = "0x12",
         swapped = "0x12",
         reversed = "0x48",
@@ -468,6 +569,8 @@ impl u8 {
         bound_condition = "",
     }
     midpoint_impl! { u8, u16, unsigned }
+    widening_carryless_mul_impl! { u8, u16 }
+    carrying_carryless_mul_impl! { u8, u16 }
 
     /// Checks if the value is within the ASCII range.
     ///
@@ -1095,6 +1198,9 @@ impl u16 {
         fsh_op = "0x2de",
         fshl_result = "0x30",
         fshr_result = "0x302d",
+        clmul_lhs = "0x9012",
+        clmul_rhs = "0xcd34",
+        clmul_result = "0x928",
         swap_op = "0x1234",
         swapped = "0x3412",
         reversed = "0x2c48",
@@ -1105,6 +1211,8 @@ impl u16 {
         bound_condition = "",
     }
     midpoint_impl! { u16, u32, unsigned }
+    widening_carryless_mul_impl! { u16, u32 }
+    carrying_carryless_mul_impl! { u16, u32 }
 
     /// Checks if the value is a Unicode surrogate code point, which are disallowed values for [`char`].
     ///
@@ -1145,6 +1253,9 @@ impl u32 {
         fsh_op = "0x2fe78e45",
         fshl_result = "0xb32f",
         fshr_result = "0xb32fe78e",
+        clmul_lhs = "0x56789012",
+        clmul_rhs = "0xf52ecd34",
+        clmul_result = "0x9b980928",
         swap_op = "0x12345678",
         swapped = "0x78563412",
         reversed = "0x1e6a2c48",
@@ -1155,6 +1266,8 @@ impl u32 {
         bound_condition = "",
     }
     midpoint_impl! { u32, u64, unsigned }
+    widening_carryless_mul_impl! { u32, u64 }
+    carrying_carryless_mul_impl! { u32, u64 }
 }
 
 impl u64 {
@@ -1171,6 +1284,9 @@ impl u64 {
         fsh_op = "0x2fe78e45983acd98",
         fshl_result = "0x6e12fe",
         fshr_result = "0x6e12fe78e45983ac",
+        clmul_lhs = "0x7890123456789012",
+        clmul_rhs = "0xdd358416f52ecd34",
+        clmul_result = "0xa6299579b980928",
         swap_op = "0x1234567890123456",
         swapped = "0x5634129078563412",
         reversed = "0x6a2c48091e6a2c48",
@@ -1181,6 +1297,8 @@ impl u64 {
         bound_condition = "",
     }
     midpoint_impl! { u64, u128, unsigned }
+    widening_carryless_mul_impl! { u64, u128 }
+    carrying_carryless_mul_impl! { u64, u128 }
 }
 
 impl u128 {
@@ -1197,6 +1315,9 @@ impl u128 {
         fsh_op = "0x2fe78e45983acd98039000008736273",
         fshl_result = "0x4f7602fe",
         fshr_result = "0x4f7602fe78e45983acd9803900000873",
+        clmul_lhs = "0x12345678901234567890123456789012",
+        clmul_rhs = "0x4317e40ab4ddcf05dd358416f52ecd34",
+        clmul_result = "0xb9cf660de35d0c170a6299579b980928",
         swap_op = "0x12345678901234567890123456789012",
         swapped = "0x12907856341290785634129078563412",
         reversed = "0x48091e6a2c48091e6a2c48091e6a2c48",
@@ -1209,6 +1330,7 @@ impl u128 {
         bound_condition = "",
     }
     midpoint_impl! { u128, unsigned }
+    carrying_carryless_mul_impl! { u128, u256 }
 }
 
 #[cfg(target_pointer_width = "16")]
@@ -1223,9 +1345,12 @@ impl usize {
         rot = 4,
         rot_op = "0xa003",
         rot_result = "0x3a",
-        fsh_op = "0x2fe78e45983acd98039000008736273",
-        fshl_result = "0x4f7602fe",
-        fshr_result = "0x4f7602fe78e45983acd9803900000873",
+        fsh_op = "0x2de",
+        fshl_result = "0x30",
+        fshr_result = "0x302d",
+        clmul_lhs = "0x9012",
+        clmul_rhs = "0xcd34",
+        clmul_result = "0x928",
         swap_op = "0x1234",
         swapped = "0x3412",
         reversed = "0x2c48",
@@ -1236,6 +1361,7 @@ impl usize {
         bound_condition = " on 16-bit targets",
     }
     midpoint_impl! { usize, u32, unsigned }
+    carrying_carryless_mul_impl! { usize, u32 }
 }
 
 #[cfg(target_pointer_width = "32")]
@@ -1253,6 +1379,9 @@ impl usize {
         fsh_op = "0x2fe78e45",
         fshl_result = "0xb32f",
         fshr_result = "0xb32fe78e",
+        clmul_lhs = "0x56789012",
+        clmul_rhs = "0xf52ecd34",
+        clmul_result = "0x9b980928",
         swap_op = "0x12345678",
         swapped = "0x78563412",
         reversed = "0x1e6a2c48",
@@ -1263,6 +1392,7 @@ impl usize {
         bound_condition = " on 32-bit targets",
     }
     midpoint_impl! { usize, u64, unsigned }
+    carrying_carryless_mul_impl! { usize, u64 }
 }
 
 #[cfg(target_pointer_width = "64")]
@@ -1280,6 +1410,9 @@ impl usize {
         fsh_op = "0x2fe78e45983acd98",
         fshl_result = "0x6e12fe",
         fshr_result = "0x6e12fe78e45983ac",
+        clmul_lhs = "0x7890123456789012",
+        clmul_rhs = "0xdd358416f52ecd34",
+        clmul_result = "0xa6299579b980928",
         swap_op = "0x1234567890123456",
         swapped = "0x5634129078563412",
         reversed = "0x6a2c48091e6a2c48",
@@ -1290,6 +1423,7 @@ impl usize {
         bound_condition = " on 64-bit targets",
     }
     midpoint_impl! { usize, u128, unsigned }
+    carrying_carryless_mul_impl! { usize, u128 }
 }
 
 impl usize {
diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs
index 5c263ea845cc2..62c0e3e317da4 100644
--- a/library/core/src/num/uint_macros.rs
+++ b/library/core/src/num/uint_macros.rs
@@ -17,6 +17,9 @@ macro_rules! uint_impl {
         fsh_op = $fsh_op:literal,
         fshl_result = $fshl_result:literal,
         fshr_result = $fshr_result:literal,
+        clmul_lhs = $clmul_lhs:literal,
+        clmul_rhs = $clmul_rhs:literal,
+        clmul_result = $clmul_result:literal,
         swap_op = $swap_op:literal,
         swapped = $swapped:literal,
         reversed = $reversed:literal,
@@ -482,6 +485,62 @@ macro_rules! uint_impl {
             unsafe { intrinsics::unchecked_funnel_shr(self, rhs, n) }
         }
 
+        /// Performs a carry-less multiplication, returning the lower bits.
+        ///
+        /// This operation is similar to long multiplication, except that exclusive or is used
+        /// instead of addition. The implementation is equivalent to:
+        ///
+        /// ```no_run
+        #[doc = concat!("pub fn carryless_mul(lhs: ", stringify!($SelfT), ", rhs: ", stringify!($SelfT), ") -> ", stringify!($SelfT), " {")]
+        ///     let mut retval = 0;
+        #[doc = concat!("    for i in 0..", stringify!($SelfT), "::BITS {")]
+        ///         if (rhs >> i) & 1 != 0 {
+        ///             // long multiplication would use += here
+        ///             retval ^= lhs << i;
+        ///         }
+        ///     }
+        ///     retval
+        /// }
+        /// ```
+        ///
+        /// The actual implementation is more efficient, and on some platforms lowers directly to a
+        /// dedicated instruction.
+        ///
+        /// # Uses
+        ///
+        /// Carry-less multiplication can be used to turn a bitmask of quote characters into a
+        /// bitmask of characters surrounded by quotes:
+        ///
+        /// ```no_run
+        /// r#"abc xxx "foobar" zzz "a"!"#; // input string
+        /// 0b0000000010000001000001010; // quote_mask
+        /// 0b0000000001111110000000100; // quote_mask.carryless_mul(!0) & !quote_mask
+        /// ```
+        ///
+        /// Another use is in cryptography, where carry-less multiplication allows for efficient
+        /// implementations of polynomial multiplication in `GF(2)[X]`, the polynomial ring
+        /// over `GF(2)`.
+        ///
+        /// # Examples
+        ///
+        /// ```
+        /// #![feature(uint_carryless_mul)]
+        ///
+        #[doc = concat!("let a = ", $clmul_lhs, stringify!($SelfT), ";")]
+        #[doc = concat!("let b = ", $clmul_rhs, stringify!($SelfT), ";")]
+        ///
+        #[doc = concat!("assert_eq!(a.carryless_mul(b), ", $clmul_result, ");")]
+        /// ```
+        #[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[doc(alias = "clmul")]
+        #[unstable(feature = "uint_carryless_mul", issue = "152080")]
+        #[must_use = "this returns the result of the operation, \
+                      without modifying the original"]
+        #[inline(always)]
+        pub const fn carryless_mul(self, rhs: Self) -> Self {
+            intrinsics::carryless_mul(self, rhs)
+        }
+
         /// Reverses the byte order of the integer.
         ///
         /// # Examples
diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs
index d085e4ad1a8fe..f3b36ef9092b7 100644
--- a/library/coretests/tests/lib.rs
+++ b/library/coretests/tests/lib.rs
@@ -120,6 +120,7 @@
 #![feature(try_trait_v2)]
 #![feature(type_info)]
 #![feature(uint_bit_width)]
+#![feature(uint_carryless_mul)]
 #![feature(uint_gather_scatter_bits)]
 #![feature(unsize)]
 #![feature(unwrap_infallible)]
diff --git a/library/coretests/tests/num/uint_macros.rs b/library/coretests/tests/num/uint_macros.rs
index 7c4fb22599c03..240c66fd5c715 100644
--- a/library/coretests/tests/num/uint_macros.rs
+++ b/library/coretests/tests/num/uint_macros.rs
@@ -117,6 +117,13 @@ macro_rules! uint_module {
             assert_eq_const_safe!($T: <$T>::funnel_shr(_1, _1, 4), <$T>::rotate_right(_1, 4));
         }
 
+        fn test_carryless_mul() {
+            assert_eq_const_safe!($T: <$T>::carryless_mul(0, 0), 0);
+            assert_eq_const_safe!($T: <$T>::carryless_mul(1, 1), 1);
+
+            assert_eq_const_safe!($T: <$T>::carryless_mul(0b0100, 2), 0b1000);
+        }
+
         fn test_swap_bytes() {
             assert_eq_const_safe!($T: A.swap_bytes().swap_bytes(), A);
             assert_eq_const_safe!($T: B.swap_bytes().swap_bytes(), B);
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index dcde208fac77b..12fa3efa84803 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -315,6 +315,7 @@
 #![feature(try_blocks)]
 #![feature(try_trait_v2)]
 #![feature(type_alias_impl_trait)]
+#![feature(uint_carryless_mul)]
 // tidy-alphabetical-end
 //
 // Library features (core):
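As a quick sanity check of the documented semantics and of the doc-test constants above, the reference bit-loop from the new `carryless_mul` documentation can be mirrored in a small standalone program on stable Rust. The sketch below is illustrative only: the function name and the checks are not part of the patch, and the expected values are the ones already used in this diff (0x12 clmul 0x34 == 0x28 for u8, plus the shift-by-one case from the new unit test).

// Mirrors the documented reference semantics of `carryless_mul`:
// long multiplication with XOR instead of addition, truncated to the operand width.
fn clmul_u8_reference(lhs: u8, rhs: u8) -> u8 {
    let mut retval: u8 = 0;
    for i in 0..u8::BITS {
        if (rhs >> i) & 1 != 0 {
            // Ordinary long multiplication would use `+=` here; the carry-less
            // variant combines the shifted partial products with XOR instead.
            retval ^= lhs << i;
        }
    }
    retval
}

fn main() {
    // Constants taken from the u8 doc example in this diff.
    assert_eq!(clmul_u8_reference(0x12, 0x34), 0x28);
    // Carry-less multiplication is commutative, so swapping the operands agrees.
    assert_eq!(clmul_u8_reference(0x34, 0x12), 0x28);
    // Matches the new unit test: multiplying by 2 is a shift left by one.
    assert_eq!(clmul_u8_reference(0b0100, 2), 0b1000);
    println!("carryless_mul reference checks passed");
}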