Skip to content

Commit 4d2911b

Browse files
TDecking authored and Amanieu committed
Rework SIMD zeroing
1 parent 91c0dab commit 4d2911b

File tree

20 files changed

+1468
-2906
lines changed

20 files changed

+1468
-2906
lines changed

library/stdarch/crates/core_arch/src/simd.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ macro_rules! simd_ty {
1010

1111
#[allow(clippy::use_self)]
1212
impl $id {
13+
/// A value of this type where all elements are zeroed out.
14+
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
15+
1316
#[inline(always)]
1417
pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
1518
$id([$($param_name),*])

library/stdarch/crates/core_arch/src/wasm32/simd128.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,7 +2232,7 @@ pub fn v128_any_true(a: v128) -> bool {
22322232
pub fn i8x16_abs(a: v128) -> v128 {
22332233
unsafe {
22342234
let a = a.as_i8x16();
2235-
let zero = simd::i8x16::splat(0);
2235+
let zero = simd::i8x16::ZERO;
22362236
simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
22372237
}
22382238
}
@@ -2524,7 +2524,7 @@ pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;
25242524
#[stable(feature = "wasm_simd", since = "1.54.0")]
25252525
pub fn i16x8_abs(a: v128) -> v128 {
25262526
let a = a.as_i16x8();
2527-
let zero = simd::i16x8::splat(0);
2527+
let zero = simd::i16x8::ZERO;
25282528
unsafe {
25292529
simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
25302530
}
@@ -3012,7 +3012,7 @@ pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;
30123012
#[stable(feature = "wasm_simd", since = "1.54.0")]
30133013
pub fn i32x4_abs(a: v128) -> v128 {
30143014
let a = a.as_i32x4();
3015-
let zero = simd::i32x4::splat(0);
3015+
let zero = simd::i32x4::ZERO;
30163016
unsafe {
30173017
simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
30183018
}
@@ -3394,7 +3394,7 @@ pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;
33943394
#[stable(feature = "wasm_simd", since = "1.54.0")]
33953395
pub fn i64x2_abs(a: v128) -> v128 {
33963396
let a = a.as_i64x2();
3397-
let zero = simd::i64x2::splat(0);
3397+
let zero = simd::i64x2::ZERO;
33983398
unsafe {
33993399
simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
34003400
}
@@ -4105,7 +4105,7 @@ pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
41054105
let ret: simd::i32x4 = unsafe {
41064106
simd_shuffle!(
41074107
llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
4108-
simd::i32x2::splat(0),
4108+
simd::i32x2::ZERO,
41094109
[0, 1, 2, 3],
41104110
)
41114111
};
@@ -4129,7 +4129,7 @@ pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
41294129
let ret: simd::i32x4 = unsafe {
41304130
simd_shuffle!(
41314131
llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
4132-
simd::i32x2::splat(0),
4132+
simd::i32x2::ZERO,
41334133
[0, 1, 2, 3],
41344134
)
41354135
};
@@ -4176,7 +4176,7 @@ pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 {
41764176
unsafe {
41774177
simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle!(
41784178
a.as_f64x2(),
4179-
simd::f64x2::splat(0.0),
4179+
simd::f64x2::ZERO,
41804180
[0, 1, 2, 3]
41814181
))
41824182
.v128()

library/stdarch/crates/core_arch/src/x86/avx.rs

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
515515
#[cfg_attr(test, assert_instr(vblendvpd))]
516516
#[stable(feature = "simd_x86", since = "1.27.0")]
517517
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
518-
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
518+
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
519519
transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
520520
}
521521

@@ -528,7 +528,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
528528
#[cfg_attr(test, assert_instr(vblendvps))]
529529
#[stable(feature = "simd_x86", since = "1.27.0")]
530530
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
531-
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
531+
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
532532
transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
533533
}
534534

@@ -983,11 +983,7 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
983983
#[stable(feature = "simd_x86", since = "1.27.0")]
984984
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
985985
static_assert_uimm_bits!(IMM1, 1);
986-
let dst: i64x2 = simd_shuffle!(
987-
a.as_i64x4(),
988-
_mm256_undefined_si256().as_i64x4(),
989-
[[0, 1], [2, 3]][IMM1 as usize],
990-
);
986+
let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
991987
transmute(dst)
992988
}
993989

@@ -2139,7 +2135,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
21392135
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
21402136
// Propagate the highest bit to the rest, because simd_bitmask
21412137
// requires all-1 or all-0.
2142-
let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
2138+
let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
21432139
simd_bitmask::<i64x4, u8>(mask).into()
21442140
}
21452141

@@ -2155,7 +2151,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
21552151
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21562152
// Propagate the highest bit to the rest, because simd_bitmask
21572153
// requires all-1 or all-0.
2158-
let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
2154+
let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
21592155
simd_bitmask::<i32x8, u8>(mask).into()
21602156
}
21612157

@@ -2167,7 +2163,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21672163
#[cfg_attr(test, assert_instr(vxorp))]
21682164
#[stable(feature = "simd_x86", since = "1.27.0")]
21692165
pub unsafe fn _mm256_setzero_pd() -> __m256d {
2170-
_mm256_set1_pd(0.0)
2166+
const { mem::zeroed() }
21712167
}
21722168

21732169
/// Returns vector of type __m256 with all elements set to zero.
@@ -2178,7 +2174,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {
21782174
#[cfg_attr(test, assert_instr(vxorps))]
21792175
#[stable(feature = "simd_x86", since = "1.27.0")]
21802176
pub unsafe fn _mm256_setzero_ps() -> __m256 {
2181-
_mm256_set1_ps(0.0)
2177+
const { mem::zeroed() }
21822178
}
21832179

21842180
/// Returns vector of type __m256i with all elements set to zero.
@@ -2189,7 +2185,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {
21892185
#[cfg_attr(test, assert_instr(vxor))]
21902186
#[stable(feature = "simd_x86", since = "1.27.0")]
21912187
pub unsafe fn _mm256_setzero_si256() -> __m256i {
2192-
_mm256_set1_epi8(0)
2188+
const { mem::zeroed() }
21932189
}
21942190

21952191
/// Sets packed double-precision (64-bit) floating-point elements in returned
@@ -2722,7 +2718,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
27222718
#[stable(feature = "simd_x86", since = "1.27.0")]
27232719
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
27242720
let a = a.as_i64x2();
2725-
let undefined = _mm_undefined_si128().as_i64x2();
2721+
let undefined = i64x2::ZERO;
27262722
let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
27272723
transmute(dst)
27282724
}
@@ -2752,7 +2748,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
27522748
// instructions, thus it has zero latency.
27532749
#[stable(feature = "simd_x86", since = "1.27.0")]
27542750
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2755-
let b = _mm_setzero_si128().as_i64x2();
2751+
let b = i64x2::ZERO;
27562752
let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
27572753
transmute(dst)
27582754
}
@@ -2782,7 +2778,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
27822778
// This intrinsic has no corresponding instruction.
27832779
#[stable(feature = "simd_x86", since = "1.27.0")]
27842780
pub unsafe fn _mm256_undefined_ps() -> __m256 {
2785-
_mm256_set1_ps(0.0)
2781+
const { mem::zeroed() }
27862782
}
27872783

27882784
/// Returns vector of type `__m256d` with indeterminate elements.
@@ -2795,7 +2791,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
27952791
// This intrinsic has no corresponding instruction.
27962792
#[stable(feature = "simd_x86", since = "1.27.0")]
27972793
pub unsafe fn _mm256_undefined_pd() -> __m256d {
2798-
_mm256_set1_pd(0.0)
2794+
const { mem::zeroed() }
27992795
}
28002796

28012797
/// Returns vector of type __m256i with indeterminate elements.
@@ -2808,7 +2804,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {
28082804
// This intrinsic has no corresponding instruction.
28092805
#[stable(feature = "simd_x86", since = "1.27.0")]
28102806
pub unsafe fn _mm256_undefined_si256() -> __m256i {
2811-
__m256i([0, 0, 0, 0])
2807+
const { mem::zeroed() }
28122808
}
28132809

28142810
/// Sets packed __m256 returned vector with the supplied values.

0 commit comments

Comments
 (0)