From 6efd06e975e7dea1f5b7e1f1db5aba51237feaca Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 10 Mar 2026 17:42:37 +0100 Subject: [PATCH 1/3] s390x: use llvm.s390 intrinsics instead of simd_fmin/fmax --- crates/core_arch/src/s390x/vector.rs | 37 +++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 346cd674df..2f31eb48f8 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -335,6 +335,11 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short; #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short; #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short; + + #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; + #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double; + #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; + #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double; } #[repr(simd)] @@ -780,8 +785,20 @@ mod sealed { impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg); } - test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] } - test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vfmaxsb_m0(a: vector_float, b: vector_float) -> vector_float { + // clang uses mode 0 for `vec_max`, so we do the same. + vfmaxsb(a, b, const { 0 }) + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vfmaxdb_m0(a: vector_double, b: vector_double) -> vector_double { + vfmaxdb(a, b, const { 0 }) + } + + test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [vfmaxsb_m0, "vector-enhancements-1" vfmaxsb ] } + test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [vfmaxdb_m0, "vector-enhancements-1" vfmaxdb] } impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float); impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double); @@ -827,8 +844,20 @@ mod sealed { impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg); } - test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb] } - test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb] } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vfminsb_m0(a: vector_float, b: vector_float) -> vector_float { + // clang uses mode 0 for `vec_min`, so we do the same. + vfminsb(a, b, const { 0 }) + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vfmindb_m0(a: vector_double, b: vector_double) -> vector_double { + vfmindb(a, b, const { 0 }) + } + + test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [vfminsb_m0, "vector-enhancements-1" vfminsb] } + test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [vfmindb_m0, "vector-enhancements-1" vfmindb] } impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float); impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double); From bfc3662a29ace7a6e18d2705ad7fb138a1976473 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 10 Mar 2026 23:26:51 +0100 Subject: [PATCH 2/3] add f32 min/max tests --- crates/core_arch/src/s390x/vector.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 2f31eb48f8..33921eca5f 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -7506,6 +7506,19 @@ mod tests { [0, !0, !0, !0] } + // f32 is the tricky case for max/min as that needs a fallback on z13 + test_vec_2! { test_vec_max, vec_max, f32x4, f32x4 -> f32x4, + [1.0, f32::NAN, f32::INFINITY, 2.0], + [-10.0, -10.0, 5.0, f32::NAN], + [1.0, -10.0, f32::INFINITY, 2.0] + } + + test_vec_2! { test_vec_min, vec_min, f32x4, f32x4 -> f32x4, + [1.0, f32::NAN, f32::INFINITY, 2.0], + [-10.0, -10.0, 5.0, f32::NAN], + [-10.0, -10.0, 5.0, 2.0] + } + #[simd_test(enable = "vector")] fn test_vec_meadd() { let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]); From 454668b30b8acb355cf4052de711092f7cfb4c8c Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 11 Mar 2026 08:52:03 +0100 Subject: [PATCH 3/3] go back to portable LLVM intrinsic to avoid fallback trouble --- crates/core_arch/src/s390x/vector.rs | 49 ++++++++++------------------ 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 33921eca5f..31b9dc5eac 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -336,10 +336,19 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short; #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short; - #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; - #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double; - #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; - #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double; + // These are the intrinsics we'd like to use (with mode 0). However, they require + // "vector-enhancements-1" and don't have a fallback, whereas `vec_min`/`vec_max` should be + // available with just "vector". Therefore, we cannot use them. + // #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; + // #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double; + // #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float; + // #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double; + // Instead, we use "portable" LLVM intrinsics -- even though those have the wrong semantics + // (https://github.com/rust-lang/stdarch/issues/2060), they usually do the right thing. + #[link_name = "llvm.minnum.v4f32"] fn minnum_v4f32(a: vector_float, b: vector_float) -> vector_float; + #[link_name = "llvm.minnum.v2f64"] fn minnum_v2f64(a: vector_double, b: vector_double) -> vector_double; + #[link_name = "llvm.maxnum.v4f32"] fn maxnum_v4f32(a: vector_float, b: vector_float) -> vector_float; + #[link_name = "llvm.maxnum.v2f64"] fn maxnum_v2f64(a: vector_double, b: vector_double) -> vector_double; } #[repr(simd)] @@ -785,20 +794,8 @@ mod sealed { impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg); } - #[inline] - #[target_feature(enable = "vector")] - unsafe fn vfmaxsb_m0(a: vector_float, b: vector_float) -> vector_float { - // clang uses mode 0 for `vec_max`, so we do the same. - vfmaxsb(a, b, const { 0 }) - } - #[inline] - #[target_feature(enable = "vector")] - unsafe fn vfmaxdb_m0(a: vector_double, b: vector_double) -> vector_double { - vfmaxdb(a, b, const { 0 }) - } - - test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [vfmaxsb_m0, "vector-enhancements-1" vfmaxsb ] } - test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [vfmaxdb_m0, "vector-enhancements-1" vfmaxdb] } + test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [maxnum_v4f32, "vector-enhancements-1" vfmaxsb] } + test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [maxnum_v2f64, "vector-enhancements-1" vfmaxdb] } impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float); impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double); @@ -844,20 +841,8 @@ mod sealed { impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg); } - #[inline] - #[target_feature(enable = "vector")] - unsafe fn vfminsb_m0(a: vector_float, b: vector_float) -> vector_float { - // clang uses mode 0 for `vec_min`, so we do the same. - vfminsb(a, b, const { 0 }) - } - #[inline] - #[target_feature(enable = "vector")] - unsafe fn vfmindb_m0(a: vector_double, b: vector_double) -> vector_double { - vfmindb(a, b, const { 0 }) - } - - test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [vfminsb_m0, "vector-enhancements-1" vfminsb] } - test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [vfmindb_m0, "vector-enhancements-1" vfmindb] } + test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [minnum_v4f32, "vector-enhancements-1" vfminsb] } + test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [minnum_v2f64, "vector-enhancements-1" vfmindb] } impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float); impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);