From 6efd06e975e7dea1f5b7e1f1db5aba51237feaca Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 10 Mar 2026 17:42:37 +0100
Subject: [PATCH 1/3] s390x: use llvm.s390 intrinsics instead of simd_fmin/fmax

---
 crates/core_arch/src/s390x/vector.rs | 37 +++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 346cd674df..2f31eb48f8 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -335,6 +335,11 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
     #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
     #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
+
+    #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
 }
 
 #[repr(simd)]
@@ -780,8 +785,20 @@ mod sealed {
         impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg);
     }
 
-    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] }
-    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn vfmaxsb_m0(a: vector_float, b: vector_float) -> vector_float {
+        // clang uses mode 0 for `vec_max`, so we do the same.
+        vfmaxsb(a, b, const { 0 })
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn vfmaxdb_m0(a: vector_double, b: vector_double) -> vector_double {
+        vfmaxdb(a, b, const { 0 })
+    }
+
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [vfmaxsb_m0, "vector-enhancements-1" vfmaxsb ] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [vfmaxdb_m0, "vector-enhancements-1" vfmaxdb] }
 
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -827,8 +844,20 @@ mod sealed {
         impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg);
     }
 
-    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb]  }
-    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb]  }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn vfminsb_m0(a: vector_float, b: vector_float) -> vector_float {
+        // clang uses mode 0 for `vec_min`, so we do the same.
+        vfminsb(a, b, const { 0 })
+    }
+    #[inline]
+    #[target_feature(enable = "vector")]
+    unsafe fn vfmindb_m0(a: vector_double, b: vector_double) -> vector_double {
+        vfmindb(a, b, const { 0 })
+    }
+
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [vfminsb_m0, "vector-enhancements-1" vfminsb]  }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [vfmindb_m0, "vector-enhancements-1" vfmindb]  }
 
     impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);

From bfc3662a29ace7a6e18d2705ad7fb138a1976473 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 10 Mar 2026 23:26:51 +0100
Subject: [PATCH 2/3] add f32 min/max tests

---
 crates/core_arch/src/s390x/vector.rs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 2f31eb48f8..33921eca5f 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -7506,6 +7506,19 @@ mod tests {
         [0, !0, !0, !0]
     }
 
+    // f32 is the tricky case for max/min: on z13 (no "vector-enhancements-1") it needs a fallback.
+    test_vec_2! { test_vec_max, vec_max, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [1.0,   -10.0,    f32::INFINITY, 2.0]
+    }
+
+    test_vec_2! { test_vec_min, vec_min, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [-10.0, -10.0,    5.0,           2.0]
+    }
+
     #[simd_test(enable = "vector")]
     fn test_vec_meadd() {
         let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]);

From 454668b30b8acb355cf4052de711092f7cfb4c8c Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Wed, 11 Mar 2026 08:52:03 +0100
Subject: [PATCH 3/3] go back to portable LLVM intrinsics to avoid fallback
 trouble

---
 crates/core_arch/src/s390x/vector.rs | 49 ++++++++++------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 33921eca5f..31b9dc5eac 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -336,10 +336,19 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
     #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
 
-    #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
-    #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
-    #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
-    #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // These are the intrinsics we'd like to use (with mode 0). However, they require
+    // "vector-enhancements-1" and don't have a fallback, whereas `vec_min`/`vec_max` should be
+    // available with just "vector". Therefore, we cannot use them.
+    // #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // Instead, we use "portable" LLVM intrinsics -- even though those have the wrong semantics
+    // (https://github.com/rust-lang/stdarch/issues/2060), they usually do the right thing.
+    #[link_name = "llvm.minnum.v4f32"] fn minnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.minnum.v2f64"] fn minnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
+    #[link_name = "llvm.maxnum.v4f32"] fn maxnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.maxnum.v2f64"] fn maxnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
 }
 
 #[repr(simd)]
@@ -785,20 +794,8 @@ mod sealed {
         impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg);
     }
 
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vfmaxsb_m0(a: vector_float, b: vector_float) -> vector_float {
-        // clang uses mode 0 for `vec_max`, so we do the same.
-        vfmaxsb(a, b, const { 0 })
-    }
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vfmaxdb_m0(a: vector_double, b: vector_double) -> vector_double {
-        vfmaxdb(a, b, const { 0 })
-    }
-
-    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [vfmaxsb_m0, "vector-enhancements-1" vfmaxsb ] }
-    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [vfmaxdb_m0, "vector-enhancements-1" vfmaxdb] }
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [maxnum_v4f32, "vector-enhancements-1" vfmaxsb] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [maxnum_v2f64, "vector-enhancements-1" vfmaxdb] }
 
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -844,20 +841,8 @@ mod sealed {
         impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg);
     }
 
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vfminsb_m0(a: vector_float, b: vector_float) -> vector_float {
-        // clang uses mode 0 for `vec_min`, so we do the same.
-        vfminsb(a, b, const { 0 })
-    }
-    #[inline]
-    #[target_feature(enable = "vector")]
-    unsafe fn vfmindb_m0(a: vector_double, b: vector_double) -> vector_double {
-        vfmindb(a, b, const { 0 })
-    }
-
-    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [vfminsb_m0, "vector-enhancements-1" vfminsb]  }
-    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [vfmindb_m0, "vector-enhancements-1" vfmindb]  }
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [minnum_v4f32, "vector-enhancements-1" vfminsb] }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [minnum_v2f64, "vector-enhancements-1" vfmindb] }
 
     impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);