//! Implements portable arithmetic vector reductions.
#![allow(unused)]

-macro_rules! impl_arithmetic_reductions {
+macro_rules! impl_int_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        impl $id {
            /// Horizontal sum of the vector elements.
@@ -11,15 +11,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
@@ -33,15 +26,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
@@ -62,15 +48,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
@@ -84,18 +63,95 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            pub fn wrapping_product(self) -> $elem_ty {
+                // FIXME: broken on AArch64
+                // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
+                let mut x = self.extract(0) as $elem_ty;
+                for i in 1..$id::lanes() {
+                    x = Wrapping::mul(x, self.extract(i) as $elem_ty);
+                }
+                x
+            }
+        }
+    };
+}
+
+macro_rules! impl_float_arithmetic_reductions {
+    ($id:ident, $elem_ty:ident) => {
+        impl $id {
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
            ///
-            /// # Floating-point vectors
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If one of the vector elements is `NaN` the reduction returns
-            /// `NaN`.
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(not(target_arch = "aarch64"))]
+            #[inline]
+            pub fn sum(self) -> $elem_ty {
+                use coresimd::simd_llvm::simd_reduce_add_ordered;
+                unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
+            }
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
            #[cfg(target_arch = "aarch64")]
            #[inline]
-            pub fn wrapping_product(self) -> $elem_ty {
+            pub fn sum(self) -> $elem_ty {
+                // FIXME: broken on AArch64
+                // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
+                let mut x = self.extract(0) as $elem_ty;
+                for i in 1..$id::lanes() {
+                    x = Wrapping::add(x, self.extract(i) as $elem_ty);
+                }
+                x
+            }
+
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(not(target_arch = "aarch64"))]
+            #[inline]
+            pub fn product(self) -> $elem_ty {
+                use coresimd::simd_llvm::simd_reduce_mul_ordered;
+                unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
+            }
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            pub fn product(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                use super::codegen::wrapping::Wrapping;
@@ -109,8 +165,9 @@ macro_rules! impl_arithmetic_reductions {
    };
}

+
#[cfg(test)]
-macro_rules! test_arithmetic_reductions {
+macro_rules! test_int_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        fn alternating(x: usize) -> ::coresimd::simd::$id {
            use coresimd::simd::$id;
@@ -157,3 +214,52 @@ macro_rules! test_arithmetic_reductions {
        }
    };
}
+
+#[cfg(test)]
+macro_rules! test_float_arithmetic_reductions {
+    ($id:ident, $elem_ty:ident) => {
+        fn alternating(x: usize) -> ::coresimd::simd::$id {
+            use coresimd::simd::$id;
+            let mut v = $id::splat(1 as $elem_ty);
+            for i in 0..$id::lanes() {
+                if i % x == 0 {
+                    v = v.replace(i, 2 as $elem_ty);
+                }
+            }
+            v
+        }
+
+        #[test]
+        fn sum() {
+            use coresimd::simd::$id;
+            let v = $id::splat(0 as $elem_ty);
+            assert_eq!(v.sum(), 0 as $elem_ty);
+            let v = $id::splat(1 as $elem_ty);
+            assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+            let v = alternating(2);
+            assert_eq!(
+                v.sum(),
+                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+            );
+        }
+        #[test]
+        fn product() {
+            use coresimd::simd::$id;
+            let v = $id::splat(0 as $elem_ty);
+            assert_eq!(v.product(), 0 as $elem_ty);
+            let v = $id::splat(1 as $elem_ty);
+            assert_eq!(v.product(), 1 as $elem_ty);
+            let f = match $id::lanes() {
+                64 => 16,
+                32 => 8,
+                16 => 4,
+                _ => 2,
+            };
+            let v = alternating(f);
+            assert_eq!(
+                v.product(),
+                (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
+            );
+        }
+    };
+}
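
// A minimal, self-contained sketch (illustrative only, not part of `coresimd`)
// of the reduction semantics documented above, assuming 8-lane vectors:
// integer reductions wrap on overflow, floating-point reductions propagate
// `NaN`, and elements are combined in the pairwise "tree" order shown in the
// doc comments.
fn tree_wrapping_sum_u8(x: [u8; 8]) -> u8 {
    // Wrapping addition: overflow yields the result modulo 2^8.
    let add = u8::wrapping_add;
    add(
        add(add(x[0], x[1]), add(x[2], x[3])),
        add(add(x[4], x[5]), add(x[6], x[7])),
    )
}

fn tree_sum_f32(x: [f32; 8]) -> f32 {
    // Any `NaN` lane makes the result `NaN`; which `NaN` payload survives is
    // unspecified.
    ((x[0] + x[1]) + (x[2] + x[3])) + ((x[4] + x[5]) + (x[6] + x[7]))
}

#[test]
fn reduction_semantics_sketch() {
    // 255 + 1 wraps around to 0 for `u8`.
    assert_eq!(tree_wrapping_sum_u8([255, 1, 0, 0, 0, 0, 0, 0]), 0);
    // A single `NaN` lane poisons the whole floating-point sum.
    assert!(tree_sum_f32([f32::NAN, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]).is_nan());
}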