Skip to content

Commit d8740d6

Browse files
committed
rename portable float vector's wrapping_{sum,product} to {sum,product} per the RFC
1 parent 3491956 commit d8740d6

File tree

3 files changed

+244
-168
lines changed

3 files changed

+244
-168
lines changed

coresimd/ppsv/api/arithmetic_reductions.rs

Lines changed: 134 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Implements portable arithmetic vector reductions.
22
#![allow(unused)]
33

4-
macro_rules! impl_arithmetic_reductions {
4+
macro_rules! impl_int_arithmetic_reductions {
55
($id:ident, $elem_ty:ident) => {
66
impl $id {
77
/// Horizontal sum of the vector elements.
@@ -11,15 +11,8 @@ macro_rules! impl_arithmetic_reductions {
1111
///
1212
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
1313
///
14-
/// # Integer vectors
15-
///
1614
/// If an operation overflows it returns the mathematical result
1715
/// modulo `2^n` where `n` is the bit width of the element type.
18-
///
19-
/// # Floating-point vectors
20-
///
21-
/// If one of the vector element is `NaN` the reduction returns
22-
/// `NaN`.
2316
#[cfg(not(target_arch = "aarch64"))]
2417
#[inline]
2518
pub fn wrapping_sum(self) -> $elem_ty {
@@ -33,15 +26,8 @@ macro_rules! impl_arithmetic_reductions {
3326
///
3427
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
3528
///
36-
/// # Integer vectors
37-
///
3829
/// If an operation overflows it returns the mathematical result
3930
/// modulo `2^n` where `n` is the bit width of the element type.
40-
///
41-
/// # Floating-point vectors
42-
///
43-
/// If one of the vector element is `NaN` the reduction returns
44-
/// `NaN`.
4531
#[cfg(target_arch = "aarch64")]
4632
#[inline]
4733
pub fn wrapping_sum(self) -> $elem_ty {
@@ -62,15 +48,8 @@ macro_rules! impl_arithmetic_reductions {
6248
///
6349
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
6450
///
65-
/// # Integer vectors
66-
///
6751
/// If an operation overflows it returns the mathematical result
6852
/// modulo `2^n` where `n` is the bit width of the element type.
69-
///
70-
/// # Floating-point vectors
71-
///
72-
/// If one of the vector element is `NaN` the reduction returns
73-
/// `NaN`.
7453
#[cfg(not(target_arch = "aarch64"))]
7554
#[inline]
7655
pub fn wrapping_product(self) -> $elem_ty {
@@ -84,18 +63,95 @@ macro_rules! impl_arithmetic_reductions {
8463
///
8564
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
8665
///
87-
/// # Integer vectors
88-
///
8966
/// If an operation overflows it returns the mathematical result
9067
/// modulo `2^n` where `n` is the bit width of the element type.
68+
#[cfg(target_arch = "aarch64")]
69+
#[inline]
70+
pub fn wrapping_product(self) -> $elem_ty {
71+
// FIXME: broken on AArch64
72+
// https://bugs.llvm.org/show_bug.cgi?id=36796
73+
use super::codegen::wrapping::Wrapping;
74+
let mut x = self.extract(0) as $elem_ty;
75+
for i in 1..$id::lanes() {
76+
x = Wrapping::mul(x, self.extract(i) as $elem_ty);
77+
}
78+
x
79+
}
80+
}
81+
};
82+
}
83+
84+
macro_rules! impl_float_arithmetic_reductions {
85+
($id:ident, $elem_ty:ident) => {
86+
impl $id {
87+
/// Horizontal sum of the vector elements.
88+
///
89+
/// The intrinsic performs a tree-reduction of the vector elements.
90+
/// That is, for an 8 element vector:
9191
///
92-
/// # Floating-point vectors
92+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
9393
///
9494
/// If one of the vector elements is `NaN` the reduction returns
95-
/// `NaN`.
95+
/// `NaN`. The resulting `NaN` is not required to be equal to any
96+
/// of the `NaN`s in the vector.
97+
#[cfg(not(target_arch = "aarch64"))]
98+
#[inline]
99+
pub fn sum(self) -> $elem_ty {
100+
use coresimd::simd_llvm::simd_reduce_add_ordered;
101+
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
102+
}
103+
/// Horizontal sum of the vector elements.
104+
///
105+
/// The intrinsic performs a tree-reduction of the vector elements.
106+
/// That is, for an 8 element vector:
107+
///
108+
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
109+
///
110+
/// If one of the vector elements is `NaN` the reduction returns
111+
/// `NaN`. The resulting `NaN` is not required to be equal to any
112+
/// of the `NaN`s in the vector.
96113
#[cfg(target_arch = "aarch64")]
97114
#[inline]
98-
pub fn wrapping_product(self) -> $elem_ty {
115+
pub fn sum(self) -> $elem_ty {
116+
// FIXME: broken on AArch64
117+
// https://bugs.llvm.org/show_bug.cgi?id=36796
118+
use super::codegen::wrapping::Wrapping;
119+
let mut x = self.extract(0) as $elem_ty;
120+
for i in 1..$id::lanes() {
121+
x = Wrapping::add(x, self.extract(i) as $elem_ty);
122+
}
123+
x
124+
}
125+
126+
/// Horizontal product of the vector elements.
127+
///
128+
/// The intrinsic performs a tree-reduction of the vector elements.
129+
/// That is, for an 8 element vector:
130+
///
131+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
132+
///
133+
/// If one of the vector elements is `NaN` the reduction returns
134+
/// `NaN`. The resulting `NaN` is not required to be equal to any
135+
/// of the `NaN`s in the vector.
136+
#[cfg(not(target_arch = "aarch64"))]
137+
#[inline]
138+
pub fn product(self) -> $elem_ty {
139+
use coresimd::simd_llvm::simd_reduce_mul_ordered;
140+
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
141+
}
142+
/// Horizontal product of the vector elements.
143+
///
144+
/// The intrinsic performs a tree-reduction of the vector elements.
145+
/// That is, for an 8 element vector:
146+
///
147+
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
148+
///
149+
/// If one of the vector elements is `NaN` the reduction returns
150+
/// `NaN`. The resulting `NaN` is not required to be equal to any
151+
/// of the `NaN`s in the vector.
152+
#[cfg(target_arch = "aarch64")]
153+
#[inline]
154+
pub fn product(self) -> $elem_ty {
99155
// FIXME: broken on AArch64
100156
// https://bugs.llvm.org/show_bug.cgi?id=36796
101157
use super::codegen::wrapping::Wrapping;
@@ -109,8 +165,9 @@ macro_rules! impl_arithmetic_reductions {
109165
};
110166
}
111167

168+
112169
#[cfg(test)]
113-
macro_rules! test_arithmetic_reductions {
170+
macro_rules! test_int_arithmetic_reductions {
114171
($id:ident, $elem_ty:ident) => {
115172
fn alternating(x: usize) -> ::coresimd::simd::$id {
116173
use coresimd::simd::$id;
@@ -157,3 +214,52 @@ macro_rules! test_arithmetic_reductions {
157214
}
158215
};
159216
}
217+
218+
#[cfg(test)]
219+
macro_rules! test_float_arithmetic_reductions {
220+
($id:ident, $elem_ty:ident) => {
221+
fn alternating(x: usize) -> ::coresimd::simd::$id {
222+
use coresimd::simd::$id;
223+
let mut v = $id::splat(1 as $elem_ty);
224+
for i in 0..$id::lanes() {
225+
if i % x == 0 {
226+
v = v.replace(i, 2 as $elem_ty);
227+
}
228+
}
229+
v
230+
}
231+
232+
#[test]
233+
fn sum() {
234+
use coresimd::simd::$id;
235+
let v = $id::splat(0 as $elem_ty);
236+
assert_eq!(v.sum(), 0 as $elem_ty);
237+
let v = $id::splat(1 as $elem_ty);
238+
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
239+
let v = alternating(2);
240+
assert_eq!(
241+
v.sum(),
242+
($id::lanes() / 2 + $id::lanes()) as $elem_ty
243+
);
244+
}
245+
#[test]
246+
fn product() {
247+
use coresimd::simd::$id;
248+
let v = $id::splat(0 as $elem_ty);
249+
assert_eq!(v.product(), 0 as $elem_ty);
250+
let v = $id::splat(1 as $elem_ty);
251+
assert_eq!(v.product(), 1 as $elem_ty);
252+
let f = match $id::lanes() {
253+
64 => 16,
254+
32 => 8,
255+
16 => 4,
256+
_ => 2,
257+
};
258+
let v = alternating(f);
259+
assert_eq!(
260+
v.product(),
261+
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
262+
);
263+
}
264+
};
265+
}

coresimd/ppsv/api/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ macro_rules! simd_f_ty {
140140
[impl_cmp, $id, $mask_ty],
141141
[impl_arithmetic_ops, $id],
142142
[impl_arithmetic_scalar_ops, $id, $elem_ty],
143-
[impl_arithmetic_reductions, $id, $elem_ty],
143+
[impl_float_arithmetic_reductions, $id, $elem_ty],
144144
[impl_minmax_reductions, $id, $elem_ty],
145145
[impl_neg_op, $id, $elem_ty],
146146
[impl_partial_eq, $id],
@@ -157,7 +157,7 @@ macro_rules! simd_f_ty {
157157
test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty);
158158
test_arithmetic_ops!($id, $elem_ty);
159159
test_arithmetic_scalar_ops!($id, $elem_ty);
160-
test_arithmetic_reductions!($id, $elem_ty);
160+
test_float_arithmetic_reductions!($id, $elem_ty);
161161
test_minmax_reductions!($id, $elem_ty);
162162
test_neg_op!($id, $elem_ty);
163163
test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
@@ -183,7 +183,7 @@ macro_rules! simd_i_ty {
183183
[impl_hash, $id, $elem_ty],
184184
[impl_arithmetic_ops, $id],
185185
[impl_arithmetic_scalar_ops, $id, $elem_ty],
186-
[impl_arithmetic_reductions, $id, $elem_ty],
186+
[impl_int_arithmetic_reductions, $id, $elem_ty],
187187
[impl_minmax_reductions, $id, $elem_ty],
188188
[impl_neg_op, $id, $elem_ty],
189189
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
@@ -207,7 +207,7 @@ macro_rules! simd_i_ty {
207207
test_hash!($id, $elem_ty);
208208
test_arithmetic_ops!($id, $elem_ty);
209209
test_arithmetic_scalar_ops!($id, $elem_ty);
210-
test_arithmetic_reductions!($id, $elem_ty);
210+
test_int_arithmetic_reductions!($id, $elem_ty);
211211
test_minmax_reductions!($id, $elem_ty);
212212
test_neg_op!($id, $elem_ty);
213213
test_int_bitwise_ops!($id, $elem_ty);
@@ -238,7 +238,7 @@ macro_rules! simd_u_ty {
238238
[impl_hash, $id, $elem_ty],
239239
[impl_arithmetic_ops, $id],
240240
[impl_arithmetic_scalar_ops, $id, $elem_ty],
241-
[impl_arithmetic_reductions, $id, $elem_ty],
241+
[impl_int_arithmetic_reductions, $id, $elem_ty],
242242
[impl_minmax_reductions, $id, $elem_ty],
243243
[impl_bitwise_scalar_ops, $id, $elem_ty],
244244
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
@@ -261,7 +261,7 @@ macro_rules! simd_u_ty {
261261
test_hash!($id, $elem_ty);
262262
test_arithmetic_ops!($id, $elem_ty);
263263
test_arithmetic_scalar_ops!($id, $elem_ty);
264-
test_arithmetic_reductions!($id, $elem_ty);
264+
test_int_arithmetic_reductions!($id, $elem_ty);
265265
test_minmax_reductions!($id, $elem_ty);
266266
test_int_bitwise_ops!($id, $elem_ty);
267267
test_int_bitwise_scalar_ops!($id, $elem_ty);

0 commit comments

Comments
 (0)