//! Implements portable arithmetic vector reductions.
#![allow(unused)]

-macro_rules! impl_arithmetic_reductions {
+macro_rules! impl_int_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        impl $id {
            /// Horizontal sum of the vector elements.
@@ -11,15 +11,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
@@ -33,15 +26,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
@@ -62,15 +48,8 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
-            ///
-            /// # Floating-point vectors
-            ///
-            /// If one of the vector element is `NaN` the reduction returns
-            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
@@ -84,18 +63,95 @@ macro_rules! impl_arithmetic_reductions {
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
-            /// # Integer vectors
-            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            pub fn wrapping_product(self) -> $elem_ty {
+                // FIXME: broken on AArch64
+                // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
+                let mut x = self.extract(0) as $elem_ty;
+                for i in 1..$id::lanes() {
+                    x = Wrapping::mul(x, self.extract(i) as $elem_ty);
+                }
+                x
+            }
+        }
+    };
+}
+
+macro_rules! impl_float_arithmetic_reductions {
+    ($id:ident, $elem_ty:ident) => {
+        impl $id {
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
            ///
-            /// # Floating-point vectors
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If one of the vector elements is `NaN` the reduction returns
-            /// `NaN`.
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(not(target_arch = "aarch64"))]
+            #[inline]
+            pub fn sum(self) -> $elem_ty {
+                use coresimd::simd_llvm::simd_reduce_add_ordered;
+                unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
+            }
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
            #[cfg(target_arch = "aarch64")]
            #[inline]
-            pub fn wrapping_product(self) -> $elem_ty {
+            pub fn sum(self) -> $elem_ty {
+                // FIXME: broken on AArch64
+                // https://bugs.llvm.org/show_bug.cgi?id=36796
+                use super::codegen::wrapping::Wrapping;
+                let mut x = self.extract(0) as $elem_ty;
+                for i in 1..$id::lanes() {
+                    x = Wrapping::add(x, self.extract(i) as $elem_ty);
+                }
+                x
+            }
+
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(not(target_arch = "aarch64"))]
+            #[inline]
+            pub fn product(self) -> $elem_ty {
+                use coresimd::simd_llvm::simd_reduce_mul_ordered;
+                unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
+            }
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            pub fn product(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                use super::codegen::wrapping::Wrapping;
@@ -109,8 +165,9 @@ macro_rules! impl_arithmetic_reductions {
    };
}

+
#[cfg(test)]
-macro_rules! test_arithmetic_reductions {
+macro_rules! test_int_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        fn alternating(x: usize) -> ::coresimd::simd::$id {
            use coresimd::simd::$id;
@@ -157,3 +214,52 @@ macro_rules! test_arithmetic_reductions {
        }
    };
}
+
+#[cfg(test)]
+macro_rules! test_float_arithmetic_reductions {
+    ($id:ident, $elem_ty:ident) => {
+        fn alternating(x: usize) -> ::coresimd::simd::$id {
+            use coresimd::simd::$id;
+            let mut v = $id::splat(1 as $elem_ty);
+            for i in 0..$id::lanes() {
+                if i % x == 0 {
+                    v = v.replace(i, 2 as $elem_ty);
+                }
+            }
+            v
+        }
+
+        #[test]
+        fn sum() {
+            use coresimd::simd::$id;
+            let v = $id::splat(0 as $elem_ty);
+            assert_eq!(v.sum(), 0 as $elem_ty);
+            let v = $id::splat(1 as $elem_ty);
+            assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+            let v = alternating(2);
+            assert_eq!(
+                v.sum(),
+                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+            );
+        }
+        #[test]
+        fn product() {
+            use coresimd::simd::$id;
+            let v = $id::splat(0 as $elem_ty);
+            assert_eq!(v.product(), 0 as $elem_ty);
+            let v = $id::splat(1 as $elem_ty);
+            assert_eq!(v.product(), 1 as $elem_ty);
+            let f = match $id::lanes() {
+                64 => 16,
+                32 => 8,
+                16 => 4,
+                _ => 2,
+            };
+            let v = alternating(f);
+            assert_eq!(
+                v.product(),
+                (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
+            );
+        }
+    };
+}
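
// A minimal, self-contained sketch (illustrative only, not part of `coresimd`)
// of the reduction semantics documented above, assuming 8-lane vectors:
// integer reductions wrap on overflow, floating-point reductions propagate
// `NaN`, and elements are combined in the pairwise "tree" order shown in the
// doc comments.
fn tree_wrapping_sum_u8(x: [u8; 8]) -> u8 {
    // Wrapping addition: overflow yields the result modulo 2^8.
    let add = u8::wrapping_add;
    add(
        add(add(x[0], x[1]), add(x[2], x[3])),
        add(add(x[4], x[5]), add(x[6], x[7])),
    )
}

fn tree_sum_f32(x: [f32; 8]) -> f32 {
    // Any `NaN` lane makes the result `NaN`; which `NaN` payload survives is
    // unspecified.
    ((x[0] + x[1]) + (x[2] + x[3])) + ((x[4] + x[5]) + (x[6] + x[7]))
}

#[test]
fn reduction_semantics_sketch() {
    // 255 + 1 wraps around to 0 for `u8`.
    assert_eq!(tree_wrapping_sum_u8([255, 1, 0, 0, 0, 0, 0, 0]), 0);
    // A single `NaN` lane poisons the whole floating-point sum.
    assert!(tree_sum_f32([f32::NAN, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]).is_nan());
}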