1+ use super :: super :: helpers:: Average ;
12use super :: super :: types:: Num ;
23use ndarray:: { Array1 , ArrayView1 } ;
34use num_traits:: AsPrimitive ;
45use std:: cmp;
56
7+ #[ inline( always) ]
8+ fn f64_to_i64unsigned ( v : f64 ) -> i64 {
9+ // Transmute to i64 and mask out the sign bit
10+ let v: i64 = unsafe { std:: mem:: transmute :: < f64 , i64 > ( v) } ;
11+ v & 0x7FFF_FFFF_FFFF_FFFF
12+ }
13+
614// ----------------------------------- NON-PARALLEL ------------------------------------
715
816// ----------- WITH X
@@ -11,67 +19,79 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
1119 x : ArrayView1 < Tx > ,
1220 y : ArrayView1 < Ty > ,
1321 n_out : usize ,
14- ) -> Array1 < usize > {
22+ ) -> Array1 < usize >
23+ where
24+ for < ' a > ArrayView1 < ' a , Ty > : Average ,
25+ {
1526 assert_eq ! ( x. len( ) , y. len( ) ) ;
16- if n_out >= x. len ( ) || n_out == 0 {
27+ if n_out >= x. len ( ) {
1728 return Array1 :: from ( ( 0 ..x. len ( ) ) . collect :: < Vec < usize > > ( ) ) ;
1829 }
1930 assert ! ( n_out >= 3 ) ; // avoid division by 0
2031
2132 // Bucket size. Leave room for start and end data points.
22- let every = ( x. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
33+ let every: f64 = ( x. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
2334 // Initially a is the first point in the triangle.
24- let mut a = 0 ;
35+ let mut a: usize = 0 ;
2536
2637 let mut sampled_indices: Array1 < usize > = Array1 :: < usize > :: default ( n_out) ;
2738
39+ let x_ptr = x. as_ptr ( ) ;
40+ let y_ptr = y. as_ptr ( ) ;
41+
2842 // Always add the first point
2943 sampled_indices[ 0 ] = 0 ;
3044
3145 for i in 0 ..n_out - 2 {
3246 // Calculate point average for next bucket (containing c).
33- let mut avg_x: f64 = 0.0 ;
34- let mut avg_y: f64 = 0.0 ;
35-
3647 let avg_range_start = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
3748 let avg_range_end = cmp:: min ( ( every * ( i + 2 ) as f64 ) as usize + 1 , x. len ( ) ) ;
3849
39- for i in avg_range_start..avg_range_end {
40- avg_x += x[ i] . as_ ( ) ;
41- avg_y += y[ i] . as_ ( ) ;
42- }
43- // Slicing seems to be a lot slower
44- // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
45- // let avg_y: Ty = y.slice(s![avg_range_start..avg_range_end]).sum();
46- let avg_x: f64 = avg_x / ( avg_range_end - avg_range_start) as f64 ;
47- let avg_y: f64 = avg_y / ( avg_range_end - avg_range_start) as f64 ;
50+ // ArrayBase::slice is rather expensive..
51+ let y_slice = unsafe {
52+ ArrayView1 :: from_shape_ptr ( avg_range_end - avg_range_start, y_ptr. add ( avg_range_start) )
53+ } ;
54+ let avg_y: f64 = y_slice. average ( ) ;
55+ // TODO: avg_y could be approximated argminmax instead of mean?
56+ // TODO: below is faster than above, but not as accurate
57+ // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0;
58+ let avg_x: f64 =
59+ unsafe { ( x. uget ( avg_range_end - 1 ) . as_ ( ) + x. uget ( avg_range_start) . as_ ( ) ) / 2.0 } ;
4860
4961 // Get the range for this bucket
5062 let range_offs = ( every * i as f64 ) as usize + 1 ;
51- let range_to = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
63+ let range_to = avg_range_start ; // = start of the next bucket
5264
5365 // Point a
54- let point_ax = x[ a] . as_ ( ) ;
55- let point_ay = y[ a] . as_ ( ) ;
56-
57- let mut max_area = -1.0 ;
58- for i in range_offs..range_to {
59- // Calculate triangle area over three buckets
60- let area = ( ( point_ax - avg_x) * ( y[ i] . as_ ( ) - point_ay)
61- - ( point_ax - x[ i] . as_ ( ) ) * ( avg_y - point_ay) )
62- . abs ( ) ;
63- if area > max_area {
64- max_area = area;
65- a = i;
66- }
67- }
68- // Vectorized implementation
69- // let point_ax: Tx = x[a];
70- // let point_ay: Ty = y[a];
71- // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
72- // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
73- // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
74- // a = max_idx;
66+ let point_ax = unsafe { x. uget ( a) . as_ ( ) } ;
67+ let point_ay = unsafe { y. uget ( a) . as_ ( ) } ;
68+
69+ let d1 = point_ax - avg_x;
70+ let d2 = avg_y - point_ay;
71+ let offset: f64 = d1 * point_ay + d2 * point_ax;
72+
73+ let x_slice =
74+ unsafe { std:: slice:: from_raw_parts ( x_ptr. add ( range_offs) , range_to - range_offs) } ;
75+ let y_slice =
76+ unsafe { std:: slice:: from_raw_parts ( y_ptr. add ( range_offs) , range_to - range_offs) } ;
77+ ( _, a) = y_slice. iter ( ) . zip ( x_slice. iter ( ) ) . enumerate ( ) . fold (
78+ ( -1i64 , a) ,
79+ |( max_area, a) , ( i, ( y_, x_) ) | {
80+ // Calculate triangle area over three buckets
81+ // -> area = d1 * (y_ - point_ay) - (point_ax - x_) * d2;
82+ // let area = d1 * y[i].as_() + d2 * x[i].as_() - offset;
83+ // let area = d1 * y_slice[i].as_() + d2 * x_slice[i].as_() - offset;
84+ let area = d1 * y_. as_ ( ) + d2 * x_. as_ ( ) - offset;
85+ let area = f64_to_i64unsigned ( area) ; // this is faster than abs
86+ if area > max_area {
87+ ( area, i)
88+ } else {
89+ ( max_area, a)
90+ }
91+ } ,
92+ ) ;
93+ a += range_offs;
94+
7595 sampled_indices[ i + 1 ] = a;
7696 }
7797
@@ -83,67 +103,92 @@ pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
83103
84104// ----------- WITHOUT X
85105
86- pub fn lttb_without_x < Ty : Num + AsPrimitive < f64 > > (
87- // TODO: why is this slower than the one with x?
88- y : ArrayView1 < Ty > ,
89- n_out : usize ,
90- ) -> Array1 < usize > {
91- if n_out >= y. len ( ) || n_out == 0 {
106+ pub fn lttb_without_x < Ty : Num + AsPrimitive < f64 > > ( y : ArrayView1 < Ty > , n_out : usize ) -> Array1 < usize >
107+ where
108+ for < ' a > ArrayView1 < ' a , Ty > : Average ,
109+ {
110+ if n_out >= y. len ( ) {
92111 return Array1 :: from ( ( 0 ..y. len ( ) ) . collect :: < Vec < usize > > ( ) ) ;
93112 }
94113 assert ! ( n_out >= 3 ) ; // avoid division by 0
95114
96115 // Bucket size. Leave room for start and end data points.
97- let every = ( y. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
116+ let every: f64 = ( y. len ( ) - 2 ) as f64 / ( n_out - 2 ) as f64 ;
98117 // Initially a is the first point in the triangle.
99- let mut a = 0 ;
118+ let mut a: usize = 0 ;
100119
101120 let mut sampled_indices: Array1 < usize > = Array1 :: < usize > :: default ( n_out) ;
102121
122+ let y_ptr = y. as_ptr ( ) ;
123+
103124 // Always add the first point
104125 sampled_indices[ 0 ] = 0 ;
105126
106127 for i in 0 ..n_out - 2 {
107128 // Calculate point average for next bucket (containing c).
108- let mut avg_y: f64 = 0.0 ;
109-
110129 let avg_range_start = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
111130 let avg_range_end = cmp:: min ( ( every * ( i + 2 ) as f64 ) as usize + 1 , y. len ( ) ) ;
112131
113- for i in avg_range_start..avg_range_end {
114- avg_y += y[ i] . as_ ( ) ;
115- }
116- // Slicing seems to be a lot slower
117- // let avg_x: Tx = x.slice(s![avg_range_start..avg_range_end]).sum();
118- let avg_y: f64 = avg_y / ( avg_range_end - avg_range_start) as f64 ;
132+ // ArrayBase::slice is rather expensive..
133+ let y_slice = unsafe {
134+ ArrayView1 :: from_shape_ptr ( avg_range_end - avg_range_start, y_ptr. add ( avg_range_start) )
135+ } ;
136+ let avg_y: f64 = y_slice. average ( ) ;
119137 let avg_x: f64 = ( avg_range_start + avg_range_end - 1 ) as f64 / 2.0 ;
120138
121139 // Get the range for this bucket
122140 let range_offs = ( every * i as f64 ) as usize + 1 ;
123- let range_to = ( every * ( i + 1 ) as f64 ) as usize + 1 ;
141+ let range_to = avg_range_start ; // = start of the next bucket
124142
125143 // Point a
126- let point_ay = y [ a ] . as_ ( ) ;
144+ let point_ay = unsafe { y . uget ( a ) . as_ ( ) } ;
127145 let point_ax = a as f64 ;
128146
129- let mut max_area = -1.0 ;
130- for i in range_offs..range_to {
131- // Calculate triangle area over three buckets
132- let area = ( ( point_ax - avg_x) * ( y[ i] . as_ ( ) - point_ay)
133- - ( point_ax - i as f64 ) * ( avg_y - point_ay) )
134- . abs ( ) ;
135- if area > max_area {
136- max_area = area;
137- a = i;
138- }
139- }
140- // Vectorized implementation
141- // let point_ax: Tx = x[a];
142- // let point_ay: Ty = y[a];
143- // let ar_x: Vec<Tx> = x.slice(s![range_offs..range_to]).into_iter().map(|v| point_ax - *v).collect();
144- // let ar_y: Vec<Ty> = y.slice(s![range_offs..range_to]).into_iter().map(|v| *v - point_ay).collect();
145- // let max_idx: usize = (ar_x.iter().zip(ar_y.iter()).map(|(x, y)| (x.to_f64().unwrap() * avg_y - y.to_f64().unwrap() * avg_x).abs()).enumerate().max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()).unwrap().0) + range_offs;
146- // a = max_idx;
147+ let d1 = point_ax - avg_x;
148+ let d2 = avg_y - point_ay;
149+ let point_ax = point_ax - range_offs as f64 ;
150+
151+ // let mut max_area = -1i64;
152+ let mut ax_x = point_ax; // point_ax - x[i]
153+ let offset: f64 = d1 * point_ay;
154+
155+ // TODO: for some reason is this faster than the loop below -> check if this is true for other devices
156+ let y_slice =
157+ unsafe { ArrayView1 :: from_shape_ptr ( range_to - range_offs, y_ptr. add ( range_offs) ) } ;
158+ ( _, a) = y_slice
159+ . iter ( )
160+ . enumerate ( )
161+ . fold ( ( -1i64 , a) , |( max_area, a) , ( i, y) | {
162+ // Calculate triangle area over three buckets
163+ // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
164+ let area: f64 = d1 * y. as_ ( ) - ax_x * d2 - offset;
165+ let area: i64 = f64_to_i64unsigned ( area) ;
166+ ax_x -= 1.0 ;
167+ if area > max_area {
168+ ( area, i + range_offs)
169+ } else {
170+ ( max_area, a)
171+ }
172+ } ) ;
173+
174+ // let y_slice = unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
175+ // (_, a) = y_slice
176+ // .iter()
177+ // .enumerate()
178+ // .fold((-1i64, a), |(max_area, a), (i, y_)| {
179+ // // Calculate triangle area over three buckets
180+ // // -> area: f64 = d1 * y[i].as_() - ax_x * d2;
181+ // let area: f64 = d1 * y_.as_() - ax_x * d2 - offset;
182+ // let area: i64 = f64_to_i64unsigned(area);
183+ // ax_x -= 1.0;
184+ // if area > max_area {
185+ // (area, i)
186+ // } else {
187+ // (max_area, a)
188+ // }
189+ // });
190+ // a += range_offs;
191+
147192 sampled_indices[ i + 1 ] = a;
148193 }
149194
0 commit comments