@@ -4,6 +4,12 @@ use ndarray::{s, Array1, ArrayView1};
44use rayon:: iter:: IndexedParallelIterator ;
55use rayon:: prelude:: * ;
66
7+ // TODO: check for duplicate data in the output array
8+ // -> In the current implementation we always add 4 datapoints per bin (if, of
9+ // course, the bin has >= 4 datapoints). However, the argmin and/or argmax might
10+ // be the start or end of the bin, which would result in duplicate indices in
11+ // the output array (this is, for example, the case for monotonic data).
12+
713// --------------------- WITHOUT X
814
915#[ inline( always) ]
@@ -26,7 +32,6 @@ pub(crate) fn m4_generic<T: Copy + PartialOrd>(
2632 . exact_chunks ( block_size)
2733 . into_iter ( )
2834 . enumerate ( )
29- // .take(n_out / 4)
3035 . for_each ( |( i, step) | {
3136 let ( min_index, max_index) = f_argminmax ( step) ;
3237
@@ -95,7 +100,7 @@ pub(crate) fn m4_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
95100#[ inline( always) ]
96101pub ( crate ) fn m4_generic_with_x < T : Copy > (
97102 arr : ArrayView1 < T > ,
98- bin_idx_iterator : impl Iterator < Item = ( usize , usize ) > ,
103+ bin_idx_iterator : impl Iterator < Item = Option < ( usize , usize ) > > ,
99104 n_out : usize ,
100105 f_argminmax : fn ( ArrayView1 < T > ) -> ( usize , usize ) ,
101106) -> Array1 < usize > {
@@ -105,35 +110,43 @@ pub(crate) fn m4_generic_with_x<T: Copy>(
105110 }
106111
107112 let arr_ptr = arr. as_ptr ( ) ;
108- let mut sampled_indices: Array1 < usize > = Array1 :: < usize > :: default ( n_out) ;
109-
110- bin_idx_iterator
111- . enumerate ( )
112- . for_each ( |( i, ( start_idx, end_idx) ) | {
113- let step =
114- unsafe { ArrayView1 :: from_shape_ptr ( end_idx - start_idx, arr_ptr. add ( start_idx) ) } ;
115- let ( min_index, max_index) = f_argminmax ( step) ;
116-
117- sampled_indices[ 4 * i] = start_idx;
118-
119- // Add the indexes in sorted order
120- if min_index < max_index {
121- sampled_indices[ 4 * i + 1 ] = min_index + start_idx;
122- sampled_indices[ 4 * i + 2 ] = max_index + start_idx;
113+ let mut sampled_indices: Vec < usize > = Vec :: with_capacity ( n_out) ;
114+
115+ bin_idx_iterator. for_each ( |bin| {
116+ if let Some ( ( start, end) ) = bin {
117+ if end <= start + 4 {
118+ // If the bin has <= 4 elements, just add them all
119+ for i in start..end {
120+ sampled_indices. push ( i) ;
121+ }
123122 } else {
124- sampled_indices[ 4 * i + 1 ] = max_index + start_idx;
125- sampled_indices[ 4 * i + 2 ] = min_index + start_idx;
123+ // If the bin has > 4 elements, add the first and last + argmin and argmax
124+ let step = unsafe { ArrayView1 :: from_shape_ptr ( end - start, arr_ptr. add ( start) ) } ;
125+ let ( min_index, max_index) = f_argminmax ( step) ;
126+
127+ sampled_indices. push ( start) ;
128+
129+ // Add the indexes in sorted order
130+ if min_index < max_index {
131+ sampled_indices. push ( min_index + start) ;
132+ sampled_indices. push ( max_index + start) ;
133+ } else {
134+ sampled_indices. push ( max_index + start) ;
135+ sampled_indices. push ( min_index + start) ;
136+ }
137+
138+ sampled_indices. push ( end - 1 ) ;
126139 }
127- sampled_indices [ 4 * i + 3 ] = end_idx - 1 ;
128- } ) ;
140+ }
141+ } ) ;
129142
130- sampled_indices
143+ Array1 :: from_vec ( sampled_indices)
131144}
132145
133146#[ inline( always) ]
134147pub ( crate ) fn m4_generic_with_x_parallel < T : Copy + PartialOrd + Send + Sync > (
135148 arr : ArrayView1 < T > ,
136- bin_idx_iterator : impl IndexedParallelIterator < Item = impl Iterator < Item = ( usize , usize ) > > ,
149+ bin_idx_iterator : impl IndexedParallelIterator < Item = impl Iterator < Item = Option < ( usize , usize ) > > > ,
137150 n_out : usize ,
138151 f_argminmax : fn ( ArrayView1 < T > ) -> ( usize , usize ) ,
139152) -> Array1 < usize > {
@@ -146,24 +159,37 @@ pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
146159 bin_idx_iterator
147160 . flat_map ( |bin_idx_iterator| {
148161 bin_idx_iterator
149- . map ( |( start, end) | {
150- let step = unsafe {
151- ArrayView1 :: from_shape_ptr ( end - start, arr. as_ptr ( ) . add ( start) )
152- } ;
153- let ( min_index, max_index) = f_argminmax ( step) ;
154-
155- // Add the indexes in sorted order
156- let mut sampled_index = [ start, 0 , 0 , end - 1 ] ;
157- if min_index < max_index {
158- sampled_index[ 1 ] = min_index + start;
159- sampled_index[ 2 ] = max_index + start;
160- } else {
161- sampled_index[ 1 ] = max_index + start;
162- sampled_index[ 2 ] = min_index + start;
162+ . map ( |bin| {
163+ match bin {
164+ Some ( ( start, end) ) => {
165+ if end <= start + 4 {
166+ // If the bin has <= 4 elements, just return them all
167+ return ( start..end) . collect :: < Vec < usize > > ( ) ;
168+ }
169+
170+ // If the bin has > 4 elements, return the first and last + argmin and argmax
171+ let step = unsafe {
172+ ArrayView1 :: from_shape_ptr ( end - start, arr. as_ptr ( ) . add ( start) )
173+ } ;
174+ let ( min_index, max_index) = f_argminmax ( step) ;
175+
176+ // Return the indexes in sorted order
177+ let mut sampled_index = vec ! [ start, 0 , 0 , end - 1 ] ;
178+ if min_index < max_index {
179+ sampled_index[ 1 ] = min_index + start;
180+ sampled_index[ 2 ] = max_index + start;
181+ } else {
182+ sampled_index[ 1 ] = max_index + start;
183+ sampled_index[ 2 ] = min_index + start;
184+ }
185+ sampled_index
186+ } // If the bin is empty, return empty Vec
187+ None => {
188+ vec ! [ ]
189+ }
163190 }
164- sampled_index
165191 } )
166- . collect :: < Vec < [ usize ; 4 ] > > ( )
192+ . collect :: < Vec < Vec < usize > > > ( )
167193 } )
168194 . flatten ( )
169195 . collect :: < Vec < usize > > ( ) ,
0 commit comments