@@ -26,55 +26,6 @@ use crate::arrow::arrow_reader::{
2626use crate :: errors:: { ParquetError , Result } ;
2727use arrow_array:: Array ;
2828use arrow_select:: filter:: prep_null_mask_filter;
29- use std:: sync:: atomic:: { AtomicUsize , Ordering } ;
30-
31- // The average selector length threshold for choosing between
32- // `RowSelectionStrategy::Mask` and `RowSelectionStrategy::Selectors`.
33- // If the average selector length is less than this value,
34- // `RowSelectionStrategy::Mask` is preferred.
35- const AVG_SELECTOR_LEN_MASK_THRESHOLD : usize = 32 ;
36-
37- // The logic in `preferred_selection_strategy` depends on the constant
38- // `AVG_SELECTOR_LEN_MASK_THRESHOLD`. To allow unit testing of this logic,
39- // we use a mutable global variable that can be temporarily changed during tests.
40- //
41- // An `AtomicUsize` is used because the Rust test runner (`cargo test`) runs tests
42- // in parallel by default. The atomic operations prevent data races between
43- // different test threads that might try to modify this value simultaneously.
44- //
45- // For the production code path, `load(Ordering::Relaxed)` is used. This is the
46- // weakest memory ordering and for a simple load on most modern architectures,
47- // it compiles down to a regular memory read with negligible performance overhead.
48- // The more expensive atomic operations with stronger ordering are only used in the
49- // test-only functions below.
50- static AVG_SELECTOR_LEN_MASK_THRESHOLD_OVERRIDE : AtomicUsize =
51- AtomicUsize :: new ( AVG_SELECTOR_LEN_MASK_THRESHOLD ) ;
52-
53- #[ inline( always) ]
54- fn avg_selector_len_mask_threshold ( ) -> usize {
55- AVG_SELECTOR_LEN_MASK_THRESHOLD_OVERRIDE . load ( Ordering :: Relaxed )
56- }
57-
58- /// An RAII guard that restores the previous value of the override when it is dropped.
59- /// This ensures that any change to the global threshold is temporary and scoped to
60- /// the test or benchmark where it's used, even in the case of a panic.
61- pub struct AvgSelectorLenMaskThresholdGuard {
62- previous : usize ,
63- }
64-
65- impl Drop for AvgSelectorLenMaskThresholdGuard {
66- fn drop ( & mut self ) {
67- AVG_SELECTOR_LEN_MASK_THRESHOLD_OVERRIDE . store ( self . previous , Ordering :: SeqCst ) ;
68- }
69- }
70-
71- /// Override AVG_SELECTOR_LEN_MASK_THRESHOLD (primarily for tests / benchmarks).
72- ///
73- /// Returns an [`AvgSelectorLenMaskThresholdGuard`] that restores the previous value on drop.
74- pub fn set_avg_selector_len_mask_threshold ( value : usize ) -> AvgSelectorLenMaskThresholdGuard {
75- let previous = AVG_SELECTOR_LEN_MASK_THRESHOLD_OVERRIDE . swap ( value, Ordering :: SeqCst ) ;
76- AvgSelectorLenMaskThresholdGuard { previous }
77- }
7829
7930/// A builder for [`ReadPlan`]
8031#[ derive( Clone , Debug ) ]
@@ -102,7 +53,7 @@ impl ReadPlanBuilder {
10253 self
10354 }
10455
105- /// Force a specific strategy when materialising the [`RowSelection`]
56+ /// Configure the strategy to use when materialising the [`RowSelection`]
10657 pub fn with_selection_strategy ( mut self , strategy : RowSelectionStrategy ) -> Self {
10758 self . selection_strategy = strategy;
10859 self
@@ -139,24 +90,45 @@ impl ReadPlanBuilder {
13990
14091 /// Returns the preferred [`RowSelectionStrategy`] for materialising the current selection.
14192 pub fn preferred_selection_strategy ( & self ) -> RowSelectionStrategy {
142- let selection = match self . selection . as_ref ( ) {
143- Some ( selection) => selection,
144- None => return RowSelectionStrategy :: Mask ,
145- } ;
146-
147- let trimmed = selection. clone ( ) . trim ( ) ;
148- let selectors: Vec < RowSelector > = trimmed. into ( ) ;
149- if selectors. is_empty ( ) {
150- return RowSelectionStrategy :: Mask ;
93+ match self . selection_strategy {
94+ RowSelectionStrategy :: Selectors => RowSelectionStrategy :: Selectors ,
95+ RowSelectionStrategy :: Mask => RowSelectionStrategy :: Mask ,
96+ RowSelectionStrategy :: Auto { threshold, .. } => {
97+ let selection = match self . selection . as_ref ( ) {
98+ Some ( selection) => selection,
99+ None => return RowSelectionStrategy :: Mask ,
100+ } ;
101+
102+ let trimmed = selection. clone ( ) . trim ( ) ;
103+ let selectors: Vec < RowSelector > = trimmed. into ( ) ;
104+ if selectors. is_empty ( ) {
105+ return RowSelectionStrategy :: Mask ;
106+ }
107+
108+ let total_rows: usize = selectors. iter ( ) . map ( |s| s. row_count ) . sum ( ) ;
109+ let selector_count = selectors. len ( ) ;
110+ if selector_count == 0 {
111+ return RowSelectionStrategy :: Mask ;
112+ }
113+
114+ if total_rows < selector_count. saturating_mul ( threshold) {
115+ RowSelectionStrategy :: Mask
116+ } else {
117+ RowSelectionStrategy :: Selectors
118+ }
119+ }
151120 }
121+ }
152122
153- let total_rows: usize = selectors. iter ( ) . map ( |s| s. row_count ) . sum ( ) ;
154- let selector_count = selectors. len ( ) ;
155- if total_rows < selector_count. saturating_mul ( avg_selector_len_mask_threshold ( ) ) {
156- RowSelectionStrategy :: Mask
157- } else {
158- RowSelectionStrategy :: Selectors
159- }
123+ /// Returns `true` if the configured strategy allows falling back to selectors for safety.
124+ pub ( crate ) fn selection_strategy_allows_safe_fallback ( & self ) -> bool {
125+ matches ! (
126+ self . selection_strategy,
127+ RowSelectionStrategy :: Auto {
128+ safe_strategy: true ,
129+ ..
130+ }
131+ )
160132 }
161133
162134 /// Evaluates an [`ArrowPredicate`], updating this plan's `selection`
@@ -206,7 +178,10 @@ impl ReadPlanBuilder {
206178 if !self . selects_any ( ) {
207179 self . selection = Some ( RowSelection :: from ( vec ! [ ] ) ) ;
208180 }
209- let selection_strategy = self . selection_strategy ;
181+ let selection_strategy = match self . selection_strategy {
182+ RowSelectionStrategy :: Auto { .. } => self . preferred_selection_strategy ( ) ,
183+ strategy => strategy,
184+ } ;
210185 let Self {
211186 batch_size,
212187 selection,
@@ -355,12 +330,44 @@ mod tests {
355330
#[test]
fn preferred_selection_strategy_prefers_selectors_when_threshold_small() {
    // One selector covering 8 rows averages 8 rows per selector, which is not below a
    // threshold of 1, so `Auto` must resolve to `Selectors`.
    let auto_with_small_threshold = RowSelectionStrategy::Auto {
        threshold: 1,
        safe_strategy: true,
    };
    let single_run = RowSelection::from(vec![RowSelector::select(8)]);

    let builder =
        builder_with_selection(single_run).with_selection_strategy(auto_with_small_threshold);

    assert_eq!(
        builder.preferred_selection_strategy(),
        RowSelectionStrategy::Selectors
    );
}
344+
#[test]
fn selection_strategy_safe_fallback_detection() {
    let selection = RowSelection::from(vec![RowSelector::select(8)]);

    // Each configured strategy paired with whether it should allow the safe fallback:
    // only `Auto` with `safe_strategy: true` does.
    let cases = [
        (
            RowSelectionStrategy::Auto {
                threshold: 32,
                safe_strategy: true,
            },
            true,
        ),
        (
            RowSelectionStrategy::Auto {
                threshold: 32,
                safe_strategy: false,
            },
            false,
        ),
        (RowSelectionStrategy::Mask, false),
        (RowSelectionStrategy::Selectors, false),
    ];

    for (strategy, expected) in cases {
        let builder = builder_with_selection(selection.clone()).with_selection_strategy(strategy);
        assert_eq!(builder.selection_strategy_allows_safe_fallback(), expected);
    }
}
366373}
0 commit comments