@@ -20,6 +20,8 @@ use arrow_array::{BooleanArray, RecordBatch};
20
20
use arrow_schema:: ArrowError ;
21
21
22
22
/// A predicate operating on [`RecordBatch`]
23
+ ///
24
+ /// See [`RowFilter`] for more information on the use of this trait.
23
25
pub trait ArrowPredicate : Send + ' static {
24
26
/// Returns the [`ProjectionMask`] that describes the columns required
25
27
/// to evaluate this predicate. All projected columns will be provided in the `batch`
@@ -29,7 +31,7 @@ pub trait ArrowPredicate: Send + 'static {
29
31
/// Evaluate this predicate for the given [`RecordBatch`] containing the columns
30
32
/// identified by [`Self::projection`]
31
33
///
32
- /// Must return a [`BooleanArray`] that has the same length as the input
34
+ /// Must return a [`BooleanArray`] that has the same length as the input
33
35
/// `batch` where each row indicates whether the row should be returned:
34
36
/// * `true`: the row should be returned
35
37
/// * `false` or `null`: the row should not be returned
@@ -68,12 +70,17 @@ where
68
70
}
69
71
}
70
72
71
- /// A [`RowFilter`] allows pushing down a filter predicate to skip IO and decode
73
+ /// A filter applied *during* the parquet read process.
74
+ ///
75
+ /// [`RowFilter`] applies predicates in order, after decoding only the columns
76
+ /// required. As predicates eliminate rows, fewer rows from subsequent columns
77
+ /// may be required, thus potentially reducing IO and decode.
72
78
///
73
- /// This consists of a list of [`ArrowPredicate`] where only the rows that satisfy all
74
- /// of the predicates will be returned. Any [`RowSelection`] will be applied prior
79
+ /// A `RowFilter` consists of a list of [`ArrowPredicate`]s. Only the rows for which
80
+ /// all the predicates evaluate to `true` will be returned.
81
+ /// Any [`RowSelection`] provided to the reader will be applied prior
75
82
/// to the first predicate, and each predicate in turn will then be used to compute
76
- /// a more refined [`RowSelection`] to use when evaluating the subsequent predicates.
83
+ /// a more refined [`RowSelection`] used when evaluating the subsequent predicates.
77
84
///
78
85
/// Once all predicates have been evaluated, the final [`RowSelection`] is applied
79
86
/// to the top-level [`ProjectionMask`] to produce the final output [`RecordBatch`].
0 commit comments