15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- use arrow_array:: { ArrayAccessor , BooleanArray } ;
18
+ use arrow_array:: { ArrayAccessor , BooleanArray , StringViewArray } ;
19
19
use arrow_schema:: ArrowError ;
20
20
use memchr:: memchr2;
21
21
use memchr:: memmem:: Finder ;
@@ -114,28 +114,103 @@ impl<'a> Predicate<'a> {
114
114
Predicate :: IEqAscii ( v) => BooleanArray :: from_unary ( array, |haystack| {
115
115
haystack. eq_ignore_ascii_case ( v) != negate
116
116
} ) ,
117
- Predicate :: Contains ( finder) => BooleanArray :: from_unary ( array, |haystack| {
118
- finder. find ( haystack. as_bytes ( ) ) . is_some ( ) != negate
119
- } ) ,
120
- Predicate :: StartsWith ( v) => BooleanArray :: from_unary ( array, |haystack| {
121
- starts_with ( haystack, v, equals_kernel) != negate
122
- } ) ,
123
- Predicate :: IStartsWithAscii ( v) => BooleanArray :: from_unary ( array, |haystack| {
124
- starts_with ( haystack, v, equals_ignore_ascii_case_kernel) != negate
125
- } ) ,
126
- Predicate :: EndsWith ( v) => BooleanArray :: from_unary ( array, |haystack| {
127
- ends_with ( haystack, v, equals_kernel) != negate
128
- } ) ,
129
- Predicate :: IEndsWithAscii ( v) => BooleanArray :: from_unary ( array, |haystack| {
130
- ends_with ( haystack, v, equals_ignore_ascii_case_kernel) != negate
131
- } ) ,
117
+ Predicate :: Contains ( finder) => {
118
+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) {
119
+ BooleanArray :: from (
120
+ string_view_array
121
+ . bytes_iter ( )
122
+ . map ( |haystack| finder. find ( haystack) . is_some ( ) != negate)
123
+ . collect :: < Vec < _ > > ( ) ,
124
+ )
125
+ } else {
126
+ BooleanArray :: from_unary ( array, |haystack| {
127
+ finder. find ( haystack. as_bytes ( ) ) . is_some ( ) != negate
128
+ } )
129
+ }
130
+ }
131
+ Predicate :: StartsWith ( v) => {
132
+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) {
133
+ BooleanArray :: from (
134
+ string_view_array
135
+ . prefix_bytes_iter ( v. len ( ) )
136
+ . map ( |haystack| {
137
+ equals_bytes ( haystack, v. as_bytes ( ) , equals_kernel) != negate
138
+ } )
139
+ . collect :: < Vec < _ > > ( ) ,
140
+ )
141
+ } else {
142
+ BooleanArray :: from_unary ( array, |haystack| {
143
+ starts_with ( haystack, v, equals_kernel) != negate
144
+ } )
145
+ }
146
+ }
147
+ Predicate :: IStartsWithAscii ( v) => {
148
+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) {
149
+ BooleanArray :: from (
150
+ string_view_array
151
+ . prefix_bytes_iter ( v. len ( ) )
152
+ . map ( |haystack| {
153
+ equals_bytes (
154
+ haystack,
155
+ v. as_bytes ( ) ,
156
+ equals_ignore_ascii_case_kernel,
157
+ ) != negate
158
+ } )
159
+ . collect :: < Vec < _ > > ( ) ,
160
+ )
161
+ } else {
162
+ BooleanArray :: from_unary ( array, |haystack| {
163
+ starts_with ( haystack, v, equals_ignore_ascii_case_kernel) != negate
164
+ } )
165
+ }
166
+ }
167
+ Predicate :: EndsWith ( v) => {
168
+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) {
169
+ BooleanArray :: from (
170
+ string_view_array
171
+ . suffix_bytes_iter ( v. len ( ) )
172
+ . map ( |haystack| {
173
+ equals_bytes ( haystack, v. as_bytes ( ) , equals_kernel) != negate
174
+ } )
175
+ . collect :: < Vec < _ > > ( ) ,
176
+ )
177
+ } else {
178
+ BooleanArray :: from_unary ( array, |haystack| {
179
+ ends_with ( haystack, v, equals_kernel) != negate
180
+ } )
181
+ }
182
+ }
183
+ Predicate :: IEndsWithAscii ( v) => {
184
+ if let Some ( string_view_array) = array. as_any ( ) . downcast_ref :: < StringViewArray > ( ) {
185
+ BooleanArray :: from (
186
+ string_view_array
187
+ . suffix_bytes_iter ( v. len ( ) )
188
+ . map ( |haystack| {
189
+ equals_bytes (
190
+ haystack,
191
+ v. as_bytes ( ) ,
192
+ equals_ignore_ascii_case_kernel,
193
+ ) != negate
194
+ } )
195
+ . collect :: < Vec < _ > > ( ) ,
196
+ )
197
+ } else {
198
+ BooleanArray :: from_unary ( array, |haystack| {
199
+ ends_with ( haystack, v, equals_ignore_ascii_case_kernel) != negate
200
+ } )
201
+ }
202
+ }
132
203
Predicate :: Regex ( v) => {
133
204
BooleanArray :: from_unary ( array, |haystack| v. is_match ( haystack) != negate)
134
205
}
135
206
}
136
207
}
137
208
}
138
209
210
/// Returns `true` when `lhs` and `rhs` have the same length and
/// `byte_eq_kernel` accepts every pair of bytes found at the same position.
///
/// `byte_eq_kernel` receives `(lhs_byte, rhs_byte)` and decides whether the two
/// bytes count as equal, so the same routine serves both exact and
/// ASCII-case-insensitive comparisons.
///
/// Two empty slices compare equal, because there is no byte pair to reject.
fn equals_bytes(lhs: &[u8], rhs: &[u8], byte_eq_kernel: impl Fn((&u8, &u8)) -> bool) -> bool {
    // The length check must come first: `zip` stops at the shorter input, so
    // without it a strict prefix would be reported as equal to the full slice.
    lhs.len() == rhs.len() && zip(lhs, rhs).all(byte_eq_kernel)
}
213
+
139
214
/// This is faster than `str::starts_with` for small strings.
140
215
/// See <https://github.com/apache/arrow-rs/issues/6107> for more details.
141
216
fn starts_with ( haystack : & str , needle : & str , byte_eq_kernel : impl Fn ( ( & u8 , & u8 ) ) -> bool ) -> bool {
@@ -145,7 +220,6 @@ fn starts_with(haystack: &str, needle: &str, byte_eq_kernel: impl Fn((&u8, &u8))
145
220
zip ( haystack. as_bytes ( ) , needle. as_bytes ( ) ) . all ( byte_eq_kernel)
146
221
}
147
222
}
148
-
149
223
/// This is faster than `str::ends_with` for small strings.
150
224
/// See <https://github.com/apache/arrow-rs/issues/6107> for more details.
151
225
fn ends_with ( haystack : & str , needle : & str , byte_eq_kernel : impl Fn ( ( & u8 , & u8 ) ) -> bool ) -> bool {
0 commit comments