Skip to content

Commit d8563ba

Browse files
authored
feat: adds regexp_extract function and more type tests (#7107)
* feat: adds format, regex_extract function and more type tests
  Signed-off-by: Dennis Zhuang <[email protected]>
* fix: forgot functions
  Signed-off-by: Dennis Zhuang <[email protected]>
* chore: forgot null type
  Signed-off-by: Dennis Zhuang <[email protected]>
* test: forgot date type
  Signed-off-by: Dennis Zhuang <[email protected]>
* feat: remove format function
  Signed-off-by: Dennis Zhuang <[email protected]>
* test: update results after upgrading datafusion
  Signed-off-by: Dennis Zhuang <[email protected]>
---------
Signed-off-by: Dennis Zhuang <[email protected]>
1 parent 7da2f5e commit d8563ba

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+6502
-15
lines changed

Cargo.lock

Lines changed: 10 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ prost-types = "0.13"
191191
raft-engine = { version = "0.4.1", default-features = false }
192192
rand = "0.9"
193193
ratelimit = "0.10"
194-
regex = "1.8"
194+
regex = "1.12"
195195
regex-automata = "0.4"
196196
reqwest = { version = "0.12", default-features = false, features = [
197197
"json",

src/common/datasource/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ object_store_opendal.workspace = true
3636
orc-rust = { version = "0.6.3", default-features = false, features = ["async"] }
3737
parquet.workspace = true
3838
paste.workspace = true
39-
regex = "1.7"
39+
regex.workspace = true
4040
serde.workspace = true
4141
snafu.workspace = true
4242
strum.workspace = true

src/common/function/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ nalgebra.workspace = true
5151
num = "0.4"
5252
num-traits = "0.2"
5353
paste.workspace = true
54+
regex.workspace = true
5455
s2 = { version = "0.0.12", optional = true }
5556
serde.workspace = true
5657
serde_json.workspace = true

src/common/function/src/function_registry.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use crate::scalars::json::JsonFunction;
3434
use crate::scalars::matches::MatchesFunction;
3535
use crate::scalars::matches_term::MatchesTermFunction;
3636
use crate::scalars::math::MathFunction;
37+
use crate::scalars::string::register_string_functions;
3738
use crate::scalars::timestamp::TimestampFunction;
3839
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
3940
use crate::scalars::vector::VectorFunction as VectorScalarFunction;
@@ -154,6 +155,9 @@ pub static FUNCTION_REGISTRY: LazyLock<Arc<FunctionRegistry>> = LazyLock::new(||
154155
// Json related functions
155156
JsonFunction::register(&function_registry);
156157

158+
// String related functions
159+
register_string_functions(&function_registry);
160+
157161
// Vector related functions
158162
VectorScalarFunction::register(&function_registry);
159163
VectorAggrFunction::register(&function_registry);

src/common/function/src/scalars.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pub mod json;
2020
pub mod matches;
2121
pub mod matches_term;
2222
pub mod math;
23+
pub(crate) mod string;
2324
pub mod vector;
2425

2526
pub(crate) mod hll_count;

src/common/function/src/scalars/date/date_format.rs

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ use common_query::error;
2020
use common_time::{Date, Timestamp};
2121
use datafusion_common::DataFusionError;
2222
use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder};
23-
use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit};
23+
use datafusion_common::arrow::datatypes::{
24+
ArrowTimestampType, DataType, Date32Type, Date64Type, TimeUnit,
25+
};
2426
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature};
2527
use snafu::ResultExt;
2628

@@ -40,6 +42,7 @@ impl Default for DateFormatFunction {
4042
signature: helper::one_of_sigs2(
4143
vec![
4244
DataType::Date32,
45+
DataType::Date64,
4346
DataType::Timestamp(TimeUnit::Second, None),
4447
DataType::Timestamp(TimeUnit::Millisecond, None),
4548
DataType::Timestamp(TimeUnit::Microsecond, None),
@@ -115,6 +118,29 @@ impl Function for DateFormatFunction {
115118
builder.append_option(result.as_deref());
116119
}
117120
}
121+
DataType::Date64 => {
122+
let left = left.as_primitive::<Date64Type>();
123+
for i in 0..size {
124+
let date = left.is_valid(i).then(|| {
125+
let ms = left.value(i);
126+
Timestamp::new_millisecond(ms)
127+
});
128+
let format = formats.is_valid(i).then(|| formats.value(i));
129+
130+
let result = match (date, format) {
131+
(Some(ts), Some(fmt)) => {
132+
Some(ts.as_formatted_string(fmt, Some(timezone)).map_err(|e| {
133+
DataFusionError::Execution(format!(
134+
"cannot format {ts:?} as '{fmt}': {e}"
135+
))
136+
})?)
137+
}
138+
_ => None,
139+
};
140+
141+
builder.append_option(result.as_deref());
142+
}
143+
}
118144
x => {
119145
return Err(DataFusionError::Execution(format!(
120146
"unsupported input data type {x}"
@@ -137,7 +163,9 @@ mod tests {
137163
use std::sync::Arc;
138164

139165
use arrow_schema::Field;
140-
use datafusion_common::arrow::array::{Date32Array, StringArray, TimestampSecondArray};
166+
use datafusion_common::arrow::array::{
167+
Date32Array, Date64Array, StringArray, TimestampSecondArray,
168+
};
141169
use datafusion_common::config::ConfigOptions;
142170
use datafusion_expr::{TypeSignature, Volatility};
143171

@@ -166,7 +194,7 @@ mod tests {
166194
Signature {
167195
type_signature: TypeSignature::OneOf(sigs),
168196
volatility: Volatility::Immutable
169-
} if sigs.len() == 5));
197+
} if sigs.len() == 6));
170198
}
171199

172200
#[test]
@@ -213,6 +241,50 @@ mod tests {
213241
}
214242
}
215243

244+
#[test]
245+
fn test_date64_date_format() {
246+
let f = DateFormatFunction::default();
247+
248+
let dates = vec![Some(123000), None, Some(42000), None];
249+
let formats = vec![
250+
"%Y-%m-%d %T.%3f",
251+
"%Y-%m-%d %T.%3f",
252+
"%Y-%m-%d %T.%3f",
253+
"%Y-%m-%d %T.%3f",
254+
];
255+
let results = [
256+
Some("1970-01-01 00:02:03.000"),
257+
None,
258+
Some("1970-01-01 00:00:42.000"),
259+
None,
260+
];
261+
262+
let mut config_options = ConfigOptions::default();
263+
config_options.extensions.insert(FunctionContext::default());
264+
let config_options = Arc::new(config_options);
265+
266+
let args = ScalarFunctionArgs {
267+
args: vec![
268+
ColumnarValue::Array(Arc::new(Date64Array::from(dates))),
269+
ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))),
270+
],
271+
arg_fields: vec![],
272+
number_rows: 4,
273+
return_field: Arc::new(Field::new("x", DataType::Utf8View, false)),
274+
config_options,
275+
};
276+
let result = f
277+
.invoke_with_args(args)
278+
.and_then(|x| x.to_array(4))
279+
.unwrap();
280+
let vector = result.as_string_view();
281+
282+
assert_eq!(4, vector.len());
283+
for (actual, expect) in vector.iter().zip(results) {
284+
assert_eq!(actual, expect);
285+
}
286+
}
287+
216288
#[test]
217289
fn test_date_date_format() {
218290
let f = DateFormatFunction::default();
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2023 Greptime Team
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
//! String scalar functions
16+
17+
mod regexp_extract;
18+
19+
pub(crate) use regexp_extract::RegexpExtractFunction;
20+
21+
use crate::function_registry::FunctionRegistry;
22+
23+
/// Register all string functions
24+
pub fn register_string_functions(registry: &FunctionRegistry) {
25+
RegexpExtractFunction::register(registry);
26+
}

0 commit comments

Comments
 (0)