diff --git a/Cargo.lock b/Cargo.lock index 07b169581701..c2bad3d97129 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1264,7 +1264,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "serde", ] @@ -2190,6 +2190,7 @@ dependencies = [ "num-traits", "paste", "pretty_assertions", + "regex", "s2", "serde", "serde_json", @@ -4588,7 +4589,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ "bit-set", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.7", ] @@ -6118,7 +6119,7 @@ dependencies = [ "rand 0.9.1", "rand_chacha 0.9.0", "regex", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "roaring", "serde", "serde_json", @@ -6735,7 +6736,7 @@ version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5baa5e9ff84f1aefd264e6869907646538a52147a755d494517a8007fb48733" dependencies = [ - "regex-automata 0.4.9", + "regex-automata 0.4.13", "rustversion", ] @@ -10469,13 +10470,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", + "regex-automata 0.4.13", "regex-syntax 0.8.7", ] @@ -10490,9 +10491,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" 
dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index a4ce20bfd19a..ebafce51bae9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -191,7 +191,7 @@ prost-types = "0.13" raft-engine = { version = "0.4.1", default-features = false } rand = "0.9" ratelimit = "0.10" -regex = "1.8" +regex = "1.12" regex-automata = "0.4" reqwest = { version = "0.12", default-features = false, features = [ "json", diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 303d05ceb1fb..964f41736c2d 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -36,7 +36,7 @@ object_store_opendal.workspace = true orc-rust = { version = "0.6.3", default-features = false, features = ["async"] } parquet.workspace = true paste.workspace = true -regex = "1.7" +regex.workspace = true serde.workspace = true snafu.workspace = true strum.workspace = true diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index d5b928e2a16f..1d272f5d04e6 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -51,6 +51,7 @@ nalgebra.workspace = true num = "0.4" num-traits = "0.2" paste.workspace = true +regex.workspace = true s2 = { version = "0.0.12", optional = true } serde.workspace = true serde_json.workspace = true diff --git a/src/common/function/src/function_registry.rs b/src/common/function/src/function_registry.rs index 75bb71c63ad1..e51dcf4cb8f5 100644 --- a/src/common/function/src/function_registry.rs +++ b/src/common/function/src/function_registry.rs @@ -34,6 +34,7 @@ use crate::scalars::json::JsonFunction; use crate::scalars::matches::MatchesFunction; use crate::scalars::matches_term::MatchesTermFunction; use crate::scalars::math::MathFunction; +use crate::scalars::string::register_string_functions; use crate::scalars::timestamp::TimestampFunction; use crate::scalars::uddsketch_calc::UddSketchCalcFunction; use crate::scalars::vector::VectorFunction as VectorScalarFunction; 
@@ -154,6 +155,9 @@ pub static FUNCTION_REGISTRY: LazyLock> = LazyLock::new(|| // Json related functions JsonFunction::register(&function_registry); + // String related functions + register_string_functions(&function_registry); + // Vector related functions VectorScalarFunction::register(&function_registry); VectorAggrFunction::register(&function_registry); diff --git a/src/common/function/src/scalars.rs b/src/common/function/src/scalars.rs index 6f93f2741da1..9a8c9cc3a05c 100644 --- a/src/common/function/src/scalars.rs +++ b/src/common/function/src/scalars.rs @@ -20,6 +20,7 @@ pub mod json; pub mod matches; pub mod matches_term; pub mod math; +pub(crate) mod string; pub mod vector; pub(crate) mod hll_count; diff --git a/src/common/function/src/scalars/date/date_format.rs b/src/common/function/src/scalars/date/date_format.rs index 0e321c957e92..dfa5a444cac4 100644 --- a/src/common/function/src/scalars/date/date_format.rs +++ b/src/common/function/src/scalars/date/date_format.rs @@ -20,7 +20,9 @@ use common_query::error; use common_time::{Date, Timestamp}; use datafusion_common::DataFusionError; use datafusion_common::arrow::array::{Array, AsArray, StringViewBuilder}; -use datafusion_common::arrow::datatypes::{ArrowTimestampType, DataType, Date32Type, TimeUnit}; +use datafusion_common::arrow::datatypes::{ + ArrowTimestampType, DataType, Date32Type, Date64Type, TimeUnit, +}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature}; use snafu::ResultExt; @@ -40,6 +42,7 @@ impl Default for DateFormatFunction { signature: helper::one_of_sigs2( vec![ DataType::Date32, + DataType::Date64, DataType::Timestamp(TimeUnit::Second, None), DataType::Timestamp(TimeUnit::Millisecond, None), DataType::Timestamp(TimeUnit::Microsecond, None), @@ -115,6 +118,29 @@ impl Function for DateFormatFunction { builder.append_option(result.as_deref()); } } + DataType::Date64 => { + let left = left.as_primitive::(); + for i in 0..size { + let date = left.is_valid(i).then(|| { + let 
ms = left.value(i); + Timestamp::new_millisecond(ms) + }); + let format = formats.is_valid(i).then(|| formats.value(i)); + + let result = match (date, format) { + (Some(ts), Some(fmt)) => { + Some(ts.as_formatted_string(fmt, Some(timezone)).map_err(|e| { + DataFusionError::Execution(format!( + "cannot format {ts:?} as '{fmt}': {e}" + )) + })?) + } + _ => None, + }; + + builder.append_option(result.as_deref()); + } + } x => { return Err(DataFusionError::Execution(format!( "unsupported input data type {x}" @@ -137,7 +163,9 @@ mod tests { use std::sync::Arc; use arrow_schema::Field; - use datafusion_common::arrow::array::{Date32Array, StringArray, TimestampSecondArray}; + use datafusion_common::arrow::array::{ + Date32Array, Date64Array, StringArray, TimestampSecondArray, + }; use datafusion_common::config::ConfigOptions; use datafusion_expr::{TypeSignature, Volatility}; @@ -166,7 +194,7 @@ mod tests { Signature { type_signature: TypeSignature::OneOf(sigs), volatility: Volatility::Immutable - } if sigs.len() == 5)); + } if sigs.len() == 6)); } #[test] @@ -213,6 +241,50 @@ mod tests { } } + #[test] + fn test_date64_date_format() { + let f = DateFormatFunction::default(); + + let dates = vec![Some(123000), None, Some(42000), None]; + let formats = vec![ + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + "%Y-%m-%d %T.%3f", + ]; + let results = [ + Some("1970-01-01 00:02:03.000"), + None, + Some("1970-01-01 00:00:42.000"), + None, + ]; + + let mut config_options = ConfigOptions::default(); + config_options.extensions.insert(FunctionContext::default()); + let config_options = Arc::new(config_options); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(Arc::new(Date64Array::from(dates))), + ColumnarValue::Array(Arc::new(StringArray::from_iter_values(formats))), + ], + arg_fields: vec![], + number_rows: 4, + return_field: Arc::new(Field::new("x", DataType::Utf8View, false)), + config_options, + }; + let result = f + .invoke_with_args(args) 
+ .and_then(|x| x.to_array(4)) + .unwrap(); + let vector = result.as_string_view(); + + assert_eq!(4, vector.len()); + for (actual, expect) in vector.iter().zip(results) { + assert_eq!(actual, expect); + } + } + #[test] fn test_date_date_format() { let f = DateFormatFunction::default(); diff --git a/src/common/function/src/scalars/string.rs b/src/common/function/src/scalars/string.rs new file mode 100644 index 000000000000..95c6201ee2c5 --- /dev/null +++ b/src/common/function/src/scalars/string.rs @@ -0,0 +1,26 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! String scalar functions + +mod regexp_extract; + +pub(crate) use regexp_extract::RegexpExtractFunction; + +use crate::function_registry::FunctionRegistry; + +/// Register all string functions +pub fn register_string_functions(registry: &FunctionRegistry) { + RegexpExtractFunction::register(registry); +} diff --git a/src/common/function/src/scalars/string/regexp_extract.rs b/src/common/function/src/scalars/string/regexp_extract.rs new file mode 100644 index 000000000000..bc78c4df74b1 --- /dev/null +++ b/src/common/function/src/scalars/string/regexp_extract.rs @@ -0,0 +1,339 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Implementation of REGEXP_EXTRACT function +use std::fmt; +use std::sync::Arc; + +use datafusion_common::DataFusionError; +use datafusion_common::arrow::array::{Array, AsArray, LargeStringBuilder}; +use datafusion_common::arrow::compute::cast; +use datafusion_common::arrow::datatypes::DataType; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, TypeSignature, Volatility}; +use regex::{Regex, RegexBuilder}; + +use crate::function::Function; +use crate::function_registry::FunctionRegistry; + +const NAME: &str = "regexp_extract"; + +// Safety limits +const MAX_REGEX_SIZE: usize = 1024 * 1024; // compiled regex heap cap +const MAX_DFA_SIZE: usize = 2 * 1024 * 1024; // lazy DFA cap +const MAX_TOTAL_RESULT_SIZE: usize = 64 * 1024 * 1024; // total batch cap +const MAX_SINGLE_MATCH: usize = 1024 * 1024; // per-row cap +const MAX_PATTERN_LEN: usize = 10_000; // pattern text length cap + +/// REGEXP_EXTRACT function implementation +/// Extracts the first substring matching the given regular expression pattern. +/// If no match is found, returns NULL. 
+/// +#[derive(Debug)] +pub struct RegexpExtractFunction { + signature: Signature, +} + +impl RegexpExtractFunction { + pub fn register(registry: &FunctionRegistry) { + registry.register_scalar(RegexpExtractFunction::default()); + } +} + +impl Default for RegexpExtractFunction { + fn default() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::Utf8View, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8View]), + TypeSignature::Exact(vec![DataType::Utf8View, DataType::LargeUtf8]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::Utf8]), + TypeSignature::Exact(vec![DataType::Utf8, DataType::LargeUtf8]), + TypeSignature::Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl fmt::Display for RegexpExtractFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +impl Function for RegexpExtractFunction { + fn name(&self) -> &str { + NAME + } + + // Always return LargeUtf8 for simplicity and safety + fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { + Ok(DataType::LargeUtf8) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn invoke_with_args( + &self, + args: ScalarFunctionArgs, + ) -> datafusion_common::Result { + if args.args.len() != 2 { + return Err(DataFusionError::Execution( + "REGEXP_EXTRACT requires exactly two arguments (text, pattern)".to_string(), + )); + } + + // Keep original ColumnarValue variants for scalar-pattern fast path + let pattern_is_scalar = matches!(args.args[1], ColumnarValue::Scalar(_)); + + let arrays = ColumnarValue::values_to_arrays(&args.args)?; + let text_array = &arrays[0]; + let pattern_array = 
&arrays[1]; + + // Cast both to LargeUtf8 for uniform access (supports Utf8/Utf8View/Dictionary) + let text_large = cast(text_array.as_ref(), &DataType::LargeUtf8).map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT: text cast failed: {e}")) + })?; + let pattern_large = cast(pattern_array.as_ref(), &DataType::LargeUtf8).map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT: pattern cast failed: {e}")) + })?; + + let text = text_large.as_string::(); + let pattern = pattern_large.as_string::(); + let len = text.len(); + + // Pre-size result builder with conservative estimate + let mut estimated_total = 0usize; + for i in 0..len { + if !text.is_null(i) { + estimated_total = estimated_total.saturating_add(text.value_length(i) as usize); + if estimated_total > MAX_TOTAL_RESULT_SIZE { + return Err(DataFusionError::ResourcesExhausted(format!( + "REGEXP_EXTRACT total output exceeds {} bytes", + MAX_TOTAL_RESULT_SIZE + ))); + } + } + } + let mut builder = LargeStringBuilder::with_capacity(len, estimated_total); + + // Fast path: if pattern is scalar, compile once + let compiled_scalar: Option = if pattern_is_scalar && len > 0 && !pattern.is_null(0) + { + Some(compile_regex_checked(pattern.value(0))?) + } else { + None + }; + + for i in 0..len { + if text.is_null(i) || pattern.is_null(i) { + builder.append_null(); + continue; + } + + let s = text.value(i); + let pat = pattern.value(i); + + // Compile or reuse regex + let re = if let Some(ref compiled) = compiled_scalar { + compiled + } else { + // TODO: For performance-critical applications with repeating patterns, + // consider adding a small LRU cache here + &compile_regex_checked(pat)? 
+ }; + + // First match only + if let Some(m) = re.find(s) { + let m_str = m.as_str(); + if m_str.len() > MAX_SINGLE_MATCH { + return Err(DataFusionError::Execution( + "REGEXP_EXTRACT match exceeds per-row limit (1MB)".to_string(), + )); + } + builder.append_value(m_str); + } else { + builder.append_null(); + } + } + + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } +} + +// Compile a regex with safety checks +fn compile_regex_checked(pattern: &str) -> datafusion_common::Result { + if pattern.len() > MAX_PATTERN_LEN { + return Err(DataFusionError::Execution(format!( + "REGEXP_EXTRACT pattern too long (> {} chars)", + MAX_PATTERN_LEN + ))); + } + RegexBuilder::new(pattern) + .size_limit(MAX_REGEX_SIZE) + .dfa_size_limit(MAX_DFA_SIZE) + .build() + .map_err(|e| { + DataFusionError::Execution(format!("REGEXP_EXTRACT invalid pattern '{}': {e}", pattern)) + }) +} + +#[cfg(test)] +mod tests { + use datafusion_common::arrow::array::StringArray; + use datafusion_common::arrow::datatypes::Field; + use datafusion_expr::ScalarFunctionArgs; + + use super::*; + + #[test] + fn test_regexp_extract_function_basic() { + let text_array = Arc::new(StringArray::from(vec!["version 1.2.3", "no match here"])); + let pattern_array = Arc::new(StringArray::from(vec!["\\d+\\.\\d+\\.\\d+", "\\d+"])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "1.2.3"); + 
assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_phone_number() { + let text_array = Arc::new(StringArray::from(vec!["Phone: 123-456-7890", "No phone"])); + let pattern_array = Arc::new(StringArray::from(vec![ + "\\d{3}-\\d{3}-\\d{4}", + "\\d{3}-\\d{3}-\\d{4}", + ])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "123-456-7890"); + assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_email() { + let text_array = Arc::new(StringArray::from(vec![ + "Email: user@domain.com", + "Invalid email", + ])); + let pattern_array = Arc::new(StringArray::from(vec![ + "[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+", + "[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+", + ])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, false)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + 
let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "user@domain.com"); + assert!(string_array.is_null(1)); // no match should return NULL + } else { + panic!("Expected array result"); + } + } + + #[test] + fn test_regexp_extract_with_nulls() { + let text_array = Arc::new(StringArray::from(vec![Some("test 123"), None])); + let pattern_array = Arc::new(StringArray::from(vec![Some("\\d+"), Some("\\d+")])); + + let args = ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(text_array), + ColumnarValue::Array(pattern_array), + ], + arg_fields: vec![ + Arc::new(Field::new("arg_0", DataType::Utf8, true)), + Arc::new(Field::new("arg_1", DataType::Utf8, false)), + ], + return_field: Arc::new(Field::new("result", DataType::LargeUtf8, true)), + number_rows: 2, + config_options: Arc::new(datafusion_common::config::ConfigOptions::default()), + }; + + let function = RegexpExtractFunction::default(); + let result = function.invoke_with_args(args).unwrap(); + + if let ColumnarValue::Array(array) = result { + let string_array = array.as_string::(); + assert_eq!(string_array.value(0), "123"); + assert!(string_array.is_null(1)); // NULL input should return NULL + } else { + panic!("Expected array result"); + } + } +} diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 4cc1efb8bc1e..7926ae198ade 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -65,7 +65,7 @@ partition.workspace = true puffin.workspace = true rand.workspace = true rayon = "1.10" -regex = "1.5" +regex.workspace = true rskafka = { workspace = true, optional = true } rstest = { workspace = true, optional = true } rstest_reuse = { workspace = true, optional = true } diff --git a/tests/cases/standalone/common/function/string/concat.result b/tests/cases/standalone/common/function/string/concat.result new file mode 100644 index 000000000000..5c0907d5cb28 --- 
/dev/null +++ b/tests/cases/standalone/common/function/string/concat.result @@ -0,0 +1,211 @@ +-- String concatenation function tests +-- Test CONCAT function +-- Basic concatenation +SELECT CONCAT('hello', 'world'); + ++-------------------------------------+ +| concat(Utf8("hello"),Utf8("world")) | ++-------------------------------------+ +| helloworld | ++-------------------------------------+ + +SELECT CONCAT('hello', ' ', 'world'); + ++-----------------------------------------------+ +| concat(Utf8("hello"),Utf8(" "),Utf8("world")) | ++-----------------------------------------------+ +| hello world | ++-----------------------------------------------+ + +SELECT CONCAT('a', 'b', 'c', 'd'); + ++-------------------------------------------------+ +| concat(Utf8("a"),Utf8("b"),Utf8("c"),Utf8("d")) | ++-------------------------------------------------+ +| abcd | ++-------------------------------------------------+ + +-- Concatenation with NULL values +SELECT CONCAT('hello', NULL); + ++----------------------------+ +| concat(Utf8("hello"),NULL) | ++----------------------------+ +| hello | ++----------------------------+ + +SELECT CONCAT(NULL, 'world'); + ++----------------------------+ +| concat(NULL,Utf8("world")) | ++----------------------------+ +| world | ++----------------------------+ + +SELECT CONCAT(NULL, NULL); + ++-------------------+ +| concat(NULL,NULL) | ++-------------------+ +| | ++-------------------+ + +-- Concatenation with numbers (should convert to string) +SELECT CONCAT('value: ', 42); + ++-----------------------------------+ +| concat(Utf8("value: "),Int64(42)) | ++-----------------------------------+ +| value: 42 | ++-----------------------------------+ + +SELECT CONCAT(1, 2, 3); + ++------------------------------------+ +| concat(Int64(1),Int64(2),Int64(3)) | ++------------------------------------+ +| 123 | ++------------------------------------+ + +-- Test with table data +CREATE TABLE concat_test(first_name VARCHAR, last_name VARCHAR, age 
INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO concat_test VALUES + ('John', 'Doe', 30, 1000), + ('Jane', 'Smith', 25, 2000), + ('Bob', NULL, 35, 3000), + (NULL, 'Wilson', 40, 4000); + +Affected Rows: 4 + +-- Concatenate table columns +SELECT CONCAT(first_name, ' ', last_name) as full_name FROM concat_test ORDER BY ts; + ++------------+ +| full_name | ++------------+ +| John Doe | +| Jane Smith | +| Bob | +| Wilson | ++------------+ + +SELECT CONCAT(first_name, ' is ', age, ' years old') FROM concat_test ORDER BY ts; + ++--------------------------------------------------------------------------------+ +| concat(concat_test.first_name,Utf8(" is "),concat_test.age,Utf8(" years old")) | ++--------------------------------------------------------------------------------+ +| John is 30 years old | +| Jane is 25 years old | +| Bob is 35 years old | +| is 40 years old | ++--------------------------------------------------------------------------------+ + +-- Test CONCAT_WS (concat with separator) +SELECT CONCAT_WS(' ', first_name, last_name) as full_name FROM concat_test ORDER BY ts; + ++------------+ +| full_name | ++------------+ +| John Doe | +| Jane Smith | +| Bob | +| Wilson | ++------------+ + +SELECT CONCAT_WS('-', first_name, last_name, age) FROM concat_test ORDER BY ts; + ++-----------------------------------------------------------------------------------+ +| concat_ws(Utf8("-"),concat_test.first_name,concat_test.last_name,concat_test.age) | ++-----------------------------------------------------------------------------------+ +| John-Doe-30 | +| Jane-Smith-25 | +| Bob-35 | +| Wilson-40 | ++-----------------------------------------------------------------------------------+ + +SELECT CONCAT_WS(',', 'a', 'b', 'c', 'd'); + ++--------------------------------------------------------------+ +| concat_ws(Utf8(","),Utf8("a"),Utf8("b"),Utf8("c"),Utf8("d")) | ++--------------------------------------------------------------+ +| a,b,c,d | 
++--------------------------------------------------------------+ + +-- CONCAT_WS with NULL values (should skip NULLs) +SELECT CONCAT_WS(' ', 'hello', NULL, 'world'); + ++-------------------------------------------------------+ +| concat_ws(Utf8(" "),Utf8("hello"),NULL,Utf8("world")) | ++-------------------------------------------------------+ +| hello world | ++-------------------------------------------------------+ + +SELECT CONCAT_WS('|', first_name, last_name) FROM concat_test ORDER BY ts; + ++-------------------------------------------------------------------+ +| concat_ws(Utf8("|"),concat_test.first_name,concat_test.last_name) | ++-------------------------------------------------------------------+ +| John|Doe | +| Jane|Smith | +| Bob | +| Wilson | ++-------------------------------------------------------------------+ + +-- Test pipe operator || +SELECT 'hello' || 'world'; + ++--------------------------------+ +| Utf8("hello") || Utf8("world") | ++--------------------------------+ +| helloworld | ++--------------------------------+ + +SELECT 'hello' || ' ' || 'world'; + ++---------------------------------------------+ +| Utf8("hello") || Utf8(" ") || Utf8("world") | ++---------------------------------------------+ +| hello world | ++---------------------------------------------+ + +SELECT first_name || ' ' || last_name FROM concat_test WHERE first_name IS NOT NULL AND last_name IS NOT NULL ORDER BY ts; + ++--------------------------------------------------------------+ +| concat_test.first_name || Utf8(" ") || concat_test.last_name | ++--------------------------------------------------------------+ +| John Doe | +| Jane Smith | ++--------------------------------------------------------------+ + +-- Unicode concatenation +SELECT CONCAT('Hello ', '世界'); + ++-------------------------------------+ +| concat(Utf8("Hello "),Utf8("世界")) | ++-------------------------------------+ +| Hello 世界 | ++-------------------------------------+ + +SELECT CONCAT('🚀', ' ', '🌟'); 
+ ++-----------------------------------------+ +| concat(Utf8("🚀"),Utf8(" "),Utf8("🌟")) | ++-----------------------------------------+ +| 🚀 🌟 | ++-----------------------------------------+ + +SELECT '中文' || '🐄'; + ++----------------------------+ +| Utf8("中文") || Utf8("🐄") | ++----------------------------+ +| 中文🐄 | ++----------------------------+ + +DROP TABLE concat_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/concat.sql b/tests/cases/standalone/common/function/string/concat.sql new file mode 100644 index 000000000000..4f73eed62ee7 --- /dev/null +++ b/tests/cases/standalone/common/function/string/concat.sql @@ -0,0 +1,63 @@ +-- String concatenation function tests +-- Test CONCAT function + +-- Basic concatenation +SELECT CONCAT('hello', 'world'); + +SELECT CONCAT('hello', ' ', 'world'); + +SELECT CONCAT('a', 'b', 'c', 'd'); + +-- Concatenation with NULL values +SELECT CONCAT('hello', NULL); + +SELECT CONCAT(NULL, 'world'); + +SELECT CONCAT(NULL, NULL); + +-- Concatenation with numbers (should convert to string) +SELECT CONCAT('value: ', 42); + +SELECT CONCAT(1, 2, 3); + +-- Test with table data +CREATE TABLE concat_test(first_name VARCHAR, last_name VARCHAR, age INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO concat_test VALUES + ('John', 'Doe', 30, 1000), + ('Jane', 'Smith', 25, 2000), + ('Bob', NULL, 35, 3000), + (NULL, 'Wilson', 40, 4000); + +-- Concatenate table columns +SELECT CONCAT(first_name, ' ', last_name) as full_name FROM concat_test ORDER BY ts; + +SELECT CONCAT(first_name, ' is ', age, ' years old') FROM concat_test ORDER BY ts; + +-- Test CONCAT_WS (concat with separator) +SELECT CONCAT_WS(' ', first_name, last_name) as full_name FROM concat_test ORDER BY ts; + +SELECT CONCAT_WS('-', first_name, last_name, age) FROM concat_test ORDER BY ts; + +SELECT CONCAT_WS(',', 'a', 'b', 'c', 'd'); + +-- CONCAT_WS with NULL values (should skip NULLs) +SELECT CONCAT_WS(' ', 'hello', NULL, 'world'); + +SELECT 
CONCAT_WS('|', first_name, last_name) FROM concat_test ORDER BY ts; + +-- Test pipe operator || +SELECT 'hello' || 'world'; + +SELECT 'hello' || ' ' || 'world'; + +SELECT first_name || ' ' || last_name FROM concat_test WHERE first_name IS NOT NULL AND last_name IS NOT NULL ORDER BY ts; + +-- Unicode concatenation +SELECT CONCAT('Hello ', '世界'); + +SELECT CONCAT('🚀', ' ', '🌟'); + +SELECT '中文' || '🐄'; + +DROP TABLE concat_test; diff --git a/tests/cases/standalone/common/function/string/length.result b/tests/cases/standalone/common/function/string/length.result new file mode 100644 index 000000000000..e508750626d4 --- /dev/null +++ b/tests/cases/standalone/common/function/string/length.result @@ -0,0 +1,183 @@ +-- String length function tests +-- LENGTH function +SELECT LENGTH('hello'); + ++-----------------------+ +| length(Utf8("hello")) | ++-----------------------+ +| 5 | ++-----------------------+ + +SELECT LENGTH(''); + ++------------------+ +| length(Utf8("")) | ++------------------+ +| 0 | ++------------------+ + +SELECT LENGTH(NULL); + ++--------------+ +| length(NULL) | ++--------------+ +| | ++--------------+ + +SELECT LENGTH('hello world'); + ++-----------------------------+ +| length(Utf8("hello world")) | ++-----------------------------+ +| 11 | ++-----------------------------+ + +-- CHAR_LENGTH (character length) +SELECT CHAR_LENGTH('hello'); + ++----------------------------+ +| char_length(Utf8("hello")) | ++----------------------------+ +| 5 | ++----------------------------+ + +SELECT CHAR_LENGTH(''); + ++-----------------------+ +| char_length(Utf8("")) | ++-----------------------+ +| 0 | ++-----------------------+ + +SELECT CHAR_LENGTH(NULL); + ++-------------------+ +| char_length(NULL) | ++-------------------+ +| | ++-------------------+ + +-- CHARACTER_LENGTH (alias for CHAR_LENGTH) +SELECT CHARACTER_LENGTH('hello world'); + ++---------------------------------------+ +| character_length(Utf8("hello world")) | 
++---------------------------------------+ +| 11 | ++---------------------------------------+ + +-- Unicode character length +SELECT LENGTH('世界') AS a, CHAR_LENGTH('世界') AS b; + ++---+---+ +| a | b | ++---+---+ +| 2 | 2 | ++---+---+ + +SELECT LENGTH('🚀🌟') AS a, CHAR_LENGTH('🚀🌟') AS b; + ++---+---+ +| a | b | ++---+---+ +| 2 | 2 | ++---+---+ + +SELECT LENGTH('café') AS a, CHAR_LENGTH('café') AS b; + ++---+---+ +| a | b | ++---+---+ +| 4 | 4 | ++---+---+ + +-- Test with table data +CREATE TABLE length_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO length_test VALUES + ('hello', 1000), + ('world!', 2000), + ('', 3000), + ('中文测试', 4000), + ('🚀🎉🌟', 5000), + (NULL, 6000); + +Affected Rows: 6 + +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM length_test ORDER BY ts; + ++----------+---+---+ +| s | a | b | ++----------+---+---+ +| hello | 5 | 5 | +| world! | 6 | 6 | +| | 0 | 0 | +| 中文测试 | 4 | 4 | +| 🚀🎉🌟 | 3 | 3 | +| | | | ++----------+---+---+ + +-- BIT_LENGTH (length in bits) +SELECT BIT_LENGTH('hello'); + ++---------------------------+ +| bit_length(Utf8("hello")) | ++---------------------------+ +| 40 | ++---------------------------+ + +SELECT BIT_LENGTH(''); + ++----------------------+ +| bit_length(Utf8("")) | ++----------------------+ +| 0 | ++----------------------+ + +SELECT BIT_LENGTH('世界'); + ++--------------------------+ +| bit_length(Utf8("世界")) | ++--------------------------+ +| 48 | ++--------------------------+ + +-- OCTET_LENGTH (length in bytes) +SELECT OCTET_LENGTH('hello'); + ++-----------------------------+ +| octet_length(Utf8("hello")) | ++-----------------------------+ +| 5 | ++-----------------------------+ + +SELECT OCTET_LENGTH(''); + ++------------------------+ +| octet_length(Utf8("")) | ++------------------------+ +| 0 | ++------------------------+ + +SELECT OCTET_LENGTH('世界'); + ++----------------------------+ +| octet_length(Utf8("世界")) | ++----------------------------+ +| 6 | ++----------------------------+ + 
+SELECT OCTET_LENGTH('🚀'); + ++--------------------------+ +| octet_length(Utf8("🚀")) | ++--------------------------+ +| 4 | ++--------------------------+ + +DROP TABLE length_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/length.sql b/tests/cases/standalone/common/function/string/length.sql new file mode 100644 index 000000000000..26f683858d22 --- /dev/null +++ b/tests/cases/standalone/common/function/string/length.sql @@ -0,0 +1,58 @@ +-- String length function tests + +-- LENGTH function +SELECT LENGTH('hello'); + +SELECT LENGTH(''); + +SELECT LENGTH(NULL); + +SELECT LENGTH('hello world'); + +-- CHAR_LENGTH (character length) +SELECT CHAR_LENGTH('hello'); + +SELECT CHAR_LENGTH(''); + +SELECT CHAR_LENGTH(NULL); + +-- CHARACTER_LENGTH (alias for CHAR_LENGTH) +SELECT CHARACTER_LENGTH('hello world'); + +-- Unicode character length +SELECT LENGTH('世界') AS a, CHAR_LENGTH('世界') AS b; + +SELECT LENGTH('🚀🌟') AS a, CHAR_LENGTH('🚀🌟') AS b; + +SELECT LENGTH('café') AS a, CHAR_LENGTH('café') AS b; + +-- Test with table data +CREATE TABLE length_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO length_test VALUES + ('hello', 1000), + ('world!', 2000), + ('', 3000), + ('中文测试', 4000), + ('🚀🎉🌟', 5000), + (NULL, 6000); + +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM length_test ORDER BY ts; + +-- BIT_LENGTH (length in bits) +SELECT BIT_LENGTH('hello'); + +SELECT BIT_LENGTH(''); + +SELECT BIT_LENGTH('世界'); + +-- OCTET_LENGTH (length in bytes) +SELECT OCTET_LENGTH('hello'); + +SELECT OCTET_LENGTH(''); + +SELECT OCTET_LENGTH('世界'); + +SELECT OCTET_LENGTH('🚀'); + +DROP TABLE length_test; diff --git a/tests/cases/standalone/common/function/string/like_pattern.result b/tests/cases/standalone/common/function/string/like_pattern.result new file mode 100644 index 000000000000..515582a1fcd6 --- /dev/null +++ b/tests/cases/standalone/common/function/string/like_pattern.result @@ -0,0 +1,280 @@ +-- String LIKE pattern matching tests +-- 
Basic LIKE patterns +SELECT 'hello world' LIKE 'hello%'; + ++-----------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello%") | ++-----------------------------------------+ +| true | ++-----------------------------------------+ + +SELECT 'hello world' LIKE '%world'; + ++-----------------------------------------+ +| Utf8("hello world") LIKE Utf8("%world") | ++-----------------------------------------+ +| true | ++-----------------------------------------+ + +SELECT 'hello world' LIKE '%llo%'; + ++----------------------------------------+ +| Utf8("hello world") LIKE Utf8("%llo%") | ++----------------------------------------+ +| true | ++----------------------------------------+ + +SELECT 'hello world' LIKE 'hello_world'; + ++----------------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello_world") | ++----------------------------------------------+ +| true | ++----------------------------------------------+ + +SELECT 'hello world' LIKE 'hello world'; + ++----------------------------------------------+ +| Utf8("hello world") LIKE Utf8("hello world") | ++----------------------------------------------+ +| true | ++----------------------------------------------+ + +-- LIKE with NOT +SELECT 'hello world' NOT LIKE 'goodbye%'; + ++-----------------------------------------------+ +| Utf8("hello world") NOT LIKE Utf8("goodbye%") | ++-----------------------------------------------+ +| true | ++-----------------------------------------------+ + +SELECT 'hello world' NOT LIKE 'hello%'; + ++---------------------------------------------+ +| Utf8("hello world") NOT LIKE Utf8("hello%") | ++---------------------------------------------+ +| false | ++---------------------------------------------+ + +-- Case sensitivity +SELECT 'Hello World' LIKE 'hello%'; + ++-----------------------------------------+ +| Utf8("Hello World") LIKE Utf8("hello%") | ++-----------------------------------------+ +| false | 
++-----------------------------------------+ + +SELECT 'Hello World' ILIKE 'hello%'; + ++------------------------------------------+ +| Utf8("Hello World") ILIKE Utf8("hello%") | ++------------------------------------------+ +| true | ++------------------------------------------+ + +SELECT 'Hello World' ILIKE 'HELLO%'; + ++------------------------------------------+ +| Utf8("Hello World") ILIKE Utf8("HELLO%") | ++------------------------------------------+ +| true | ++------------------------------------------+ + +-- Test with table data +CREATE TABLE like_test("name" VARCHAR, email VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO like_test VALUES + ('John Doe', 'john@example.com', 1000), + ('Jane Smith', 'jane@gmail.com', 2000), + ('Bob Wilson', 'bob@yahoo.com', 3000), + ('Alice Johnson', 'alice@company.org', 4000), + ('Charlie Brown', 'charlie@test.net', 5000); + +Affected Rows: 5 + +-- Pattern matching on names +SELECT "name" FROM like_test WHERE "name" LIKE 'J%' ORDER BY ts; + ++------------+ +| name | ++------------+ +| John Doe | +| Jane Smith | ++------------+ + +SELECT "name" FROM like_test WHERE "name" LIKE '%son' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| Bob Wilson | +| Alice Johnson | ++---------------+ + +-- Contains space +SELECT "name" FROM like_test WHERE "name" LIKE '% %' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| John Doe | +| Jane Smith | +| Bob Wilson | +| Alice Johnson | +| Charlie Brown | ++---------------+ + +-- Pattern matching on emails +SELECT "name", email FROM like_test WHERE email LIKE '%@gmail.com' ORDER BY ts; + ++------------+----------------+ +| name | email | ++------------+----------------+ +| Jane Smith | jane@gmail.com | ++------------+----------------+ + +SELECT "name", email FROM like_test WHERE email LIKE '%.com' ORDER BY ts; + ++------------+------------------+ +| name | email | ++------------+------------------+ +| John Doe | john@example.com | +| Jane Smith 
| jane@gmail.com | +| Bob Wilson | bob@yahoo.com | ++------------+------------------+ + +SELECT "name", email FROM like_test WHERE email LIKE '%@%.org' ORDER BY ts; + ++---------------+-------------------+ +| name | email | ++---------------+-------------------+ +| Alice Johnson | alice@company.org | ++---------------+-------------------+ + +-- Underscore wildcard +SELECT "name" FROM like_test WHERE "name" LIKE 'Jo__ ___' ORDER BY ts; + ++----------+ +| name | ++----------+ +| John Doe | ++----------+ + +SELECT email FROM like_test WHERE email LIKE '____@%' ORDER BY ts; + ++------------------+ +| email | ++------------------+ +| john@example.com | +| jane@gmail.com | ++------------------+ + +-- Multiple wildcards +-- Contains 'o' +SELECT "name" FROM like_test WHERE "name" LIKE '%o%' ORDER BY ts; + ++---------------+ +| name | ++---------------+ +| John Doe | +| Bob Wilson | +| Alice Johnson | +| Charlie Brown | ++---------------+ + +-- 'a' before and after @ +SELECT email FROM like_test WHERE email LIKE '%a%@%a%' ORDER BY ts; + ++-------------------+ +| email | ++-------------------+ +| jane@gmail.com | +| alice@company.org | ++-------------------+ + +-- Escaping special characters +CREATE TABLE escape_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO escape_test VALUES + ('100% complete', 1000), + ('test_file.txt', 2000), + ('50% done', 3000), + ('backup_2023.sql', 4000); + +Affected Rows: 4 + +-- Need to escape % and _ +-- Contains % +SELECT "text" FROM escape_test WHERE "text" LIKE '%\%%' ORDER BY ts; + ++---------------+ +| text | ++---------------+ +| 100% complete | +| 50% done | ++---------------+ + +-- Contains _ +SELECT "text" FROM escape_test WHERE "text" LIKE '%\_%' ORDER BY ts; + ++-----------------+ +| text | ++-----------------+ +| test_file.txt | +| backup_2023.sql | ++-----------------+ + +-- Unicode pattern matching +CREATE TABLE unicode_like(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO 
unicode_like VALUES + ('Hello 世界', 1000), + ('🚀 rocket', 2000), + ('café shop', 3000); + +Affected Rows: 3 + +SELECT s FROM unicode_like WHERE s LIKE '%世界' ORDER BY ts; + ++------------+ +| s | ++------------+ +| Hello 世界 | ++------------+ + +SELECT s FROM unicode_like WHERE s LIKE '🚀%' ORDER BY ts; + ++-----------+ +| s | ++-----------+ +| 🚀 rocket | ++-----------+ + +SELECT s FROM unicode_like WHERE s LIKE '%é%' ORDER BY ts; + ++-----------+ +| s | ++-----------+ +| café shop | ++-----------+ + +DROP TABLE like_test; + +Affected Rows: 0 + +DROP TABLE escape_test; + +Affected Rows: 0 + +DROP TABLE unicode_like; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/like_pattern.sql b/tests/cases/standalone/common/function/string/like_pattern.sql new file mode 100644 index 000000000000..460fc42e33bc --- /dev/null +++ b/tests/cases/standalone/common/function/string/like_pattern.sql @@ -0,0 +1,97 @@ +-- String LIKE pattern matching tests + +-- Basic LIKE patterns +SELECT 'hello world' LIKE 'hello%'; + +SELECT 'hello world' LIKE '%world'; + +SELECT 'hello world' LIKE '%llo%'; + +SELECT 'hello world' LIKE 'hello_world'; + +SELECT 'hello world' LIKE 'hello world'; + +-- LIKE with NOT +SELECT 'hello world' NOT LIKE 'goodbye%'; + +SELECT 'hello world' NOT LIKE 'hello%'; + +-- Case sensitivity +SELECT 'Hello World' LIKE 'hello%'; + +SELECT 'Hello World' ILIKE 'hello%'; + +SELECT 'Hello World' ILIKE 'HELLO%'; + +-- Test with table data +CREATE TABLE like_test("name" VARCHAR, email VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO like_test VALUES + ('John Doe', 'john@example.com', 1000), + ('Jane Smith', 'jane@gmail.com', 2000), + ('Bob Wilson', 'bob@yahoo.com', 3000), + ('Alice Johnson', 'alice@company.org', 4000), + ('Charlie Brown', 'charlie@test.net', 5000); + +-- Pattern matching on names +SELECT "name" FROM like_test WHERE "name" LIKE 'J%' ORDER BY ts; + +SELECT "name" FROM like_test WHERE "name" LIKE '%son' ORDER BY ts; + +-- Contains space 
+SELECT "name" FROM like_test WHERE "name" LIKE '% %' ORDER BY ts; + +-- Pattern matching on emails +SELECT "name", email FROM like_test WHERE email LIKE '%@gmail.com' ORDER BY ts; + +SELECT "name", email FROM like_test WHERE email LIKE '%.com' ORDER BY ts; + +SELECT "name", email FROM like_test WHERE email LIKE '%@%.org' ORDER BY ts; + +-- Underscore wildcard +SELECT "name" FROM like_test WHERE "name" LIKE 'Jo__ ___' ORDER BY ts; + +SELECT email FROM like_test WHERE email LIKE '____@%' ORDER BY ts; + +-- Multiple wildcards +-- Contains 'o' +SELECT "name" FROM like_test WHERE "name" LIKE '%o%' ORDER BY ts; + +-- 'a' before and after @ +SELECT email FROM like_test WHERE email LIKE '%a%@%a%' ORDER BY ts; + +-- Escaping special characters +CREATE TABLE escape_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO escape_test VALUES + ('100% complete', 1000), + ('test_file.txt', 2000), + ('50% done', 3000), + ('backup_2023.sql', 4000); + +-- Need to escape % and _ +-- Contains % +SELECT "text" FROM escape_test WHERE "text" LIKE '%\%%' ORDER BY ts; + +-- Contains _ +SELECT "text" FROM escape_test WHERE "text" LIKE '%\_%' ORDER BY ts; + +-- Unicode pattern matching +CREATE TABLE unicode_like(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_like VALUES + ('Hello 世界', 1000), + ('🚀 rocket', 2000), + ('café shop', 3000); + +SELECT s FROM unicode_like WHERE s LIKE '%世界' ORDER BY ts; + +SELECT s FROM unicode_like WHERE s LIKE '🚀%' ORDER BY ts; + +SELECT s FROM unicode_like WHERE s LIKE '%é%' ORDER BY ts; + +DROP TABLE like_test; + +DROP TABLE escape_test; + +DROP TABLE unicode_like; diff --git a/tests/cases/standalone/common/function/string/position.result b/tests/cases/standalone/common/function/string/position.result new file mode 100644 index 000000000000..1b65fb6fba2c --- /dev/null +++ b/tests/cases/standalone/common/function/string/position.result @@ -0,0 +1,278 @@ +-- String position/search function tests +-- POSITION function +SELECT POSITION('world' 
IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("world")) | ++-------------------------------------------+ +| 7 | ++-------------------------------------------+ + +SELECT POSITION('xyz' IN 'hello world'); + ++-----------------------------------------+ +| strpos(Utf8("hello world"),Utf8("xyz")) | ++-----------------------------------------+ +| 0 | ++-----------------------------------------+ + +SELECT POSITION('' IN 'hello world'); + ++--------------------------------------+ +| strpos(Utf8("hello world"),Utf8("")) | ++--------------------------------------+ +| 1 | ++--------------------------------------+ + +SELECT POSITION('world' IN ''); + ++--------------------------------+ +| strpos(Utf8(""),Utf8("world")) | ++--------------------------------+ +| 0 | ++--------------------------------+ + +-- STRPOS function (same as POSITION) +SELECT STRPOS('hello world', 'world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("world")) | ++-------------------------------------------+ +| 7 | ++-------------------------------------------+ + +SELECT STRPOS('hello world', 'xyz'); + ++-----------------------------------------+ +| strpos(Utf8("hello world"),Utf8("xyz")) | ++-----------------------------------------+ +| 0 | ++-----------------------------------------+ + +SELECT STRPOS('hello world', 'hello'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("hello")) | ++-------------------------------------------+ +| 1 | ++-------------------------------------------+ + +SELECT STRPOS('hello world', 'o'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("o")) | ++---------------------------------------+ +| 5 | ++---------------------------------------+ + +-- INSTR function +SELECT INSTR('hello world', 'world'); + ++------------------------------------------+ +| instr(Utf8("hello world"),Utf8("world")) | 
++------------------------------------------+ +| 7 | ++------------------------------------------+ + +SELECT INSTR('hello world', 'o'); + ++--------------------------------------+ +| instr(Utf8("hello world"),Utf8("o")) | ++--------------------------------------+ +| 5 | ++--------------------------------------+ + +SELECT INSTR('hello world', 'xyz'); + ++----------------------------------------+ +| instr(Utf8("hello world"),Utf8("xyz")) | ++----------------------------------------+ +| 0 | ++----------------------------------------+ + +-- Case sensitive search +SELECT POSITION('WORLD' IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("WORLD")) | ++-------------------------------------------+ +| 0 | ++-------------------------------------------+ + +SELECT POSITION('World' IN 'hello world'); + ++-------------------------------------------+ +| strpos(Utf8("hello world"),Utf8("World")) | ++-------------------------------------------+ +| 0 | ++-------------------------------------------+ + +-- LEFT and RIGHT functions +SELECT LEFT('hello world', 5); + ++------------------------------------+ +| left(Utf8("hello world"),Int64(5)) | ++------------------------------------+ +| hello | ++------------------------------------+ + +SELECT RIGHT('hello world', 5); + ++-------------------------------------+ +| right(Utf8("hello world"),Int64(5)) | ++-------------------------------------+ +| world | ++-------------------------------------+ + +-- More than string length +SELECT LEFT('hello', 10); + ++-------------------------------+ +| left(Utf8("hello"),Int64(10)) | ++-------------------------------+ +| hello | ++-------------------------------+ + +-- More than string length +SELECT RIGHT('hello', 10); + ++--------------------------------+ +| right(Utf8("hello"),Int64(10)) | ++--------------------------------+ +| hello | ++--------------------------------+ + +-- Test with NULL values +SELECT POSITION('world' IN NULL); + 
++----------------------------+ +| strpos(NULL,Utf8("world")) | ++----------------------------+ +| | ++----------------------------+ + +SELECT POSITION(NULL IN 'hello world'); + ++----------------------------------+ +| strpos(Utf8("hello world"),NULL) | ++----------------------------------+ +| | ++----------------------------------+ + +SELECT LEFT(NULL, 5); + ++---------------------+ +| left(NULL,Int64(5)) | ++---------------------+ +| | ++---------------------+ + +SELECT RIGHT('hello', NULL); + ++---------------------------+ +| right(Utf8("hello"),NULL) | ++---------------------------+ +| | ++---------------------------+ + +-- Test with table data +CREATE TABLE position_test(s VARCHAR, "search" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO position_test VALUES + ('hello world', 'world', 1000), + ('hello world', 'hello', 2000), + ('hello world', 'xyz', 3000), + ('programming', 'gram', 4000), + ('database', 'base', 5000); + +Affected Rows: 5 + +SELECT s, "search", POSITION("search" IN s) AS a, STRPOS(s, "search") AS b FROM position_test ORDER BY ts; + ++-------------+--------+---+---+ +| s | search | a | b | ++-------------+--------+---+---+ +| hello world | world | 7 | 7 | +| hello world | hello | 1 | 1 | +| hello world | xyz | 0 | 0 | +| programming | gram | 4 | 4 | +| database | base | 5 | 5 | ++-------------+--------+---+---+ + +-- Test LEFT and RIGHT with table data +SELECT s, LEFT(s, 5), RIGHT(s, 5) FROM position_test ORDER BY ts; + ++-------------+--------------------------------+---------------------------------+ +| s | left(position_test.s,Int64(5)) | right(position_test.s,Int64(5)) | ++-------------+--------------------------------+---------------------------------+ +| hello world | hello | world | +| hello world | hello | world | +| hello world | hello | world | +| programming | progr | mming | +| database | datab | abase | ++-------------+--------------------------------+---------------------------------+ + +-- Unicode position 
tests +SELECT POSITION('世' IN 'hello世界'); + ++--------------------------------------+ +| strpos(Utf8("hello世界"),Utf8("世")) | ++--------------------------------------+ +| 6 | ++--------------------------------------+ + +SELECT POSITION('界' IN 'hello世界'); + ++--------------------------------------+ +| strpos(Utf8("hello世界"),Utf8("界")) | ++--------------------------------------+ +| 7 | ++--------------------------------------+ + +SELECT STRPOS('café shop', 'é'); + ++-------------------------------------+ +| strpos(Utf8("café shop"),Utf8("é")) | ++-------------------------------------+ +| 4 | ++-------------------------------------+ + +SELECT LEFT('中文测试', 2); + ++---------------------------------+ +| left(Utf8("中文测试"),Int64(2)) | ++---------------------------------+ +| 中文 | ++---------------------------------+ + +SELECT RIGHT('中文测试', 2); + ++----------------------------------+ +| right(Utf8("中文测试"),Int64(2)) | ++----------------------------------+ +| 测试 | ++----------------------------------+ + +-- Multiple occurrences (finds first one) +SELECT POSITION('o' IN 'hello world'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("o")) | ++---------------------------------------+ +| 5 | ++---------------------------------------+ + +SELECT STRPOS('hello world', 'l'); + ++---------------------------------------+ +| strpos(Utf8("hello world"),Utf8("l")) | ++---------------------------------------+ +| 3 | ++---------------------------------------+ + +DROP TABLE position_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/position.sql b/tests/cases/standalone/common/function/string/position.sql new file mode 100644 index 000000000000..519a9146d316 --- /dev/null +++ b/tests/cases/standalone/common/function/string/position.sql @@ -0,0 +1,84 @@ +-- String position/search function tests + +-- POSITION function +SELECT POSITION('world' IN 'hello world'); + +SELECT POSITION('xyz' IN 'hello world'); + +SELECT POSITION('' 
IN 'hello world'); + +SELECT POSITION('world' IN ''); + +-- STRPOS function (same as POSITION) +SELECT STRPOS('hello world', 'world'); + +SELECT STRPOS('hello world', 'xyz'); + +SELECT STRPOS('hello world', 'hello'); + +SELECT STRPOS('hello world', 'o'); + +-- INSTR function +SELECT INSTR('hello world', 'world'); + +SELECT INSTR('hello world', 'o'); + +SELECT INSTR('hello world', 'xyz'); + +-- Case sensitive search +SELECT POSITION('WORLD' IN 'hello world'); + +SELECT POSITION('World' IN 'hello world'); + +-- LEFT and RIGHT functions +SELECT LEFT('hello world', 5); + +SELECT RIGHT('hello world', 5); + +-- More than string length +SELECT LEFT('hello', 10); + +-- More than string length +SELECT RIGHT('hello', 10); + +-- Test with NULL values +SELECT POSITION('world' IN NULL); + +SELECT POSITION(NULL IN 'hello world'); + +SELECT LEFT(NULL, 5); + +SELECT RIGHT('hello', NULL); + +-- Test with table data +CREATE TABLE position_test(s VARCHAR, "search" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO position_test VALUES + ('hello world', 'world', 1000), + ('hello world', 'hello', 2000), + ('hello world', 'xyz', 3000), + ('programming', 'gram', 4000), + ('database', 'base', 5000); + +SELECT s, "search", POSITION("search" IN s) AS a, STRPOS(s, "search") AS b FROM position_test ORDER BY ts; + +-- Test LEFT and RIGHT with table data +SELECT s, LEFT(s, 5), RIGHT(s, 5) FROM position_test ORDER BY ts; + +-- Unicode position tests +SELECT POSITION('世' IN 'hello世界'); + +SELECT POSITION('界' IN 'hello世界'); + +SELECT STRPOS('café shop', 'é'); + +SELECT LEFT('中文测试', 2); + +SELECT RIGHT('中文测试', 2); + +-- Multiple occurrences (finds first one) +SELECT POSITION('o' IN 'hello world'); + +SELECT STRPOS('hello world', 'l'); + +DROP TABLE position_test; diff --git a/tests/cases/standalone/common/function/string/regex.result b/tests/cases/standalone/common/function/string/regex.result new file mode 100644 index 000000000000..b7030f4346cc --- /dev/null +++ 
b/tests/cases/standalone/common/function/string/regex.result @@ -0,0 +1,143 @@ +-- Regular expression function tests +-- REGEXP_LIKE function +SELECT regexp_like('hello123world', '\d+'); + ++------------------------------------------------+ +| regexp_like(Utf8("hello123world"),Utf8("\d+")) | ++------------------------------------------------+ +| true | ++------------------------------------------------+ + +SELECT regexp_like('no numbers here', '\d+'); + ++--------------------------------------------------+ +| regexp_like(Utf8("no numbers here"),Utf8("\d+")) | ++--------------------------------------------------+ +| false | ++--------------------------------------------------+ + +SELECT regexp_like('email@example.com', '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'); + ++-------------------------------------------------------------------------------------+ +| regexp_like(Utf8("email@example.com"),Utf8("[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+")) | ++-------------------------------------------------------------------------------------+ +| true | ++-------------------------------------------------------------------------------------+ + +-- REGEXP_REPLACE function +SELECT REGEXP_REPLACE('hello123world', '\d+', 'XXX'); + ++---------------------------------------------------------------+ +| regexp_replace(Utf8("hello123world"),Utf8("\d+"),Utf8("XXX")) | ++---------------------------------------------------------------+ +| helloXXXworld | ++---------------------------------------------------------------+ + +SELECT REGEXP_REPLACE('phone: 123-456-7890', '\d{3}-\d{3}-\d{4}', 'XXX-XXX-XXXX'); + ++--------------------------------------------------------------------------------------------+ +| regexp_replace(Utf8("phone: 123-456-7890"),Utf8("\d{3}-\d{3}-\d{4}"),Utf8("XXX-XXX-XXXX")) | ++--------------------------------------------------------------------------------------------+ +| phone: XXX-XXX-XXXX |
++--------------------------------------------------------------------------------------------+ + +SELECT REGEXP_REPLACE(' extra spaces ', '\s+', ' '); + ++------------------------------------------------------------------+ +| regexp_replace(Utf8(" extra spaces "),Utf8("\s+"),Utf8(" ")) | ++------------------------------------------------------------------+ +| extra spaces | ++------------------------------------------------------------------+ + +-- REGEXP_EXTRACT function +SELECT REGEXP_EXTRACT('version 1.2.3', '\d+\.\d+\.\d+'); + ++-------------------------------------------------------------+ +| regexp_extract(Utf8("version 1.2.3"),Utf8("\d+\.\d+\.\d+")) | ++-------------------------------------------------------------+ +| 1.2.3 | ++-------------------------------------------------------------+ + +SELECT REGEXP_EXTRACT('no match here', '\d+\.\d+\.\d+'); + ++-------------------------------------------------------------+ +| regexp_extract(Utf8("no match here"),Utf8("\d+\.\d+\.\d+")) | ++-------------------------------------------------------------+ +| | ++-------------------------------------------------------------+ + +-- Test with ~ operator (regex match) +SELECT 'hello123' ~ '\d+'; + ++--------------------------------+ +| Utf8("hello123") ~ Utf8("\d+") | ++--------------------------------+ +| true | ++--------------------------------+ + +SELECT 'hello world' ~ '\d+'; + ++-----------------------------------+ +| Utf8("hello world") ~ Utf8("\d+") | ++-----------------------------------+ +| false | ++-----------------------------------+ + +SELECT 'email@example.com' ~ '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'; + ++--------------------------------------------------------------------------+ +| Utf8("email@example.com") ~ Utf8("[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+") | ++--------------------------------------------------------------------------+ +| true | ++--------------------------------------------------------------------------+ + +-- Test with table data +CREATE TABLE
regex_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO regex_test VALUES + ('Phone: 123-456-7890', 1000), + ('Email: user@domain.com', 2000), + ('Version 2.1.0', 3000), + ('No pattern here', 4000); + +Affected Rows: 4 + +SELECT "text", REGEXP_EXTRACT("text", '\d{3}-\d{3}-\d{4}') as phone FROM regex_test ORDER BY ts; + ++------------------------+--------------+ +| text | phone | ++------------------------+--------------+ +| Phone: 123-456-7890 | 123-456-7890 | +| Email: user@domain.com | | +| Version 2.1.0 | | +| No pattern here | | ++------------------------+--------------+ + +SELECT "text", REGEXP_EXTRACT("text", '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+') as email FROM regex_test ORDER BY ts; + ++------------------------+-----------------+ +| text | email | ++------------------------+-----------------+ +| Phone: 123-456-7890 | | +| Email: user@domain.com | user@domain.com | +| Version 2.1.0 | | +| No pattern here | | ++------------------------+-----------------+ + +SELECT "text", REGEXP_EXTRACT("text", '\d+\.\d+\.\d+') as version FROM regex_test ORDER BY ts; + ++------------------------+---------+ +| text | version | ++------------------------+---------+ +| Phone: 123-456-7890 | | +| Email: user@domain.com | | +| Version 2.1.0 | | +| No pattern here | | ++------------------------+---------+ + +DROP TABLE regex_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/regex.sql b/tests/cases/standalone/common/function/string/regex.sql new file mode 100644 index 000000000000..10e0cbcc4dc0 --- /dev/null +++ b/tests/cases/standalone/common/function/string/regex.sql @@ -0,0 +1,44 @@ +-- Regular expression function tests + +-- REGEXP_LIKE function +SELECT regexp_like('hello123world', '\d+'); + +SELECT regexp_like('no numbers here', '\d+'); + +SELECT regexp_like('email@example.com', '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'); + +-- REGEXP_REPLACE function +SELECT REGEXP_REPLACE('hello123world', '\d+',
'XXX'); + +SELECT REGEXP_REPLACE('phone: 123-456-7890', '\d{3}-\d{3}-\d{4}', 'XXX-XXX-XXXX'); + +SELECT REGEXP_REPLACE(' extra spaces ', '\s+', ' '); + +-- REGEXP_EXTRACT function +SELECT REGEXP_EXTRACT('version 1.2.3', '\d+\.\d+\.\d+'); + +SELECT REGEXP_EXTRACT('no match here', '\d+\.\d+\.\d+'); + +-- Test with ~ operator (regex match) +SELECT 'hello123' ~ '\d+'; + +SELECT 'hello world' ~ '\d+'; + +SELECT 'email@example.com' ~ '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+'; + +-- Test with table data +CREATE TABLE regex_test("text" VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO regex_test VALUES + ('Phone: 123-456-7890', 1000), + ('Email: user@domain.com', 2000), + ('Version 2.1.0', 3000), + ('No pattern here', 4000); + +SELECT "text", REGEXP_EXTRACT("text", '\d{3}-\d{3}-\d{4}') as phone FROM regex_test ORDER BY ts; + +SELECT "text", REGEXP_EXTRACT("text", '[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+') as email FROM regex_test ORDER BY ts; + +SELECT "text", REGEXP_EXTRACT("text", '\d+\.\d+\.\d+') as version FROM regex_test ORDER BY ts; + +DROP TABLE regex_test; diff --git a/tests/cases/standalone/common/function/string/repeat.result b/tests/cases/standalone/common/function/string/repeat.result new file mode 100644 index 000000000000..32ecc614d103 --- /dev/null +++ b/tests/cases/standalone/common/function/string/repeat.result @@ -0,0 +1,217 @@ +-- String REPEAT function tests +-- Basic REPEAT function +SELECT REPEAT('hello', 3); + ++--------------------------------+ +| repeat(Utf8("hello"),Int64(3)) | ++--------------------------------+ +| hellohellohello | ++--------------------------------+ + +SELECT REPEAT('a', 5); + ++----------------------------+ +| repeat(Utf8("a"),Int64(5)) | ++----------------------------+ +| aaaaa | ++----------------------------+ + +SELECT REPEAT('', 3); + ++---------------------------+ +| repeat(Utf8(""),Int64(3)) | ++---------------------------+ +| | ++---------------------------+ + +SELECT REPEAT('test', 0); + ++-------------------------------+ +| 
repeat(Utf8("test"),Int64(0)) | ++-------------------------------+ +| | ++-------------------------------+ + +SELECT REPEAT('test', 1); + ++-------------------------------+ +| repeat(Utf8("test"),Int64(1)) | ++-------------------------------+ +| test | ++-------------------------------+ + +-- REPEAT with NULL values +SELECT REPEAT(NULL, 3); + ++-----------------------+ +| repeat(NULL,Int64(3)) | ++-----------------------+ +| | ++-----------------------+ + +SELECT REPEAT('hello', NULL); + ++----------------------------+ +| repeat(Utf8("hello"),NULL) | ++----------------------------+ +| | ++----------------------------+ + +-- REPEAT with negative numbers +SELECT REPEAT('hello', -1); + ++---------------------------------+ +| repeat(Utf8("hello"),Int64(-1)) | ++---------------------------------+ +| | ++---------------------------------+ + +-- REPEAT with special characters +SELECT REPEAT('*', 10); + ++-----------------------------+ +| repeat(Utf8("*"),Int64(10)) | ++-----------------------------+ +| ********** | ++-----------------------------+ + +SELECT REPEAT('-=', 5); + ++-----------------------------+ +| repeat(Utf8("-="),Int64(5)) | ++-----------------------------+ +| -=-=-=-=-= | ++-----------------------------+ + +SELECT REPEAT('!@#', 3); + ++------------------------------+ +| repeat(Utf8("!@#"),Int64(3)) | ++------------------------------+ +| !@#!@#!@# | ++------------------------------+ + +-- Test with table data +CREATE TABLE repeat_test(s VARCHAR, n INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO repeat_test VALUES + ('hello', 2, 1000), + ('*', 5, 2000), + ('test', 0, 3000), + ('a', 10, 4000), + (NULL, 3, 5000), + ('hi', NULL, 6000); + +Affected Rows: 6 + +SELECT s, n, REPEAT(s, n) FROM repeat_test ORDER BY ts; + ++-------+----+-------------------------------------+ +| s | n | repeat(repeat_test.s,repeat_test.n) | ++-------+----+-------------------------------------+ +| hello | 2 | hellohello | +| * | 5 | ***** | +| test | 0 | | +| a | 
10 | aaaaaaaaaa | +| | 3 | | +| hi | | | ++-------+----+-------------------------------------+ + +-- Unicode REPEAT +SELECT REPEAT('世', 3); + ++-----------------------------+ +| repeat(Utf8("世"),Int64(3)) | ++-----------------------------+ +| 世世世 | ++-----------------------------+ + +SELECT REPEAT('🚀', 5); + ++-----------------------------+ +| repeat(Utf8("🚀"),Int64(5)) | ++-----------------------------+ +| 🚀🚀🚀🚀🚀 | ++-----------------------------+ + +SELECT REPEAT('café', 2); + ++-------------------------------+ +| repeat(Utf8("café"),Int64(2)) | ++-------------------------------+ +| cafécafé | ++-------------------------------+ + +-- REPEAT with spaces and formatting +SELECT REPEAT(' ', 10); + ++-----------------------------+ +| repeat(Utf8(" "),Int64(10)) | ++-----------------------------+ +| | ++-----------------------------+ + +SELECT REPEAT('\t', 3); + ++-----------------------------+ +| repeat(Utf8("\t"),Int64(3)) | ++-----------------------------+ +| \t\t\t | ++-----------------------------+ + +SELECT CONCAT('Start', REPEAT('-', 10), 'End'); + ++---------------------------------------------------------------+ +| concat(Utf8("Start"),repeat(Utf8("-"),Int64(10)),Utf8("End")) | ++---------------------------------------------------------------+ +| Start----------End | ++---------------------------------------------------------------+ + +-- Large REPEAT operations +SELECT LENGTH(REPEAT('a', 100)); + ++---------------------------------------+ +| length(repeat(Utf8("a"), Int64(100))) | ++---------------------------------------+ +| 100 | ++---------------------------------------+ + +SELECT LENGTH(REPEAT('ab', 50)); + ++---------------------------------------+ +| length(repeat(Utf8("ab"), Int64(50))) | ++---------------------------------------+ +| 100 | ++---------------------------------------+ + +-- Combining REPEAT with other functions +SELECT UPPER(REPEAT('hello', 3)); + ++---------------------------------------+ +| upper(repeat(Utf8("hello"),Int64(3))) | 
++---------------------------------------+ +| HELLOHELLOHELLO | ++---------------------------------------+ + +SELECT REPEAT(UPPER('hello'), 2); + ++---------------------------------------+ +| repeat(upper(Utf8("hello")),Int64(2)) | ++---------------------------------------+ +| HELLOHELLO | ++---------------------------------------+ + +SELECT REVERSE(REPEAT('abc', 3)); + ++---------------------------------------+ +| reverse(repeat(Utf8("abc"),Int64(3))) | ++---------------------------------------+ +| cbacbacba | ++---------------------------------------+ + +DROP TABLE repeat_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/repeat.sql b/tests/cases/standalone/common/function/string/repeat.sql new file mode 100644 index 000000000000..6e75de811386 --- /dev/null +++ b/tests/cases/standalone/common/function/string/repeat.sql @@ -0,0 +1,68 @@ +-- String REPEAT function tests + +-- Basic REPEAT function +SELECT REPEAT('hello', 3); + +SELECT REPEAT('a', 5); + +SELECT REPEAT('', 3); + +SELECT REPEAT('test', 0); + +SELECT REPEAT('test', 1); + +-- REPEAT with NULL values +SELECT REPEAT(NULL, 3); + +SELECT REPEAT('hello', NULL); + +-- REPEAT with negative numbers +SELECT REPEAT('hello', -1); + +-- REPEAT with special characters +SELECT REPEAT('*', 10); + +SELECT REPEAT('-=', 5); + +SELECT REPEAT('!@#', 3); + +-- Test with table data +CREATE TABLE repeat_test(s VARCHAR, n INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO repeat_test VALUES + ('hello', 2, 1000), + ('*', 5, 2000), + ('test', 0, 3000), + ('a', 10, 4000), + (NULL, 3, 5000), + ('hi', NULL, 6000); + +SELECT s, n, REPEAT(s, n) FROM repeat_test ORDER BY ts; + +-- Unicode REPEAT +SELECT REPEAT('世', 3); + +SELECT REPEAT('🚀', 5); + +SELECT REPEAT('café', 2); + +-- REPEAT with spaces and formatting +SELECT REPEAT(' ', 10); + +SELECT REPEAT('\t', 3); + +SELECT CONCAT('Start', REPEAT('-', 10), 'End'); + +-- Large REPEAT operations +SELECT LENGTH(REPEAT('a', 100)); + +SELECT 
LENGTH(REPEAT('ab', 50)); + +-- Combining REPEAT with other functions +SELECT UPPER(REPEAT('hello', 3)); + +SELECT REPEAT(UPPER('hello'), 2); + +SELECT REVERSE(REPEAT('abc', 3)); + +DROP TABLE repeat_test; diff --git a/tests/cases/standalone/common/function/string/replace.result b/tests/cases/standalone/common/function/string/replace.result new file mode 100644 index 000000000000..a4e1790d34ed --- /dev/null +++ b/tests/cases/standalone/common/function/string/replace.result @@ -0,0 +1,180 @@ +-- String REPLACE function tests +-- Basic REPLACE function +SELECT REPLACE('hello world', 'world', 'universe'); + ++-------------------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),Utf8("universe")) | ++-------------------------------------------------------------+ +| hello universe | ++-------------------------------------------------------------+ + +SELECT REPLACE('hello world', 'xyz', 'abc'); + ++------------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("xyz"),Utf8("abc")) | ++------------------------------------------------------+ +| hello world | ++------------------------------------------------------+ + +SELECT REPLACE('hello hello hello', 'hello', 'hi'); + ++-------------------------------------------------------------+ +| replace(Utf8("hello hello hello"),Utf8("hello"),Utf8("hi")) | ++-------------------------------------------------------------+ +| hi hi hi | ++-------------------------------------------------------------+ + +-- REPLACE with empty strings +SELECT REPLACE('hello world', 'world', ''); + ++-----------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),Utf8("")) | ++-----------------------------------------------------+ +| hello | ++-----------------------------------------------------+ + +SELECT REPLACE('hello world', '', 'xyz'); + ++---------------------------------------------------+ +| replace(Utf8("hello world"),Utf8(""),Utf8("xyz")) | 
++---------------------------------------------------+ +| xyzhxyzexyzlxyzlxyzoxyz xyzwxyzoxyzrxyzlxyzdxyz | ++---------------------------------------------------+ + +SELECT REPLACE('', 'xyz', 'abc'); + ++-------------------------------------------+ +| replace(Utf8(""),Utf8("xyz"),Utf8("abc")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ + +-- Case sensitive replacement +SELECT REPLACE('Hello World', 'hello', 'hi'); + ++-------------------------------------------------------+ +| replace(Utf8("Hello World"),Utf8("hello"),Utf8("hi")) | ++-------------------------------------------------------+ +| Hello World | ++-------------------------------------------------------+ + +SELECT REPLACE('Hello World', 'Hello', 'Hi'); + ++-------------------------------------------------------+ +| replace(Utf8("Hello World"),Utf8("Hello"),Utf8("Hi")) | ++-------------------------------------------------------+ +| Hi World | ++-------------------------------------------------------+ + +-- NULL handling +SELECT REPLACE(NULL, 'world', 'universe'); + ++----------------------------------------------+ +| replace(NULL,Utf8("world"),Utf8("universe")) | ++----------------------------------------------+ +| | ++----------------------------------------------+ + +SELECT REPLACE('hello world', NULL, 'universe'); + ++----------------------------------------------------+ +| replace(Utf8("hello world"),NULL,Utf8("universe")) | ++----------------------------------------------------+ +| | ++----------------------------------------------------+ + +SELECT REPLACE('hello world', 'world', NULL); + ++-------------------------------------------------+ +| replace(Utf8("hello world"),Utf8("world"),NULL) | ++-------------------------------------------------+ +| | ++-------------------------------------------------+ + +-- Test with table data +CREATE TABLE replace_test(s VARCHAR, old_str VARCHAR, new_str VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + 
+INSERT INTO replace_test VALUES + ('hello world', 'world', 'universe', 1000), + ('programming language', 'language', 'paradigm', 2000), + ('test test test', 'test', 'exam', 3000), + ('no match here', 'xyz', 'abc', 4000); + +Affected Rows: 4 + +SELECT s, old_str, new_str, REPLACE(s, old_str, new_str) FROM replace_test ORDER BY ts; + ++----------------------+----------+----------+-------------------------------------------------------------------+ +| s | old_str | new_str | replace(replace_test.s,replace_test.old_str,replace_test.new_str) | ++----------------------+----------+----------+-------------------------------------------------------------------+ +| hello world | world | universe | hello universe | +| programming language | language | paradigm | programming paradigm | +| test test test | test | exam | exam exam exam | +| no match here | xyz | abc | no match here | ++----------------------+----------+----------+-------------------------------------------------------------------+ + +-- Unicode replacement +SELECT REPLACE('hello 世界', '世界', 'world'); + ++--------------------------------------------------------+ +| replace(Utf8("hello 世界"),Utf8("世界"),Utf8("world")) | ++--------------------------------------------------------+ +| hello world | ++--------------------------------------------------------+ + +SELECT REPLACE('café shop', 'é', 'e'); + ++------------------------------------------------+ +| replace(Utf8("café shop"),Utf8("é"),Utf8("e")) | ++------------------------------------------------+ +| cafe shop | ++------------------------------------------------+ + +SELECT REPLACE('🚀 rocket 🚀', '🚀', '✈️'); + ++-----------------------------------------------------+ +| replace(Utf8("🚀 rocket 🚀"),Utf8("🚀"),Utf8("✈️")) | ++-----------------------------------------------------+ +| ✈️ rocket ✈️ | ++-----------------------------------------------------+ + +-- Multiple character replacement +SELECT REPLACE('hello-world-test', '-', '_'); + 
++-------------------------------------------------------+ +| replace(Utf8("hello-world-test"),Utf8("-"),Utf8("_")) | ++-------------------------------------------------------+ +| hello_world_test | ++-------------------------------------------------------+ + +SELECT REPLACE('abc::def::ghi', '::', '-->'); + ++-------------------------------------------------------+ +| replace(Utf8("abc::def::ghi"),Utf8("::"),Utf8("-->")) | ++-------------------------------------------------------+ +| abc-->def-->ghi | ++-------------------------------------------------------+ + +-- Overlapping patterns +SELECT REPLACE('ababab', 'ab', 'xy'); + ++-----------------------------------------------+ +| replace(Utf8("ababab"),Utf8("ab"),Utf8("xy")) | ++-----------------------------------------------+ +| xyxyxy | ++-----------------------------------------------+ + +SELECT REPLACE('aaa', 'aa', 'b'); + ++-------------------------------------------+ +| replace(Utf8("aaa"),Utf8("aa"),Utf8("b")) | ++-------------------------------------------+ +| ba | ++-------------------------------------------+ + +DROP TABLE replace_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/replace.sql b/tests/cases/standalone/common/function/string/replace.sql new file mode 100644 index 000000000000..20006ae7c870 --- /dev/null +++ b/tests/cases/standalone/common/function/string/replace.sql @@ -0,0 +1,57 @@ +-- String REPLACE function tests + +-- Basic REPLACE function +SELECT REPLACE('hello world', 'world', 'universe'); + +SELECT REPLACE('hello world', 'xyz', 'abc'); + +SELECT REPLACE('hello hello hello', 'hello', 'hi'); + +-- REPLACE with empty strings +SELECT REPLACE('hello world', 'world', ''); + +SELECT REPLACE('hello world', '', 'xyz'); + +SELECT REPLACE('', 'xyz', 'abc'); + +-- Case sensitive replacement +SELECT REPLACE('Hello World', 'hello', 'hi'); + +SELECT REPLACE('Hello World', 'Hello', 'Hi'); + +-- NULL handling +SELECT REPLACE(NULL, 'world', 'universe'); + +SELECT 
REPLACE('hello world', NULL, 'universe'); + +SELECT REPLACE('hello world', 'world', NULL); + +-- Test with table data +CREATE TABLE replace_test(s VARCHAR, old_str VARCHAR, new_str VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO replace_test VALUES + ('hello world', 'world', 'universe', 1000), + ('programming language', 'language', 'paradigm', 2000), + ('test test test', 'test', 'exam', 3000), + ('no match here', 'xyz', 'abc', 4000); + +SELECT s, old_str, new_str, REPLACE(s, old_str, new_str) FROM replace_test ORDER BY ts; + +-- Unicode replacement +SELECT REPLACE('hello 世界', '世界', 'world'); + +SELECT REPLACE('café shop', 'é', 'e'); + +SELECT REPLACE('🚀 rocket 🚀', '🚀', '✈️'); + +-- Multiple character replacement +SELECT REPLACE('hello-world-test', '-', '_'); + +SELECT REPLACE('abc::def::ghi', '::', '-->'); + +-- Overlapping patterns +SELECT REPLACE('ababab', 'ab', 'xy'); + +SELECT REPLACE('aaa', 'aa', 'b'); + +DROP TABLE replace_test; diff --git a/tests/cases/standalone/common/function/string/reverse.result b/tests/cases/standalone/common/function/string/reverse.result new file mode 100644 index 000000000000..00bd73f49bd2 --- /dev/null +++ b/tests/cases/standalone/common/function/string/reverse.result @@ -0,0 +1,200 @@ +-- String REVERSE function tests +-- Basic REVERSE function +SELECT REVERSE('hello'); + ++------------------------+ +| reverse(Utf8("hello")) | ++------------------------+ +| olleh | ++------------------------+ + +SELECT REVERSE('world'); + ++------------------------+ +| reverse(Utf8("world")) | ++------------------------+ +| dlrow | ++------------------------+ + +SELECT REVERSE(''); + ++-------------------+ +| reverse(Utf8("")) | ++-------------------+ +| | ++-------------------+ + +SELECT REVERSE(NULL); + ++---------------+ +| reverse(NULL) | ++---------------+ +| | ++---------------+ + +-- REVERSE with numbers and special characters +SELECT REVERSE('12345'); + ++------------------------+ +| reverse(Utf8("12345")) | ++------------------------+ 
+| 54321 | ++------------------------+ + +SELECT REVERSE('hello!'); + ++-------------------------+ +| reverse(Utf8("hello!")) | ++-------------------------+ +| !olleh | ++-------------------------+ + +SELECT REVERSE('a!@#$%b'); + ++--------------------------+ +| reverse(Utf8("a!@#$%b")) | ++--------------------------+ +| b%$#@!a | ++--------------------------+ + +-- REVERSE with palindromes +SELECT REVERSE('radar'); + ++------------------------+ +| reverse(Utf8("radar")) | ++------------------------+ +| radar | ++------------------------+ + +SELECT REVERSE('madam'); + ++------------------------+ +| reverse(Utf8("madam")) | ++------------------------+ +| madam | ++------------------------+ + +SELECT REVERSE('racecar'); + ++--------------------------+ +| reverse(Utf8("racecar")) | ++--------------------------+ +| racecar | ++--------------------------+ + +-- Test with table data +CREATE TABLE reverse_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO reverse_test VALUES + ('hello', 1000), + ('world', 2000), + ('12345', 3000), + ('radar', 4000), + ('', 5000), + (NULL, 6000); + +Affected Rows: 6 + +SELECT s, REVERSE(s) FROM reverse_test ORDER BY ts; + ++-------+-------------------------+ +| s | reverse(reverse_test.s) | ++-------+-------------------------+ +| hello | olleh | +| world | dlrow | +| 12345 | 54321 | +| radar | radar | +| | | +| | | ++-------+-------------------------+ + +-- Unicode REVERSE +SELECT REVERSE('世界'); + ++-----------------------+ +| reverse(Utf8("世界")) | ++-----------------------+ +| 界世 | ++-----------------------+ + +SELECT REVERSE('café'); + ++-----------------------+ +| reverse(Utf8("café")) | ++-----------------------+ +| éfac | ++-----------------------+ + +SELECT REVERSE('🚀🌟'); + ++-----------------------+ +| reverse(Utf8("🚀🌟")) | ++-----------------------+ +| 🌟🚀 | ++-----------------------+ + +-- REVERSE with spaces +SELECT REVERSE('hello world'); + ++------------------------------+ +| reverse(Utf8("hello 
world")) | ++------------------------------+ +| dlrow olleh | ++------------------------------+ + +SELECT REVERSE(' spaces '); + ++-----------------------------+ +| reverse(Utf8(" spaces ")) | ++-----------------------------+ +| secaps | ++-----------------------------+ + +-- Combining REVERSE with other functions +SELECT UPPER(REVERSE('hello')); + ++-------------------------------+ +| upper(reverse(Utf8("hello"))) | ++-------------------------------+ +| OLLEH | ++-------------------------------+ + +SELECT REVERSE(UPPER('hello')); + ++-------------------------------+ +| reverse(upper(Utf8("hello"))) | ++-------------------------------+ +| OLLEH | ++-------------------------------+ + +SELECT LENGTH(REVERSE('hello world')); + ++--------------------------------------+ +| length(reverse(Utf8("hello world"))) | ++--------------------------------------+ +| 11 | ++--------------------------------------+ + +-- Double REVERSE (should return original) +SELECT REVERSE(REVERSE('hello world')); + ++---------------------------------------+ +| reverse(reverse(Utf8("hello world"))) | ++---------------------------------------+ +| hello world | ++---------------------------------------+ + +SELECT REVERSE(REVERSE('中文测试')); + ++------------------------------------+ +| reverse(reverse(Utf8("中文测试"))) | ++------------------------------------+ +| 中文测试 | ++------------------------------------+ + +DROP TABLE reverse_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/reverse.sql b/tests/cases/standalone/common/function/string/reverse.sql new file mode 100644 index 000000000000..f33f359f7391 --- /dev/null +++ b/tests/cases/standalone/common/function/string/reverse.sql @@ -0,0 +1,63 @@ +-- String REVERSE function tests + +-- Basic REVERSE function +SELECT REVERSE('hello'); + +SELECT REVERSE('world'); + +SELECT REVERSE(''); + +SELECT REVERSE(NULL); + +-- REVERSE with numbers and special characters +SELECT REVERSE('12345'); + +SELECT REVERSE('hello!'); + 
+SELECT REVERSE('a!@#$%b'); + +-- REVERSE with palindromes +SELECT REVERSE('radar'); + +SELECT REVERSE('madam'); + +SELECT REVERSE('racecar'); + +-- Test with table data +CREATE TABLE reverse_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO reverse_test VALUES + ('hello', 1000), + ('world', 2000), + ('12345', 3000), + ('radar', 4000), + ('', 5000), + (NULL, 6000); + +SELECT s, REVERSE(s) FROM reverse_test ORDER BY ts; + +-- Unicode REVERSE +SELECT REVERSE('世界'); + +SELECT REVERSE('café'); + +SELECT REVERSE('🚀🌟'); + +-- REVERSE with spaces +SELECT REVERSE('hello world'); + +SELECT REVERSE(' spaces '); + +-- Combining REVERSE with other functions +SELECT UPPER(REVERSE('hello')); + +SELECT REVERSE(UPPER('hello')); + +SELECT LENGTH(REVERSE('hello world')); + +-- Double REVERSE (should return original) +SELECT REVERSE(REVERSE('hello world')); + +SELECT REVERSE(REVERSE('中文测试')); + +DROP TABLE reverse_test; diff --git a/tests/cases/standalone/common/function/string/string_split.result b/tests/cases/standalone/common/function/string/string_split.result new file mode 100644 index 000000000000..d67adc0a0a23 --- /dev/null +++ b/tests/cases/standalone/common/function/string/string_split.result @@ -0,0 +1,213 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_string_split.test +-- String split function tests +-- Test basic string_split functionality +SELECT string_to_array(NULL, NULL); + ++----------------------------+ +| string_to_array(NULL,NULL) | ++----------------------------+ +| | ++----------------------------+ + +SELECT string_to_array('hello world', ' '); + ++------------------------------------------------+ +| string_to_array(Utf8("hello world"),Utf8(" ")) | ++------------------------------------------------+ +| [hello, world] | ++------------------------------------------------+ + +SELECT string_to_array(NULL, ' '); + ++---------------------------------+ +| string_to_array(NULL,Utf8(" ")) | ++---------------------------------+ +| | 
++---------------------------------+ + +SELECT string_to_array('a b c', NULL); + ++-------------------------------------+ +| string_to_array(Utf8("a b c"),NULL) | ++-------------------------------------+ +| [a, , b, , c] | ++-------------------------------------+ + +SELECT string_to_array('a b c', ' '); + ++------------------------------------------+ +| string_to_array(Utf8("a b c"),Utf8(" ")) | ++------------------------------------------+ +| [a, b, c] | ++------------------------------------------+ + +-- Test with table data +CREATE TABLE split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO split_test VALUES + ('hello,world,test', 1000), + ('a|b|c|d', 2000), + ('no-separator', 3000), + ('', 4000), + (NULL, 5000); + +Affected Rows: 5 + +-- Test splitting with different separators +SELECT s, string_to_array(s, ',') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8(",")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello, world, test] | +| a|b|c|d | [a|b|c|d] | +| no-separator | [no-separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +SELECT s, string_to_array(s, '|') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8("|")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello,world,test] | +| a|b|c|d | [a, b, c, d] | +| no-separator | [no-separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +SELECT s, string_to_array(s, '-') FROM split_test ORDER BY ts; + ++------------------+-----------------------------------------+ +| s | string_to_array(split_test.s,Utf8("-")) | ++------------------+-----------------------------------------+ +| hello,world,test | [hello,world,test] | +| a|b|c|d | [a|b|c|d] | +| 
no-separator | [no, separator] | +| | [] | +| | | ++------------------+-----------------------------------------+ + +-- Test splitting with multi-character separator +CREATE TABLE multi_sep_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO multi_sep_test VALUES + ('hello::world::test', 1000), + ('a---b---c', 2000), + ('single', 3000); + +Affected Rows: 3 + +SELECT s, string_to_array(s, '::') FROM multi_sep_test ORDER BY ts; + ++--------------------+----------------------------------------------+ +| s | string_to_array(multi_sep_test.s,Utf8("::")) | ++--------------------+----------------------------------------------+ +| hello::world::test | [hello, world, test] | +| a---b---c | [a---b---c] | +| single | [single] | ++--------------------+----------------------------------------------+ + +SELECT s, string_to_array(s, '---') FROM multi_sep_test ORDER BY ts; + ++--------------------+-----------------------------------------------+ +| s | string_to_array(multi_sep_test.s,Utf8("---")) | ++--------------------+-----------------------------------------------+ +| hello::world::test | [hello::world::test] | +| a---b---c | [a, b, c] | +| single | [single] | ++--------------------+-----------------------------------------------+ + +-- Test with Unicode separators +CREATE TABLE unicode_split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_split_test VALUES + ('hello世world世test', 1000), + ('a🦆b🦆c', 2000); + +Affected Rows: 2 + +SELECT s, string_to_array(s, '世') FROM unicode_split_test ORDER BY ts; + ++--------------------+--------------------------------------------------+ +| s | string_to_array(unicode_split_test.s,Utf8("世")) | ++--------------------+--------------------------------------------------+ +| hello世world世test | [hello, world, test] | +| a🦆b🦆c | [a🦆b🦆c] | ++--------------------+--------------------------------------------------+ + +SELECT s, string_to_array(s, '🦆') FROM unicode_split_test ORDER BY ts; + 
++--------------------+--------------------------------------------------+ +| s | string_to_array(unicode_split_test.s,Utf8("🦆")) | ++--------------------+--------------------------------------------------+ +| hello世world世test | [hello世world世test] | +| a🦆b🦆c | [a, b, c] | ++--------------------+--------------------------------------------------+ + +-- Test edge cases +-- Empty string +SELECT string_to_array('', ','); + ++-------------------------------------+ +| string_to_array(Utf8(""),Utf8(",")) | ++-------------------------------------+ +| [] | ++-------------------------------------+ + +-- Empty separator +SELECT string_to_array('hello', ''); + ++-----------------------------------------+ +| string_to_array(Utf8("hello"),Utf8("")) | ++-----------------------------------------+ +| [hello] | ++-----------------------------------------+ + +-- Multiple consecutive separators +SELECT string_to_array(',,hello,,world,,', ','); + ++-----------------------------------------------------+ +| string_to_array(Utf8(",,hello,,world,,"),Utf8(",")) | ++-----------------------------------------------------+ +| [, , hello, , world, , ] | ++-----------------------------------------------------+ + +-- Trailing separator +SELECT string_to_array('hello,', ','); + ++-------------------------------------------+ +| string_to_array(Utf8("hello,"),Utf8(",")) | ++-------------------------------------------+ +| [hello, ] | ++-------------------------------------------+ + +-- Leading separator +SELECT string_to_array(',hello', ','); + ++-------------------------------------------+ +| string_to_array(Utf8(",hello"),Utf8(",")) | ++-------------------------------------------+ +| [, hello] | ++-------------------------------------------+ + +DROP TABLE split_test; + +Affected Rows: 0 + +DROP TABLE multi_sep_test; + +Affected Rows: 0 + +DROP TABLE unicode_split_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/string_split.sql 
b/tests/cases/standalone/common/function/string/string_split.sql new file mode 100644 index 000000000000..ef0be5fff5bf --- /dev/null +++ b/tests/cases/standalone/common/function/string/string_split.sql @@ -0,0 +1,75 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_string_split.test +-- String split function tests + +-- Test basic string_split functionality +SELECT string_to_array(NULL, NULL); + +SELECT string_to_array('hello world', ' '); + +SELECT string_to_array(NULL, ' '); + +SELECT string_to_array('a b c', NULL); + +SELECT string_to_array('a b c', ' '); + +-- Test with table data +CREATE TABLE split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO split_test VALUES + ('hello,world,test', 1000), + ('a|b|c|d', 2000), + ('no-separator', 3000), + ('', 4000), + (NULL, 5000); + +-- Test splitting with different separators +SELECT s, string_to_array(s, ',') FROM split_test ORDER BY ts; + +SELECT s, string_to_array(s, '|') FROM split_test ORDER BY ts; + +SELECT s, string_to_array(s, '-') FROM split_test ORDER BY ts; + +-- Test splitting with multi-character separator +CREATE TABLE multi_sep_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO multi_sep_test VALUES + ('hello::world::test', 1000), + ('a---b---c', 2000), + ('single', 3000); + +SELECT s, string_to_array(s, '::') FROM multi_sep_test ORDER BY ts; + +SELECT s, string_to_array(s, '---') FROM multi_sep_test ORDER BY ts; + +-- Test with Unicode separators +CREATE TABLE unicode_split_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_split_test VALUES + ('hello世world世test', 1000), + ('a🦆b🦆c', 2000); + +SELECT s, string_to_array(s, '世') FROM unicode_split_test ORDER BY ts; + +SELECT s, string_to_array(s, '🦆') FROM unicode_split_test ORDER BY ts; + +-- Test edge cases +-- Empty string +SELECT string_to_array('', ','); + +-- Empty separator +SELECT string_to_array('hello', ''); + +-- Multiple consecutive separators +SELECT string_to_array(',,hello,,world,,', ','); + +-- 
Trailing separator +SELECT string_to_array('hello,', ','); + +-- Leading separator +SELECT string_to_array(',hello', ','); + +DROP TABLE split_test; + +DROP TABLE multi_sep_test; + +DROP TABLE unicode_split_test; diff --git a/tests/cases/standalone/common/function/string/substring.result b/tests/cases/standalone/common/function/string/substring.result new file mode 100644 index 000000000000..642571084a88 --- /dev/null +++ b/tests/cases/standalone/common/function/string/substring.result @@ -0,0 +1,173 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_substring.test +-- Substring function tests +CREATE TABLE strings(s VARCHAR, "off" INTEGER, length INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES + ('hello', 1, 2, 1000), + ('world', 2, 3, 2000), + ('b', 1, 1, 3000), + (NULL, 2, 2, 4000); + +Affected Rows: 4 + +-- Test zero length +SELECT SUBSTRING('🦆ab', 1, 0), SUBSTRING('abc', 1, 0); + ++----------------------------------------+---------------------------------------+ +| substr(Utf8("🦆ab"),Int64(1),Int64(0)) | substr(Utf8("abc"),Int64(1),Int64(0)) | ++----------------------------------------+---------------------------------------+ +| | | ++----------------------------------------+---------------------------------------+ + +-- Normal substring with constant offset/length +SELECT SUBSTRING(s, 1, 2) FROM strings ORDER BY ts; + ++-------------------------------------+ +| substr(strings.s,Int64(1),Int64(2)) | ++-------------------------------------+ +| he | +| wo | +| b | +| | ++-------------------------------------+ + +-- Substring out of range +SELECT SUBSTRING(s, 2, 2) FROM strings ORDER BY ts; + ++-------------------------------------+ +| substr(strings.s,Int64(2),Int64(2)) | ++-------------------------------------+ +| el | +| or | +| | +| | ++-------------------------------------+ + +-- Variable length offset/length +SELECT SUBSTRING(s, "off", "length") FROM strings ORDER BY ts; + 
++----------------------------------------------+ +| substr(strings.s,strings.off,strings.length) | ++----------------------------------------------+ +| he | +| orl | +| b | +| | ++----------------------------------------------+ + +SELECT SUBSTRING(s, "off", 2) FROM strings ORDER BY ts; + ++----------------------------------------+ +| substr(strings.s,strings.off,Int64(2)) | ++----------------------------------------+ +| he | +| or | +| b | +| | ++----------------------------------------+ + +SELECT SUBSTRING(s, 1, length) FROM strings ORDER BY ts; + ++-------------------------------------------+ +| substr(strings.s,Int64(1),strings.length) | ++-------------------------------------------+ +| he | +| wor | +| b | +| | ++-------------------------------------------+ + +SELECT SUBSTRING('hello', "off", length) FROM strings ORDER BY ts; + ++--------------------------------------------------+ +| substr(Utf8("hello"),strings.off,strings.length) | ++--------------------------------------------------+ +| he | +| ell | +| h | +| el | ++--------------------------------------------------+ + +-- Test with NULL values +SELECT SUBSTRING(NULL, "off", length) FROM strings ORDER BY ts; + ++-----------------------------------------+ +| substr(NULL,strings.off,strings.length) | ++-----------------------------------------+ +| | +| | +| | +| | ++-----------------------------------------+ + +SELECT SUBSTRING(s, NULL, length) FROM strings ORDER BY ts; + ++---------------------------------------+ +| substr(strings.s,NULL,strings.length) | ++---------------------------------------+ +| | +| | +| | +| | ++---------------------------------------+ + +SELECT SUBSTRING(s, "off", NULL) FROM strings ORDER BY ts; + ++------------------------------------+ +| substr(strings.s,strings.off,NULL) | ++------------------------------------+ +| | +| | +| | +| | ++------------------------------------+ + +-- Test negative offsets +SELECT SUBSTRING('hello', -1, 3); + ++------------------------------------------+ 
+| substr(Utf8("hello"),Int64(-1),Int64(3)) | ++------------------------------------------+ +| h | ++------------------------------------------+ + +SELECT SUBSTRING('hello', 0, 3); + ++-----------------------------------------+ +| substr(Utf8("hello"),Int64(0),Int64(3)) | ++-----------------------------------------+ +| he | ++-----------------------------------------+ + +-- Test with Unicode characters +CREATE TABLE unicode_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_strings VALUES + ('Hello 世界', 1000), + ('🦆🦀🐧', 2000), + ('café', 3000); + +Affected Rows: 3 + +SELECT s, SUBSTRING(s, 1, 5), SUBSTRING(s, 7, 2) FROM unicode_strings ORDER BY ts; + ++------------+---------------------------------------------+---------------------------------------------+ +| s | substr(unicode_strings.s,Int64(1),Int64(5)) | substr(unicode_strings.s,Int64(7),Int64(2)) | ++------------+---------------------------------------------+---------------------------------------------+ +| Hello 世界 | Hello | 世界 | +| 🦆🦀🐧 | 🦆🦀🐧 | | +| café | café | | ++------------+---------------------------------------------+---------------------------------------------+ + +DROP TABLE strings; + +Affected Rows: 0 + +DROP TABLE unicode_strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/substring.sql b/tests/cases/standalone/common/function/string/substring.sql new file mode 100644 index 000000000000..6c00bbe5f602 --- /dev/null +++ b/tests/cases/standalone/common/function/string/substring.sql @@ -0,0 +1,53 @@ +-- Migrated from DuckDB test: test/sql/function/string/test_substring.test +-- Substring function tests + +CREATE TABLE strings(s VARCHAR, "off" INTEGER, length INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES + ('hello', 1, 2, 1000), + ('world', 2, 3, 2000), + ('b', 1, 1, 3000), + (NULL, 2, 2, 4000); + +-- Test zero length +SELECT SUBSTRING('🦆ab', 1, 0), SUBSTRING('abc', 1, 0); + +-- Normal substring with constant 
offset/length +SELECT SUBSTRING(s, 1, 2) FROM strings ORDER BY ts; + +-- Substring out of range +SELECT SUBSTRING(s, 2, 2) FROM strings ORDER BY ts; + +-- Variable length offset/length +SELECT SUBSTRING(s, "off", "length") FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, "off", 2) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, 1, length) FROM strings ORDER BY ts; + +SELECT SUBSTRING('hello', "off", length) FROM strings ORDER BY ts; + +-- Test with NULL values +SELECT SUBSTRING(NULL, "off", length) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, NULL, length) FROM strings ORDER BY ts; + +SELECT SUBSTRING(s, "off", NULL) FROM strings ORDER BY ts; + +-- Test negative offsets +SELECT SUBSTRING('hello', -1, 3); +SELECT SUBSTRING('hello', 0, 3); + +-- Test with Unicode characters +CREATE TABLE unicode_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_strings VALUES + ('Hello 世界', 1000), + ('🦆🦀🐧', 2000), + ('café', 3000); + +SELECT s, SUBSTRING(s, 1, 5), SUBSTRING(s, 7, 2) FROM unicode_strings ORDER BY ts; + +DROP TABLE strings; + +DROP TABLE unicode_strings; diff --git a/tests/cases/standalone/common/function/string/trim_pad.result b/tests/cases/standalone/common/function/string/trim_pad.result new file mode 100644 index 000000000000..c29b430180b8 --- /dev/null +++ b/tests/cases/standalone/common/function/string/trim_pad.result @@ -0,0 +1,274 @@ +-- String TRIM and PAD function tests +-- TRIM functions +SELECT TRIM(' hello world '); + ++--------------------------------+ +| btrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | ++--------------------------------+ + +SELECT LTRIM(' hello world '); + ++--------------------------------+ +| ltrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | ++--------------------------------+ + +SELECT RTRIM(' hello world '); + ++--------------------------------+ +| rtrim(Utf8(" hello world ")) | ++--------------------------------+ +| hello world | 
++--------------------------------+ + +-- TRIM with specific characters +SELECT TRIM('x' FROM 'xxxhello worldxxx'); + ++--------------------------------------------+ +| btrim(Utf8("xxxhello worldxxx"),Utf8("x")) | ++--------------------------------------------+ +| hello world | ++--------------------------------------------+ + +SELECT LTRIM('hello world', 'hel'); + ++----------------------------------------+ +| ltrim(Utf8("hello world"),Utf8("hel")) | ++----------------------------------------+ +| o world | ++----------------------------------------+ + +SELECT RTRIM('hello world', 'dlr'); + ++----------------------------------------+ +| rtrim(Utf8("hello world"),Utf8("dlr")) | ++----------------------------------------+ +| hello wo | ++----------------------------------------+ + +-- PAD functions +SELECT LPAD('hello', 10, '*'); + ++-----------------------------------------+ +| lpad(Utf8("hello"),Int64(10),Utf8("*")) | ++-----------------------------------------+ +| *****hello | ++-----------------------------------------+ + +SELECT RPAD('hello', 10, '*'); + ++-----------------------------------------+ +| rpad(Utf8("hello"),Int64(10),Utf8("*")) | ++-----------------------------------------+ +| hello***** | ++-----------------------------------------+ + +-- Truncate +SELECT LPAD('hello', 3, '*'); + ++----------------------------------------+ +| lpad(Utf8("hello"),Int64(3),Utf8("*")) | ++----------------------------------------+ +| hel | ++----------------------------------------+ + +-- Truncate +SELECT RPAD('hello', 3, '*'); + ++----------------------------------------+ +| rpad(Utf8("hello"),Int64(3),Utf8("*")) | ++----------------------------------------+ +| hel | ++----------------------------------------+ + +-- PAD with multi-character padding +SELECT LPAD('test', 10, 'ab'); + ++-----------------------------------------+ +| lpad(Utf8("test"),Int64(10),Utf8("ab")) | ++-----------------------------------------+ +| abababtest | 
++-----------------------------------------+ + +SELECT RPAD('test', 10, 'xy'); + ++-----------------------------------------+ +| rpad(Utf8("test"),Int64(10),Utf8("xy")) | ++-----------------------------------------+ +| testxyxyxy | ++-----------------------------------------+ + +-- Test with table data +CREATE TABLE trim_pad_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO trim_pad_test VALUES + (' hello ', 1000), + ('world ', 2000), + (' test', 3000), + ('no-spaces', 4000), + ('', 5000), + (NULL, 6000); + +Affected Rows: 6 + +-- Apply TRIM functions to table data +SELECT s, TRIM(s), LTRIM(s), RTRIM(s) FROM trim_pad_test ORDER BY ts; + ++-----------+------------------------+------------------------+------------------------+ +| s | btrim(trim_pad_test.s) | ltrim(trim_pad_test.s) | rtrim(trim_pad_test.s) | ++-----------+------------------------+------------------------+------------------------+ +| hello | hello | hello | hello | +| world | world | world | world | +| test | test | test | test | +| no-spaces | no-spaces | no-spaces | no-spaces | +| | | | | +| | | | | ++-----------+------------------------+------------------------+------------------------+ + +-- Apply PAD functions +SELECT s, LPAD(TRIM(s), 15, '-'), RPAD(TRIM(s), 15, '+') FROM trim_pad_test WHERE s IS NOT NULL ORDER BY ts; + ++-----------+--------------------------------------------------+--------------------------------------------------+ +| s | lpad(btrim(trim_pad_test.s),Int64(15),Utf8("-")) | rpad(btrim(trim_pad_test.s),Int64(15),Utf8("+")) | ++-----------+--------------------------------------------------+--------------------------------------------------+ +| hello | ----------hello | hello++++++++++ | +| world | ----------world | world++++++++++ | +| test | -----------test | test+++++++++++ | +| no-spaces | ------no-spaces | no-spaces++++++ | +| | --------------- | +++++++++++++++ | 
++-----------+--------------------------------------------------+--------------------------------------------------+ + +-- Test with Unicode characters +SELECT TRIM(' 中文测试 '); + ++-----------------------------+ +| btrim(Utf8(" 中文测试 ")) | ++-----------------------------+ +| 中文测试 | ++-----------------------------+ + +SELECT LPAD('🚀', 10, '★'); + ++--------------------------------------+ +| lpad(Utf8("🚀"),Int64(10),Utf8("★")) | ++--------------------------------------+ +| ★★★★★★★★★🚀 | ++--------------------------------------+ + +SELECT RPAD('café', 8, '•'); + ++---------------------------------------+ +| rpad(Utf8("café"),Int64(8),Utf8("•")) | ++---------------------------------------+ +| café•••• | ++---------------------------------------+ + +-- Edge cases +SELECT TRIM(''); + ++-----------------+ +| btrim(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT TRIM(NULL); + ++-------------+ +| btrim(NULL) | ++-------------+ +| | ++-------------+ + +SELECT LPAD('', 5, '*'); + ++-----------------------------------+ +| lpad(Utf8(""),Int64(5),Utf8("*")) | ++-----------------------------------+ +| ***** | ++-----------------------------------+ + +SELECT RPAD('', 5, '*'); + ++-----------------------------------+ +| rpad(Utf8(""),Int64(5),Utf8("*")) | ++-----------------------------------+ +| ***** | ++-----------------------------------+ + +SELECT LPAD('test', 0, '*'); + ++---------------------------------------+ +| lpad(Utf8("test"),Int64(0),Utf8("*")) | ++---------------------------------------+ +| | ++---------------------------------------+ + +SELECT RPAD('test', 0, '*'); + ++---------------------------------------+ +| rpad(Utf8("test"),Int64(0),Utf8("*")) | ++---------------------------------------+ +| | ++---------------------------------------+ + +-- TRIM with various whitespace characters +SELECT TRIM('\t\nhello\r\n\t'); + ++--------------------------------+ +| btrim(Utf8("\t\nhello\r\n\t")) | ++--------------------------------+ +| \t\nhello\r\n\t | 
++--------------------------------+ + +SELECT LTRIM('\t\nhello world'); + ++--------------------------------+ +| ltrim(Utf8("\t\nhello world")) | ++--------------------------------+ +| \t\nhello world | ++--------------------------------+ + +SELECT RTRIM('hello world\r\n'); + ++--------------------------------+ +| rtrim(Utf8("hello world\r\n")) | ++--------------------------------+ +| hello world\r\n | ++--------------------------------+ + +-- Custom TRIM characters +CREATE TABLE custom_trim(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO custom_trim VALUES + ('***hello***', 1000), + ('---world---', 2000), + ('abcTESTabc', 3000); + +Affected Rows: 3 + +SELECT s, TRIM('*' FROM s), TRIM('-' FROM s), TRIM('abc' FROM s) FROM custom_trim ORDER BY ts; + ++-------------+--------------------------------+--------------------------------+----------------------------------+ +| s | btrim(custom_trim.s,Utf8("*")) | btrim(custom_trim.s,Utf8("-")) | btrim(custom_trim.s,Utf8("abc")) | ++-------------+--------------------------------+--------------------------------+----------------------------------+ +| ***hello*** | hello | ***hello*** | ***hello*** | +| ---world--- | ---world--- | world | ---world--- | +| abcTESTabc | abcTESTabc | abcTESTabc | TEST | ++-------------+--------------------------------+--------------------------------+----------------------------------+ + +DROP TABLE trim_pad_test; + +Affected Rows: 0 + +DROP TABLE custom_trim; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/trim_pad.sql b/tests/cases/standalone/common/function/string/trim_pad.sql new file mode 100644 index 000000000000..6c6ba92c6559 --- /dev/null +++ b/tests/cases/standalone/common/function/string/trim_pad.sql @@ -0,0 +1,88 @@ +-- String TRIM and PAD function tests + +-- TRIM functions +SELECT TRIM(' hello world '); + +SELECT LTRIM(' hello world '); + +SELECT RTRIM(' hello world '); + +-- TRIM with specific characters +SELECT TRIM('x' FROM 
'xxxhello worldxxx'); + +SELECT LTRIM('hello world', 'hel'); + +SELECT RTRIM('hello world', 'dlr'); + +-- PAD functions +SELECT LPAD('hello', 10, '*'); + +SELECT RPAD('hello', 10, '*'); + +-- Truncate +SELECT LPAD('hello', 3, '*'); + +-- Truncate +SELECT RPAD('hello', 3, '*'); + +-- PAD with multi-character padding +SELECT LPAD('test', 10, 'ab'); + +SELECT RPAD('test', 10, 'xy'); + +-- Test with table data +CREATE TABLE trim_pad_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO trim_pad_test VALUES + (' hello ', 1000), + ('world ', 2000), + (' test', 3000), + ('no-spaces', 4000), + ('', 5000), + (NULL, 6000); + +-- Apply TRIM functions to table data +SELECT s, TRIM(s), LTRIM(s), RTRIM(s) FROM trim_pad_test ORDER BY ts; + +-- Apply PAD functions +SELECT s, LPAD(TRIM(s), 15, '-'), RPAD(TRIM(s), 15, '+') FROM trim_pad_test WHERE s IS NOT NULL ORDER BY ts; + +-- Test with Unicode characters +SELECT TRIM(' 中文测试 '); + +SELECT LPAD('🚀', 10, '★'); + +SELECT RPAD('café', 8, '•'); + +-- Edge cases +SELECT TRIM(''); + +SELECT TRIM(NULL); +SELECT LPAD('', 5, '*'); + +SELECT RPAD('', 5, '*'); + +SELECT LPAD('test', 0, '*'); + +SELECT RPAD('test', 0, '*'); + +-- TRIM with various whitespace characters +SELECT TRIM('\t\nhello\r\n\t'); + +SELECT LTRIM('\t\nhello world'); + +SELECT RTRIM('hello world\r\n'); + +-- Custom TRIM characters +CREATE TABLE custom_trim(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO custom_trim VALUES + ('***hello***', 1000), + ('---world---', 2000), + ('abcTESTabc', 3000); + +SELECT s, TRIM('*' FROM s), TRIM('-' FROM s), TRIM('abc' FROM s) FROM custom_trim ORDER BY ts; + +DROP TABLE trim_pad_test; + +DROP TABLE custom_trim; diff --git a/tests/cases/standalone/common/function/string/upper_lower.result b/tests/cases/standalone/common/function/string/upper_lower.result new file mode 100644 index 000000000000..4f283530ef94 --- /dev/null +++ b/tests/cases/standalone/common/function/string/upper_lower.result @@ -0,0 +1,291 @@ +-- String case 
conversion function tests +-- Basic UPPER and LOWER functions +SELECT UPPER('hello world'); + ++----------------------------+ +| upper(Utf8("hello world")) | ++----------------------------+ +| HELLO WORLD | ++----------------------------+ + +SELECT LOWER('HELLO WORLD'); + ++----------------------------+ +| lower(Utf8("HELLO WORLD")) | ++----------------------------+ +| hello world | ++----------------------------+ + +SELECT UPPER('MiXeD cAsE'); + ++---------------------------+ +| upper(Utf8("MiXeD cAsE")) | ++---------------------------+ +| MIXED CASE | ++---------------------------+ + +SELECT LOWER('MiXeD cAsE'); + ++---------------------------+ +| lower(Utf8("MiXeD cAsE")) | ++---------------------------+ +| mixed case | ++---------------------------+ + +-- INITCAP (capitalize first letter of each word) +SELECT INITCAP('hello world'); + ++------------------------------+ +| initcap(Utf8("hello world")) | ++------------------------------+ +| Hello World | ++------------------------------+ + +SELECT INITCAP('HELLO WORLD'); + ++------------------------------+ +| initcap(Utf8("HELLO WORLD")) | ++------------------------------+ +| Hello World | ++------------------------------+ + +SELECT INITCAP('mIxEd CaSe TeSt'); + ++----------------------------------+ +| initcap(Utf8("mIxEd CaSe TeSt")) | ++----------------------------------+ +| Mixed Case Test | ++----------------------------------+ + +-- Test with NULL +SELECT UPPER(NULL); + ++-------------+ +| upper(NULL) | ++-------------+ +| | ++-------------+ + +SELECT LOWER(NULL); + ++-------------+ +| lower(NULL) | ++-------------+ +| | ++-------------+ + +SELECT INITCAP(NULL); + ++---------------+ +| initcap(NULL) | ++---------------+ +| | ++---------------+ + +-- Test with numbers and special characters +SELECT UPPER('hello123!@#'); + ++----------------------------+ +| upper(Utf8("hello123!@#")) | ++----------------------------+ +| HELLO123!@# | ++----------------------------+ + +SELECT LOWER('HELLO123!@#'); + 
++----------------------------+ +| lower(Utf8("HELLO123!@#")) | ++----------------------------+ +| hello123!@# | ++----------------------------+ + +SELECT INITCAP('hello-world_test'); + ++-----------------------------------+ +| initcap(Utf8("hello-world_test")) | ++-----------------------------------+ +| Hello-World_Test | ++-----------------------------------+ + +-- Test with table data +CREATE TABLE case_test("name" VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO case_test VALUES + ('john doe', 'new york', 1000), + ('JANE SMITH', 'LOS ANGELES', 2000), + ('Bob Wilson', 'Chicago', 3000), + ('alice johnson', 'BOSTON', 4000); + +Affected Rows: 4 + +-- Apply case functions to table data +SELECT "name", UPPER("name"), LOWER("name"), INITCAP("name") FROM case_test ORDER BY ts; + ++---------------+-----------------------+-----------------------+-------------------------+ +| name | upper(case_test.name) | lower(case_test.name) | initcap(case_test.name) | ++---------------+-----------------------+-----------------------+-------------------------+ +| john doe | JOHN DOE | john doe | John Doe | +| JANE SMITH | JANE SMITH | jane smith | Jane Smith | +| Bob Wilson | BOB WILSON | bob wilson | Bob Wilson | +| alice johnson | ALICE JOHNSON | alice johnson | Alice Johnson | ++---------------+-----------------------+-----------------------+-------------------------+ + +SELECT city, UPPER(city), LOWER(city), INITCAP(city) FROM case_test ORDER BY ts; + ++-------------+-----------------------+-----------------------+-------------------------+ +| city | upper(case_test.city) | lower(case_test.city) | initcap(case_test.city) | ++-------------+-----------------------+-----------------------+-------------------------+ +| new york | NEW YORK | new york | New York | +| LOS ANGELES | LOS ANGELES | los angeles | Los Angeles | +| Chicago | CHICAGO | chicago | Chicago | +| BOSTON | BOSTON | boston | Boston | 
++-------------+-----------------------+-----------------------+-------------------------+ + +-- Combined case operations +SELECT INITCAP(LOWER("name")) as formatted_name FROM case_test ORDER BY ts; + ++----------------+ +| formatted_name | ++----------------+ +| John Doe | +| Jane Smith | +| Bob Wilson | +| Alice Johnson | ++----------------+ + +-- Unicode case conversion +SELECT UPPER('café'); + ++---------------------+ +| upper(Utf8("café")) | ++---------------------+ +| CAFÉ | ++---------------------+ + +SELECT LOWER('CAFÉ'); + ++---------------------+ +| lower(Utf8("CAFÉ")) | ++---------------------+ +| café | ++---------------------+ + +-- German characters +SELECT UPPER('äöüß'); + ++---------------------+ +| upper(Utf8("äöüß")) | ++---------------------+ +| ÄÖÜSS | ++---------------------+ + +-- German uppercase +SELECT LOWER('ÄÖÜ'); + ++--------------------+ +| lower(Utf8("ÄÖÜ")) | ++--------------------+ +| äöü | ++--------------------+ + +-- Greek letters +SELECT UPPER('αβγ'); + ++--------------------+ +| upper(Utf8("αβγ")) | ++--------------------+ +| ΑΒΓ | ++--------------------+ + +SELECT LOWER('ΑΒΓ'); + ++--------------------+ +| lower(Utf8("ΑΒΓ")) | ++--------------------+ +| αβγ | ++--------------------+ + +-- Test with empty string +SELECT UPPER(''); + ++-----------------+ +| upper(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT LOWER(''); + ++-----------------+ +| lower(Utf8("")) | ++-----------------+ +| | ++-----------------+ + +SELECT INITCAP(''); + ++-------------------+ +| initcap(Utf8("")) | ++-------------------+ +| | ++-------------------+ + +-- Test with single characters +SELECT UPPER('a'), UPPER('A'), UPPER('1'), UPPER(' '); + ++------------------+------------------+------------------+------------------+ +| upper(Utf8("a")) | upper(Utf8("A")) | upper(Utf8("1")) | upper(Utf8(" ")) | ++------------------+------------------+------------------+------------------+ +| A | A | 1 | | 
++------------------+------------------+------------------+------------------+ + +SELECT LOWER('a'), LOWER('A'), LOWER('1'), LOWER(' '); + ++------------------+------------------+------------------+------------------+ +| lower(Utf8("a")) | lower(Utf8("A")) | lower(Utf8("1")) | lower(Utf8(" ")) | ++------------------+------------------+------------------+------------------+ +| a | a | 1 | | ++------------------+------------------+------------------+------------------+ + +SELECT INITCAP('a'), INITCAP('A'), INITCAP('1'); + ++--------------------+--------------------+--------------------+ +| initcap(Utf8("a")) | initcap(Utf8("A")) | initcap(Utf8("1")) | ++--------------------+--------------------+--------------------+ +| A | A | 1 | ++--------------------+--------------------+--------------------+ + +-- Complex Unicode examples +CREATE TABLE unicode_case(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_case VALUES + ('hello 世界', 1000), + ('HELLO 世界', 2000), + ('café à paris', 3000), + ('CAFÉ À PARIS', 4000); + +Affected Rows: 4 + +SELECT s, UPPER(s), LOWER(s), INITCAP(s) FROM unicode_case ORDER BY ts; + ++--------------+-----------------------+-----------------------+-------------------------+ +| s | upper(unicode_case.s) | lower(unicode_case.s) | initcap(unicode_case.s) | ++--------------+-----------------------+-----------------------+-------------------------+ +| hello 世界 | HELLO 世界 | hello 世界 | Hello 世界 | +| HELLO 世界 | HELLO 世界 | hello 世界 | Hello 世界 | +| café à paris | CAFÉ À PARIS | café à paris | Café À Paris | +| CAFÉ À PARIS | CAFÉ À PARIS | café à paris | Café À Paris | ++--------------+-----------------------+-----------------------+-------------------------+ + +DROP TABLE case_test; + +Affected Rows: 0 + +DROP TABLE unicode_case; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/function/string/upper_lower.sql b/tests/cases/standalone/common/function/string/upper_lower.sql new file mode 100644 index 
000000000000..d89f78cfe8b6 --- /dev/null +++ b/tests/cases/standalone/common/function/string/upper_lower.sql @@ -0,0 +1,93 @@ +-- String case conversion function tests + +-- Basic UPPER and LOWER functions +SELECT UPPER('hello world'); + +SELECT LOWER('HELLO WORLD'); + +SELECT UPPER('MiXeD cAsE'); + +SELECT LOWER('MiXeD cAsE'); + +-- INITCAP (capitalize first letter of each word) +SELECT INITCAP('hello world'); + +SELECT INITCAP('HELLO WORLD'); + +SELECT INITCAP('mIxEd CaSe TeSt'); + +-- Test with NULL +SELECT UPPER(NULL); + +SELECT LOWER(NULL); + +SELECT INITCAP(NULL); + +-- Test with numbers and special characters +SELECT UPPER('hello123!@#'); + +SELECT LOWER('HELLO123!@#'); + +SELECT INITCAP('hello-world_test'); + +-- Test with table data +CREATE TABLE case_test("name" VARCHAR, city VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO case_test VALUES + ('john doe', 'new york', 1000), + ('JANE SMITH', 'LOS ANGELES', 2000), + ('Bob Wilson', 'Chicago', 3000), + ('alice johnson', 'BOSTON', 4000); + +-- Apply case functions to table data +SELECT "name", UPPER("name"), LOWER("name"), INITCAP("name") FROM case_test ORDER BY ts; + +SELECT city, UPPER(city), LOWER(city), INITCAP(city) FROM case_test ORDER BY ts; + +-- Combined case operations +SELECT INITCAP(LOWER("name")) as formatted_name FROM case_test ORDER BY ts; + +-- Unicode case conversion +SELECT UPPER('café'); + +SELECT LOWER('CAFÉ'); + +-- German characters +SELECT UPPER('äöüß'); + +-- German uppercase +SELECT LOWER('ÄÖÜ'); + +-- Greek letters +SELECT UPPER('αβγ'); + +SELECT LOWER('ΑΒΓ'); + +-- Test with empty string +SELECT UPPER(''); + +SELECT LOWER(''); + +SELECT INITCAP(''); + +-- Test with single characters +SELECT UPPER('a'), UPPER('A'), UPPER('1'), UPPER(' '); + +SELECT LOWER('a'), LOWER('A'), LOWER('1'), LOWER(' '); + +SELECT INITCAP('a'), INITCAP('A'), INITCAP('1'); + +-- Complex Unicode examples +CREATE TABLE unicode_case(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_case VALUES + ('hello 
世界', 1000), + ('HELLO 世界', 2000), + ('café à paris', 3000), + ('CAFÉ À PARIS', 4000); + +SELECT s, UPPER(s), LOWER(s), INITCAP(s) FROM unicode_case ORDER BY ts; + +DROP TABLE case_test; + +DROP TABLE unicode_case; diff --git a/tests/cases/standalone/common/order/nulls_first_last.result b/tests/cases/standalone/common/order/nulls_first_last.result new file mode 100644 index 000000000000..3bf957072906 --- /dev/null +++ b/tests/cases/standalone/common/order/nulls_first_last.result @@ -0,0 +1,141 @@ +-- Migrated from DuckDB test: test/sql/order/test_nulls_first.test +-- Test NULLS FIRST/NULLS LAST +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES (1, 1000), (NULL, 2000); + +Affected Rows: 2 + +-- Default NULL ordering (usually NULLS LAST in most systems) +SELECT i FROM integers ORDER BY i; + ++---+ +| i | ++---+ +| 1 | +| | ++---+ + +-- Explicit NULLS FIRST +SELECT i FROM integers ORDER BY i NULLS FIRST; + ++---+ +| i | ++---+ +| | +| 1 | ++---+ + +-- Explicit NULLS LAST +SELECT i FROM integers ORDER BY i NULLS LAST; + ++---+ +| i | ++---+ +| 1 | +| | ++---+ + +-- Multiple columns with mixed NULL handling +CREATE TABLE test(i INTEGER, j INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES (1, 1, 1000), (NULL, 1, 2000), (1, NULL, 3000); + +Affected Rows: 3 + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS LAST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | 1 | +| 1 | | ++---+---+ + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | | +| 1 | 1 | ++---+---+ + +SELECT i, j FROM test ORDER BY i NULLS LAST, j NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| 1 | | +| 1 | 1 | +| | 1 | ++---+---+ + +-- Test with DESC ordering +SELECT i, j FROM test ORDER BY i DESC NULLS FIRST, j DESC NULLS LAST; + ++---+---+ +| i | j | ++---+---+ +| | 1 | +| 1 | 1 | +| 1 | | ++---+---+ + +SELECT i, j FROM test ORDER BY i DESC NULLS 
LAST, j DESC NULLS FIRST; + ++---+---+ +| i | j | ++---+---+ +| 1 | | +| 1 | 1 | +| | 1 | ++---+---+ + +-- Test with strings +CREATE TABLE strings(s VARCHAR, i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES ('apple', 1, 1000), (NULL, 2, 2000), ('banana', NULL, 3000); + +Affected Rows: 3 + +SELECT s, i FROM strings ORDER BY s NULLS FIRST, i NULLS LAST; + ++--------+---+ +| s | i | ++--------+---+ +| | 2 | +| apple | 1 | +| banana | | ++--------+---+ + +SELECT s, i FROM strings ORDER BY s NULLS LAST, i NULLS FIRST; + ++--------+---+ +| s | i | ++--------+---+ +| apple | 1 | +| banana | | +| | 2 | ++--------+---+ + +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE test; + +Affected Rows: 0 + +DROP TABLE strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/nulls_first_last.sql b/tests/cases/standalone/common/order/nulls_first_last.sql new file mode 100644 index 000000000000..dca46b3d215a --- /dev/null +++ b/tests/cases/standalone/common/order/nulls_first_last.sql @@ -0,0 +1,46 @@ +-- Migrated from DuckDB test: test/sql/order/test_nulls_first.test +-- Test NULLS FIRST/NULLS LAST + +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES (1, 1000), (NULL, 2000); + +-- Default NULL ordering (usually NULLS LAST in most systems) +SELECT i FROM integers ORDER BY i; + +-- Explicit NULLS FIRST +SELECT i FROM integers ORDER BY i NULLS FIRST; + +-- Explicit NULLS LAST +SELECT i FROM integers ORDER BY i NULLS LAST; + +-- Multiple columns with mixed NULL handling +CREATE TABLE test(i INTEGER, j INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES (1, 1, 1000), (NULL, 1, 2000), (1, NULL, 3000); + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS LAST; + +SELECT i, j FROM test ORDER BY i NULLS FIRST, j NULLS FIRST; + +SELECT i, j FROM test ORDER BY i NULLS LAST, j NULLS FIRST; + +-- Test with DESC ordering +SELECT i, j FROM test ORDER BY i DESC NULLS FIRST, j DESC 
NULLS LAST; + +SELECT i, j FROM test ORDER BY i DESC NULLS LAST, j DESC NULLS FIRST; + +-- Test with strings +CREATE TABLE strings(s VARCHAR, i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES ('apple', 1, 1000), (NULL, 2, 2000), ('banana', NULL, 3000); + +SELECT s, i FROM strings ORDER BY s NULLS FIRST, i NULLS LAST; + +SELECT s, i FROM strings ORDER BY s NULLS LAST, i NULLS FIRST; + +DROP TABLE integers; + +DROP TABLE test; + +DROP TABLE strings; diff --git a/tests/cases/standalone/common/order/order_by_basic.result b/tests/cases/standalone/common/order/order_by_basic.result new file mode 100644 index 000000000000..747507f7a17a --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_basic.result @@ -0,0 +1,134 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by.test +-- Test ORDER BY keyword +CREATE TABLE test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES (11, 22, 1000), (12, 21, 2000), (13, 22, 3000); + +Affected Rows: 3 + +-- Simple ORDER BY +SELECT b FROM test ORDER BY a DESC; + ++----+ +| b | ++----+ +| 22 | +| 21 | +| 22 | ++----+ + +SELECT a, b FROM test ORDER BY a; + ++----+----+ +| a | b | ++----+----+ +| 11 | 22 | +| 12 | 21 | +| 13 | 22 | ++----+----+ + +SELECT a, b FROM test ORDER BY a DESC; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | +| 12 | 21 | +| 11 | 22 | ++----+----+ + +-- ORDER BY on multiple columns +SELECT a, b FROM test ORDER BY b, a; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | +| 13 | 22 | ++----+----+ + +-- ORDER BY using select indices +SELECT a, b FROM test ORDER BY 2, 1; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | +| 13 | 22 | ++----+----+ + +SELECT a, b FROM test ORDER BY b DESC, a; + ++----+----+ +| a | b | ++----+----+ +| 11 | 22 | +| 13 | 22 | +| 12 | 21 | ++----+----+ + +SELECT a, b FROM test ORDER BY b, a DESC; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 13 | 22 | +| 11 | 22 | 
++----+----+ + +-- TOP N queries with LIMIT +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | ++----+----+ + +-- OFFSET +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1 OFFSET 1; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | ++----+----+ + +-- OFFSET without limit +SELECT a, b FROM test ORDER BY b, a DESC OFFSET 1; + ++----+----+ +| a | b | ++----+----+ +| 13 | 22 | +| 11 | 22 | ++----+----+ + +-- ORDER BY with WHERE +SELECT a, b FROM test WHERE a < 13 ORDER BY b; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | ++----+----+ + +SELECT a, b FROM test WHERE a < 13 ORDER BY 2; + ++----+----+ +| a | b | ++----+----+ +| 12 | 21 | +| 11 | 22 | ++----+----+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/order_by_basic.sql b/tests/cases/standalone/common/order/order_by_basic.sql new file mode 100644 index 000000000000..68cba60911e2 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_basic.sql @@ -0,0 +1,39 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by.test +-- Test ORDER BY keyword + +CREATE TABLE test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES (11, 22, 1000), (12, 21, 2000), (13, 22, 3000); + +-- Simple ORDER BY +SELECT b FROM test ORDER BY a DESC; + +SELECT a, b FROM test ORDER BY a; + +SELECT a, b FROM test ORDER BY a DESC; + +-- ORDER BY on multiple columns +SELECT a, b FROM test ORDER BY b, a; + +-- ORDER BY using select indices +SELECT a, b FROM test ORDER BY 2, 1; + +SELECT a, b FROM test ORDER BY b DESC, a; + +SELECT a, b FROM test ORDER BY b, a DESC; + +-- TOP N queries with LIMIT +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1; + +-- OFFSET +SELECT a, b FROM test ORDER BY b, a DESC LIMIT 1 OFFSET 1; + +-- OFFSET without limit +SELECT a, b FROM test ORDER BY b, a DESC OFFSET 1; + +-- ORDER BY with WHERE +SELECT a, b FROM test WHERE a < 13 ORDER BY b; + +SELECT a, b FROM test WHERE a 
< 13 ORDER BY 2; + +DROP TABLE test; diff --git a/tests/cases/standalone/common/order/order_by_expressions.result b/tests/cases/standalone/common/order/order_by_expressions.result new file mode 100644 index 000000000000..f121fac188b0 --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_expressions.result @@ -0,0 +1,137 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by_expressions.test +-- Test ORDER BY with expressions +CREATE TABLE test(a INTEGER, b INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO test VALUES + (1, 10, 'apple', 1000), + (2, 20, 'banana', 2000), + (3, 15, 'cherry', 3000), + (4, 25, 'date', 4000); + +Affected Rows: 4 + +-- ORDER BY with arithmetic expressions +SELECT a, b, a + b as sum FROM test ORDER BY a + b; + ++---+----+-----+ +| a | b | sum | ++---+----+-----+ +| 1 | 10 | 11 | +| 3 | 15 | 18 | +| 2 | 20 | 22 | +| 4 | 25 | 29 | ++---+----+-----+ + +SELECT a, b, a * b as product FROM test ORDER BY a * b DESC; + ++---+----+---------+ +| a | b | product | ++---+----+---------+ +| 4 | 25 | 100 | +| 3 | 15 | 45 | +| 2 | 20 | 40 | +| 1 | 10 | 10 | ++---+----+---------+ + +-- ORDER BY with string functions +SELECT s, LENGTH(s) as len FROM test ORDER BY LENGTH(s); + ++--------+-----+ +| s | len | ++--------+-----+ +| date | 4 | +| apple | 5 | +| banana | 6 | +| cherry | 6 | ++--------+-----+ + +SELECT s, UPPER(s) as upper_s FROM test ORDER BY UPPER(s); + ++--------+---------+ +| s | upper_s | ++--------+---------+ +| apple | APPLE | +| banana | BANANA | +| cherry | CHERRY | +| date | DATE | ++--------+---------+ + +-- ORDER BY with CASE expressions +SELECT a, b, + CASE + WHEN a % 2 = 0 THEN 'even' + ELSE 'odd' + END as parity +FROM test +ORDER BY + CASE + WHEN a % 2 = 0 THEN 1 + ELSE 2 + END, a; + ++---+----+--------+ +| a | b | parity | ++---+----+--------+ +| 2 | 20 | even | +| 4 | 25 | even | +| 1 | 10 | odd | +| 3 | 15 | odd | ++---+----+--------+ + +-- ORDER BY with conditional expressions 
+SELECT a, b FROM test ORDER BY GREATEST(a, b) DESC; + ++---+----+ +| a | b | ++---+----+ +| 4 | 25 | +| 2 | 20 | +| 3 | 15 | +| 1 | 10 | ++---+----+ + +SELECT a, b FROM test ORDER BY LEAST(a, b); + ++---+----+ +| a | b | ++---+----+ +| 1 | 10 | +| 2 | 20 | +| 3 | 15 | +| 4 | 25 | ++---+----+ + +-- ORDER BY with NULL-related expressions +INSERT INTO test VALUES (NULL, NULL, NULL, 5000); + +Affected Rows: 1 + +SELECT a, b, COALESCE(a, 999) as a_or_999 +FROM test +ORDER BY COALESCE(a, 999); + ++---+----+----------+ +| a | b | a_or_999 | ++---+----+----------+ +| 1 | 10 | 1 | +| 2 | 20 | 2 | +| 3 | 15 | 3 | +| 4 | 25 | 4 | +| | | 999 | ++---+----+----------+ + +-- ORDER BY with subqueries in expressions +SELECT a, b, + a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL) as diff_from_min +FROM test +WHERE a IS NOT NULL +ORDER BY a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL); + +Error: 1001(Unsupported), This feature is not implemented: Physical plan does not support logical expression ScalarSubquery() + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/order/order_by_expressions.sql b/tests/cases/standalone/common/order/order_by_expressions.sql new file mode 100644 index 000000000000..d4467c95288d --- /dev/null +++ b/tests/cases/standalone/common/order/order_by_expressions.sql @@ -0,0 +1,54 @@ +-- Migrated from DuckDB test: test/sql/order/test_order_by_expressions.test +-- Test ORDER BY with expressions + +CREATE TABLE test(a INTEGER, b INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO test VALUES + (1, 10, 'apple', 1000), + (2, 20, 'banana', 2000), + (3, 15, 'cherry', 3000), + (4, 25, 'date', 4000); + +-- ORDER BY with arithmetic expressions +SELECT a, b, a + b as sum FROM test ORDER BY a + b; + +SELECT a, b, a * b as product FROM test ORDER BY a * b DESC; + +-- ORDER BY with string functions +SELECT s, LENGTH(s) as len FROM test ORDER BY LENGTH(s); + +SELECT s, UPPER(s) as upper_s FROM test ORDER BY UPPER(s); + +-- ORDER 
BY with CASE expressions +SELECT a, b, + CASE + WHEN a % 2 = 0 THEN 'even' + ELSE 'odd' + END as parity +FROM test +ORDER BY + CASE + WHEN a % 2 = 0 THEN 1 + ELSE 2 + END, a; + +-- ORDER BY with conditional expressions +SELECT a, b FROM test ORDER BY GREATEST(a, b) DESC; + +SELECT a, b FROM test ORDER BY LEAST(a, b); + +-- ORDER BY with NULL-related expressions +INSERT INTO test VALUES (NULL, NULL, NULL, 5000); + +SELECT a, b, COALESCE(a, 999) as a_or_999 +FROM test +ORDER BY COALESCE(a, 999); + +-- ORDER BY with subqueries in expressions +SELECT a, b, + a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL) as diff_from_min +FROM test +WHERE a IS NOT NULL +ORDER BY a - (SELECT MIN(a) FROM test WHERE a IS NOT NULL); + +DROP TABLE test; diff --git a/tests/cases/standalone/common/sample/basic_sample.result b/tests/cases/standalone/common/sample/basic_sample.result new file mode 100644 index 000000000000..1691337cd461 --- /dev/null +++ b/tests/cases/standalone/common/sample/basic_sample.result @@ -0,0 +1,93 @@ +-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test +-- FIXME: the results are wrong in this test, waits for https://github.com/apache/datafusion/pull/16325 +-- Test basic SAMPLE functionality +-- Create test table +CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert test data +INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000; + +Affected Rows: 10000 + +-- Test TABLESAMPLE with percentage +SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | ++---------------------+ + +-- Test TABLESAMPLE with row count +SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS); + ++----------+ +| count(*) | ++----------+ +| 10000 | ++----------+ + +-- Test TABLESAMPLE SYSTEM +SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | 
++---------------------+ + +-- Test TABLESAMPLE BERNOULLI +SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT); + ++---------------------+ +| count(*) > Int64(0) | ++---------------------+ +| true | ++---------------------+ + +-- Test with REPEATABLE for consistent results +SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + ++-------+ +| cnt1 | ++-------+ +| 10000 | ++-------+ + +SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + ++-------+ +| cnt2 | ++-------+ +| 10000 | ++-------+ + +-- Test sampling with WHERE clause +SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000; + ++----------+ +| count(*) | ++----------+ +| 4999 | ++----------+ + +-- Test sampling with ORDER BY +SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5; + ++---+ +| x | ++---+ +| 0 | +| 1 | +| 2 | +| 3 | +| 4 | ++---+ + +-- cleanup +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/sample/basic_sample.sql b/tests/cases/standalone/common/sample/basic_sample.sql new file mode 100644 index 000000000000..1e00de81af3e --- /dev/null +++ b/tests/cases/standalone/common/sample/basic_sample.sql @@ -0,0 +1,35 @@ +-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test +-- FIXME: the results are wrong in this test, waits for https://github.com/apache/datafusion/pull/16325 +-- Test basic SAMPLE functionality + +-- Create test table +CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX); + +-- Insert test data +INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000; + +-- Test TABLESAMPLE with percentage +SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT); + +-- Test TABLESAMPLE with row count +SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS); + +-- Test TABLESAMPLE SYSTEM +SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT); + +-- Test TABLESAMPLE BERNOULLI +SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT); + 
+-- Test with REPEATABLE for consistent results +SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + +SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42); + +-- Test sampling with WHERE clause +SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000; + +-- Test sampling with ORDER BY +SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5; + +-- cleanup +DROP TABLE test; \ No newline at end of file diff --git a/tests/cases/standalone/common/types/date/test_date.result b/tests/cases/standalone/common/types/date/test_date.result new file mode 100644 index 000000000000..ed7f2137421e --- /dev/null +++ b/tests/cases/standalone/common/types/date/test_date.result @@ -0,0 +1,135 @@ +-- Migrated from DuckDB test: test/sql/types/date/test_date.test +-- Test basic DATE functionality +-- Create and insert into table +CREATE TABLE dates(i DATE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO dates VALUES ('1993-08-14', 1000), (NULL, 2000); + +Affected Rows: 2 + +-- Check that we can select dates +SELECT * FROM dates ORDER BY ts; + ++------------+---------------------+ +| i | ts | ++------------+---------------------+ +| 1993-08-14 | 1970-01-01T00:00:01 | +| | 1970-01-01T00:00:02 | ++------------+---------------------+ + +-- extract function +SELECT extract(year FROM i) FROM dates ORDER BY ts; + ++---------------------------------+ +| date_part(Utf8("YEAR"),dates.i) | ++---------------------------------+ +| 1993 | +| | ++---------------------------------+ + +-- Check that we can convert dates to string +SELECT CAST(i AS VARCHAR) FROM dates ORDER BY ts; + ++------------+ +| dates.i | ++------------+ +| 1993-08-14 | +| | ++------------+ + +-- Check that we can add days to a date +SELECT i + INTERVAL '5 days' FROM dates ORDER BY ts; + ++-----------------------------------------------------------------------------------------------+ +| dates.i + IntervalMonthDayNano("IntervalMonthDayNano { 
months: 0, days: 5, nanoseconds: 0 }") | ++-----------------------------------------------------------------------------------------------+ +| 1993-08-19 | +| | ++-----------------------------------------------------------------------------------------------+ + +-- Check that we can subtract days from a date +SELECT i - INTERVAL '5 days' FROM dates ORDER BY ts; + ++-----------------------------------------------------------------------------------------------+ +| dates.i - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }") | ++-----------------------------------------------------------------------------------------------+ +| 1993-08-09 | +| | ++-----------------------------------------------------------------------------------------------+ + +-- Test date subtraction resulting in interval +SELECT i - DATE '1993-08-14' FROM dates ORDER BY ts; + ++------------------------------+ +| dates.i - Utf8("1993-08-14") | ++------------------------------+ +| P0D | +| | ++------------------------------+ + +-- Test various date formats +CREATE TABLE date_formats(d DATE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO date_formats VALUES + ('2021-03-01', 1000), + ('2021-12-31', 2000), + ('2000-01-01', 3000), + ('1970-01-01', 4000); + +Affected Rows: 4 + +SELECT d, extract(year FROM d), extract(month FROM d), extract(day FROM d) FROM date_formats ORDER BY d; + ++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ +| d | date_part(Utf8("YEAR"),date_formats.d) | date_part(Utf8("MONTH"),date_formats.d) | date_part(Utf8("DAY"),date_formats.d) | ++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ +| 1970-01-01 | 1970 | 1 | 1 | +| 2000-01-01 | 2000 | 1 | 1 | +| 2021-03-01 | 2021 | 3 | 1 | +| 2021-12-31 | 2021 | 12 | 31 | 
++------------+----------------------------------------+-----------------------------------------+---------------------------------------+ + +-- Test date comparison +SELECT d FROM date_formats WHERE d > '2000-01-01' ORDER BY d; + ++------------+ +| d | ++------------+ +| 2021-03-01 | +| 2021-12-31 | ++------------+ + +SELECT d FROM date_formats WHERE d BETWEEN '2000-01-01' AND '2021-06-01' ORDER BY d; + ++------------+ +| d | ++------------+ +| 2000-01-01 | +| 2021-03-01 | ++------------+ + +-- Test NULL handling +INSERT INTO date_formats VALUES (NULL, 5000); + +Affected Rows: 1 + +SELECT COUNT(*), COUNT(d) FROM date_formats; + ++----------+-----------------------+ +| count(*) | count(date_formats.d) | ++----------+-----------------------+ +| 5 | 4 | ++----------+-----------------------+ + +DROP TABLE dates; + +Affected Rows: 0 + +DROP TABLE date_formats; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/date/test_date.sql b/tests/cases/standalone/common/types/date/test_date.sql new file mode 100644 index 000000000000..5bf0db4b4b27 --- /dev/null +++ b/tests/cases/standalone/common/types/date/test_date.sql @@ -0,0 +1,50 @@ +-- Migrated from DuckDB test: test/sql/types/date/test_date.test +-- Test basic DATE functionality + +-- Create and insert into table +CREATE TABLE dates(i DATE, ts TIMESTAMP TIME INDEX); + +INSERT INTO dates VALUES ('1993-08-14', 1000), (NULL, 2000); + +-- Check that we can select dates +SELECT * FROM dates ORDER BY ts; + +-- extract function +SELECT extract(year FROM i) FROM dates ORDER BY ts; + +-- Check that we can convert dates to string +SELECT CAST(i AS VARCHAR) FROM dates ORDER BY ts; + +-- Check that we can add days to a date +SELECT i + INTERVAL '5 days' FROM dates ORDER BY ts; + +-- Check that we can subtract days from a date +SELECT i - INTERVAL '5 days' FROM dates ORDER BY ts; + +-- Test date subtraction resulting in interval +SELECT i - DATE '1993-08-14' FROM dates ORDER BY ts; + +-- Test various date formats 
+CREATE TABLE date_formats(d DATE, ts TIMESTAMP TIME INDEX); + +INSERT INTO date_formats VALUES + ('2021-03-01', 1000), + ('2021-12-31', 2000), + ('2000-01-01', 3000), + ('1970-01-01', 4000); + +SELECT d, extract(year FROM d), extract(month FROM d), extract(day FROM d) FROM date_formats ORDER BY d; + +-- Test date comparison +SELECT d FROM date_formats WHERE d > '2000-01-01' ORDER BY d; + +SELECT d FROM date_formats WHERE d BETWEEN '2000-01-01' AND '2021-06-01' ORDER BY d; + +-- Test NULL handling +INSERT INTO date_formats VALUES (NULL, 5000); + +SELECT COUNT(*), COUNT(d) FROM date_formats; + +DROP TABLE dates; + +DROP TABLE date_formats; diff --git a/tests/cases/standalone/common/types/float/ieee_floating_points.result b/tests/cases/standalone/common/types/float/ieee_floating_points.result new file mode 100644 index 000000000000..69198d490ec4 --- /dev/null +++ b/tests/cases/standalone/common/types/float/ieee_floating_points.result @@ -0,0 +1,144 @@ +-- Migrated from DuckDB test: test/sql/types/float/ieee_floating_points.test +-- Test IEEE floating point behavior +-- Test special float values +CREATE TABLE float_special(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert special values +INSERT INTO float_special VALUES + (0.0, 0.0, 1000), + (-0.0, -0.0, 2000), + ('inf'::FLOAT, 'inf'::DOUBLE, 3000), + ('-inf'::FLOAT, '-inf'::DOUBLE, 4000), + ('nan'::FLOAT, 'nan'::DOUBLE, 5000); + +Affected Rows: 5 + +-- Test basic operations with special values +SELECT f, d FROM float_special ORDER BY ts; + ++------+------+ +| f | d | ++------+------+ +| 0.0 | 0.0 | +| -0.0 | -0.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+------+ + +-- Test comparison with infinity +-- It doesn't follow the IEEE standard, but follows PG instead. 
+SELECT f, f > 1000000 FROM float_special ORDER BY ts; + ++------+----------------------------------+ +| f | float_special.f > Int64(1000000) | ++------+----------------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | true | +| -inf | false | +| NaN | true | ++------+----------------------------------+ + +SELECT d, d < -1000000 FROM float_special ORDER BY ts; + ++------+-----------------------------------+ +| d | float_special.d < Int64(-1000000) | ++------+-----------------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | false | +| -inf | true | +| NaN | false | ++------+-----------------------------------+ + +-- Test NaN behavior +-- NaN != NaN +SELECT f, f = f FROM float_special WHERE f != f ORDER BY ts; + +++ +++ + +SELECT d, d IS NULL FROM float_special ORDER BY ts; + ++------+-------------------------+ +| d | float_special.d IS NULL | ++------+-------------------------+ +| 0.0 | false | +| -0.0 | false | +| inf | false | +| -inf | false | +| NaN | false | ++------+-------------------------+ + +-- Test arithmetic with special values +SELECT f, f + 1 FROM float_special ORDER BY ts; + ++------+----------------------------+ +| f | float_special.f + Int64(1) | ++------+----------------------------+ +| 0.0 | 1.0 | +| -0.0 | 1.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+----------------------------+ + +SELECT d, d * 2 FROM float_special ORDER BY ts; + ++------+----------------------------+ +| d | float_special.d * Int64(2) | ++------+----------------------------+ +| 0.0 | 0.0 | +| -0.0 | -0.0 | +| inf | inf | +| -inf | -inf | +| NaN | NaN | ++------+----------------------------+ + +-- Test normal floating point precision +CREATE TABLE float_precision(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO float_precision VALUES + (1.23456789, 1.23456789012345, 1000), + (0.000001, 0.000000000001, 2000), + (1e10, 1e15, 3000), + (1e-10, 1e-15, 4000); + +Affected Rows: 4 + +SELECT f, d FROM 
float_precision ORDER BY ts; + ++---------------+--------------------+ +| f | d | ++---------------+--------------------+ +| 1.2345679 | 1.23456789012345 | +| 0.000001 | 1e-12 | +| 10000000000.0 | 1000000000000000.0 | +| 1e-10 | 1e-15 | ++---------------+--------------------+ + +-- Test rounding and precision +SELECT ROUND(f, 3), ROUND(d, 6) FROM float_precision ORDER BY ts; + ++-----------------------------------+-----------------------------------+ +| round(float_precision.f,Int64(3)) | round(float_precision.d,Int64(6)) | ++-----------------------------------+-----------------------------------+ +| 1.235 | 1.234568 | +| 0.0 | 0.0 | +| 10000000000.0 | 1000000000000000.0 | +| 0.0 | 0.0 | ++-----------------------------------+-----------------------------------+ + +DROP TABLE float_special; + +Affected Rows: 0 + +DROP TABLE float_precision; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/ieee_floating_points.sql b/tests/cases/standalone/common/types/float/ieee_floating_points.sql new file mode 100644 index 000000000000..755b20655481 --- /dev/null +++ b/tests/cases/standalone/common/types/float/ieee_floating_points.sql @@ -0,0 +1,51 @@ +-- Migrated from DuckDB test: test/sql/types/float/ieee_floating_points.test +-- Test IEEE floating point behavior + +-- Test special float values +CREATE TABLE float_special(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +-- Insert special values +INSERT INTO float_special VALUES + (0.0, 0.0, 1000), + (-0.0, -0.0, 2000), + ('inf'::FLOAT, 'inf'::DOUBLE, 3000), + ('-inf'::FLOAT, '-inf'::DOUBLE, 4000), + ('nan'::FLOAT, 'nan'::DOUBLE, 5000); + +-- Test basic operations with special values +SELECT f, d FROM float_special ORDER BY ts; + +-- Test comparison with infinity +-- It doesn't follow the IEEE standard, but follows PG instead. 
+SELECT f, f > 1000000 FROM float_special ORDER BY ts; + +SELECT d, d < -1000000 FROM float_special ORDER BY ts; + +-- Test NaN behavior +-- NaN != NaN +SELECT f, f = f FROM float_special WHERE f != f ORDER BY ts; + +SELECT d, d IS NULL FROM float_special ORDER BY ts; + +-- Test arithmetic with special values +SELECT f, f + 1 FROM float_special ORDER BY ts; + +SELECT d, d * 2 FROM float_special ORDER BY ts; + +-- Test normal floating point precision +CREATE TABLE float_precision(f FLOAT, d DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO float_precision VALUES + (1.23456789, 1.23456789012345, 1000), + (0.000001, 0.000000000001, 2000), + (1e10, 1e15, 3000), + (1e-10, 1e-15, 4000); + +SELECT f, d FROM float_precision ORDER BY ts; + +-- Test rounding and precision +SELECT ROUND(f, 3), ROUND(d, 6) FROM float_precision ORDER BY ts; + +DROP TABLE float_special; + +DROP TABLE float_precision; diff --git a/tests/cases/standalone/common/types/float/infinity_nan.result b/tests/cases/standalone/common/types/float/infinity_nan.result new file mode 100644 index 000000000000..dcfbdd81cec8 --- /dev/null +++ b/tests/cases/standalone/common/types/float/infinity_nan.result @@ -0,0 +1,184 @@ +-- Migrated from DuckDB test: test/sql/types/float/infinity_test.test, nan_aggregate.test +-- Test infinity and NaN handling +-- Note: it doesn't follow the IEEE standard, but follows PG instead: https://www.postgresql.org/docs/current/datatype-numeric.html +-- Test infinity operations +CREATE TABLE inf_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO inf_test VALUES + ('inf'::DOUBLE, 1000), + ('-inf'::DOUBLE, 2000), + (1.0, 3000), + (-1.0, 4000), + (0.0, 5000); + +Affected Rows: 5 + +-- Test infinity comparisons +SELECT val, val > 0 FROM inf_test ORDER BY ts; + ++------+-------------------------+ +| val | inf_test.val > Int64(0) | ++------+-------------------------+ +| inf | true | +| -inf | false | +| 1.0 | true | +| -1.0 | false | +| 0.0 | false | 
++------+-------------------------+ + +SELECT val, val < 0 FROM inf_test ORDER BY ts; + ++------+-------------------------+ +| val | inf_test.val < Int64(0) | ++------+-------------------------+ +| inf | false | +| -inf | true | +| 1.0 | false | +| -1.0 | true | +| 0.0 | false | ++------+-------------------------+ + +SELECT val, val = 'inf'::DOUBLE FROM inf_test ORDER BY ts; + ++------+----------------------------+ +| val | inf_test.val = Utf8("inf") | ++------+----------------------------+ +| inf | true | +| -inf | false | +| 1.0 | false | +| -1.0 | false | +| 0.0 | false | ++------+----------------------------+ + +-- Test infinity in aggregates +SELECT MAX(val), MIN(val) FROM inf_test; + ++-------------------+-------------------+ +| max(inf_test.val) | min(inf_test.val) | ++-------------------+-------------------+ +| inf | -inf | ++-------------------+-------------------+ + +SELECT SUM(val), AVG(val) FROM inf_test; + ++-------------------+-------------------+ +| sum(inf_test.val) | avg(inf_test.val) | ++-------------------+-------------------+ +| NaN | NaN | ++-------------------+-------------------+ + +-- Test NaN behavior +CREATE TABLE nan_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO nan_test VALUES + ('nan'::DOUBLE, 1000), + (1.0, 2000), + (2.0, 3000), + ('nan'::DOUBLE, 4000), + (3.0, 5000); + +Affected Rows: 5 + +-- Test NaN in aggregates +SELECT COUNT(*), COUNT(val) FROM nan_test; + ++----------+---------------------+ +| count(*) | count(nan_test.val) | ++----------+---------------------+ +| 5 | 5 | ++----------+---------------------+ + +SELECT MAX(val), MIN(val) FROM nan_test; + ++-------------------+-------------------+ +| max(nan_test.val) | min(nan_test.val) | ++-------------------+-------------------+ +| NaN | 1.0 | ++-------------------+-------------------+ + +SELECT SUM(val), AVG(val) FROM nan_test; + ++-------------------+-------------------+ +| sum(nan_test.val) | avg(nan_test.val) | 
++-------------------+-------------------+ +| NaN | NaN | ++-------------------+-------------------+ + +-- Test NaN comparisons +SELECT val, val = val FROM nan_test ORDER BY ts; + ++-----+-----------------------------+ +| val | nan_test.val = nan_test.val | ++-----+-----------------------------+ +| NaN | true | +| 1.0 | true | +| 2.0 | true | +| NaN | true | +| 3.0 | true | ++-----+-----------------------------+ + +SELECT val, val IS NULL FROM nan_test ORDER BY ts; + ++-----+----------------------+ +| val | nan_test.val IS NULL | ++-----+----------------------+ +| NaN | false | +| 1.0 | false | +| 2.0 | false | +| NaN | false | +| 3.0 | false | ++-----+----------------------+ + +-- Test arithmetic with infinity and NaN +SELECT 'inf'::DOUBLE + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'inf'::DOUBLE * 0; + ++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE + 1; + ++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE * 0; + ++------------------------+ +| Utf8("nan") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +DROP TABLE inf_test; + +Affected Rows: 0 + +DROP TABLE nan_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/infinity_nan.sql b/tests/cases/standalone/common/types/float/infinity_nan.sql new file mode 100644 index 000000000000..5f495170e14f --- /dev/null +++ b/tests/cases/standalone/common/types/float/infinity_nan.sql @@ -0,0 +1,61 @@ +-- Migrated from DuckDB test: test/sql/types/float/infinity_test.test, nan_aggregate.test +-- Test infinity and 
NaN handling +-- Note: it doesn't follow the IEEE standard, but follows PG instead: https://www.postgresql.org/docs/current/datatype-numeric.html +-- Test infinity operations +CREATE TABLE inf_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO inf_test VALUES + ('inf'::DOUBLE, 1000), + ('-inf'::DOUBLE, 2000), + (1.0, 3000), + (-1.0, 4000), + (0.0, 5000); + +-- Test infinity comparisons +SELECT val, val > 0 FROM inf_test ORDER BY ts; + +SELECT val, val < 0 FROM inf_test ORDER BY ts; + +SELECT val, val = 'inf'::DOUBLE FROM inf_test ORDER BY ts; + +-- Test infinity in aggregates +SELECT MAX(val), MIN(val) FROM inf_test; + +SELECT SUM(val), AVG(val) FROM inf_test; + +-- Test NaN behavior +CREATE TABLE nan_test(val DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO nan_test VALUES + ('nan'::DOUBLE, 1000), + (1.0, 2000), + (2.0, 3000), + ('nan'::DOUBLE, 4000), + (3.0, 5000); + +-- Test NaN in aggregates +SELECT COUNT(*), COUNT(val) FROM nan_test; + +SELECT MAX(val), MIN(val) FROM nan_test; + +SELECT SUM(val), AVG(val) FROM nan_test; + +-- Test NaN comparisons +SELECT val, val = val FROM nan_test ORDER BY ts; + +SELECT val, val IS NULL FROM nan_test ORDER BY ts; + +-- Test arithmetic with infinity and NaN +SELECT 'inf'::DOUBLE + 1; + +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + +SELECT 'inf'::DOUBLE * 0; + +SELECT 'nan'::DOUBLE + 1; + +SELECT 'nan'::DOUBLE * 0; + +DROP TABLE inf_test; + +DROP TABLE nan_test; diff --git a/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result new file mode 100644 index 000000000000..392b79fa4aa0 --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.result @@ -0,0 +1,317 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_arithmetic.test +-- Test arithmetic on NaN values +-- Test NaN arithmetic with FLOAT +-- Any arithmetic on a NaN value will result in a NaN value +SELECT 'nan'::FLOAT + 1; + 
++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT + 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") + Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT - 1; + ++------------------------+ +| Utf8("nan") - Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT - 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT * 1; + ++------------------------+ +| Utf8("nan") * Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT * 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") * Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT / 1; + ++------------------------+ +| Utf8("nan") / Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT / 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::FLOAT % 1; + ++------------------------+ +| Utf8("nan") % Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::FLOAT % 'inf'::FLOAT; + ++---------------------------+ +| Utf8("nan") % Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT -('nan'::FLOAT); + ++-----------------+ +| (- Utf8("nan")) | ++-----------------+ +| NaN | ++-----------------+ + +-- Test NaN arithmetic with DOUBLE +SELECT 'nan'::DOUBLE + 1; + ++------------------------+ +| Utf8("nan") + Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE + 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") + 
Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE - 1; + ++------------------------+ +| Utf8("nan") - Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE * 1; + ++------------------------+ +| Utf8("nan") * Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE * 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") * Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE / 1; + ++------------------------+ +| Utf8("nan") / Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE / 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT 'nan'::DOUBLE % 1; + ++------------------------+ +| Utf8("nan") % Int64(1) | ++------------------------+ +| NaN | ++------------------------+ + +SELECT 'nan'::DOUBLE % 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("nan") % Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +SELECT -('nan'::DOUBLE); + ++-----------------+ +| (- Utf8("nan")) | ++-----------------+ +| NaN | ++-----------------+ + +-- Test infinity arithmetic +SELECT 'inf'::FLOAT + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::FLOAT - 1; + ++------------------------+ +| Utf8("inf") - Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::FLOAT * 2; + ++------------------------+ +| Utf8("inf") * Int64(2) | ++------------------------+ +| inf | 
++------------------------+ + +SELECT 'inf'::FLOAT / 2; + ++------------------------+ +| Utf8("inf") / Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT -('inf'::FLOAT); + ++-----------------+ +| (- Utf8("inf")) | ++-----------------+ +| -inf | ++-----------------+ + +SELECT 'inf'::DOUBLE + 1; + ++------------------------+ +| Utf8("inf") + Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE - 1; + ++------------------------+ +| Utf8("inf") - Int64(1) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE * 2; + ++------------------------+ +| Utf8("inf") * Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT 'inf'::DOUBLE / 2; + ++------------------------+ +| Utf8("inf") / Int64(2) | ++------------------------+ +| inf | ++------------------------+ + +SELECT -('inf'::DOUBLE); + ++-----------------+ +| (- Utf8("inf")) | ++-----------------+ +| -inf | ++-----------------+ + +-- Test special infinity cases +-- Should be NaN +SELECT 'inf'::FLOAT - 'inf'::FLOAT; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::FLOAT / 'inf'::FLOAT; + ++---------------------------+ +| Utf8("inf") / Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::FLOAT * 0; + ++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") - Utf8("inf") | ++---------------------------+ +| NaN | ++---------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE / 'inf'::DOUBLE; + ++---------------------------+ +| Utf8("inf") / Utf8("inf") | ++---------------------------+ +| NaN | 
++---------------------------+ + +-- Should be NaN +SELECT 'inf'::DOUBLE * 0; + ++------------------------+ +| Utf8("inf") * Int64(0) | ++------------------------+ +| NaN | ++------------------------+ + diff --git a/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql new file mode 100644 index 000000000000..fe3d24c35c05 --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_arithmetic_extended.sql @@ -0,0 +1,91 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_arithmetic.test +-- Test arithmetic on NaN values + +-- Test NaN arithmetic with FLOAT +-- Any arithmetic on a NaN value will result in a NaN value + +SELECT 'nan'::FLOAT + 1; + +SELECT 'nan'::FLOAT + 'inf'::FLOAT; + +SELECT 'nan'::FLOAT - 1; + +SELECT 'nan'::FLOAT - 'inf'::FLOAT; + +SELECT 'nan'::FLOAT * 1; + +SELECT 'nan'::FLOAT * 'inf'::FLOAT; + +SELECT 'nan'::FLOAT / 1; + +SELECT 'nan'::FLOAT / 'inf'::FLOAT; + +SELECT 'nan'::FLOAT % 1; + +SELECT 'nan'::FLOAT % 'inf'::FLOAT; + +SELECT -('nan'::FLOAT); + +-- Test NaN arithmetic with DOUBLE +SELECT 'nan'::DOUBLE + 1; + +SELECT 'nan'::DOUBLE + 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE - 1; + +SELECT 'nan'::DOUBLE - 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE * 1; + +SELECT 'nan'::DOUBLE * 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE / 1; + +SELECT 'nan'::DOUBLE / 'inf'::DOUBLE; + +SELECT 'nan'::DOUBLE % 1; + +SELECT 'nan'::DOUBLE % 'inf'::DOUBLE; + +SELECT -('nan'::DOUBLE); + +-- Test infinity arithmetic +SELECT 'inf'::FLOAT + 1; + +SELECT 'inf'::FLOAT - 1; + +SELECT 'inf'::FLOAT * 2; + +SELECT 'inf'::FLOAT / 2; + +SELECT -('inf'::FLOAT); + +SELECT 'inf'::DOUBLE + 1; + +SELECT 'inf'::DOUBLE - 1; + +SELECT 'inf'::DOUBLE * 2; + +SELECT 'inf'::DOUBLE / 2; + +SELECT -('inf'::DOUBLE); + +-- Test special infinity cases +-- Should be NaN +SELECT 'inf'::FLOAT - 'inf'::FLOAT; + +-- Should be NaN +SELECT 'inf'::FLOAT / 'inf'::FLOAT; + +-- Should be NaN +SELECT 'inf'::FLOAT * 
0; + +-- Should be NaN +SELECT 'inf'::DOUBLE - 'inf'::DOUBLE; + +-- Should be NaN +SELECT 'inf'::DOUBLE / 'inf'::DOUBLE; + +-- Should be NaN +SELECT 'inf'::DOUBLE * 0; + diff --git a/tests/cases/standalone/common/types/float/nan_cast_extended.result b/tests/cases/standalone/common/types/float/nan_cast_extended.result new file mode 100644 index 000000000000..11098a1001dc --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_cast_extended.result @@ -0,0 +1,252 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_cast.test +-- Test casting of NaN and inf values +-- Test valid casts between FLOAT, DOUBLE, and VARCHAR +-- NaN casts +SELECT 'nan'::FLOAT::DOUBLE; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::FLOAT::VARCHAR; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::DOUBLE::FLOAT; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::DOUBLE::VARCHAR; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::VARCHAR::FLOAT; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +SELECT 'nan'::VARCHAR::DOUBLE; + ++-------------+ +| Utf8("nan") | ++-------------+ +| NaN | ++-------------+ + +-- Infinity casts +SELECT 'inf'::FLOAT::DOUBLE; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::FLOAT::VARCHAR; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::DOUBLE::FLOAT; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::DOUBLE::VARCHAR; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::VARCHAR::FLOAT; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | ++-------------+ + +SELECT 'inf'::VARCHAR::DOUBLE; + ++-------------+ +| Utf8("inf") | ++-------------+ +| inf | 
++-------------+ + +-- Negative infinity casts +SELECT '-inf'::FLOAT::DOUBLE; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::FLOAT::VARCHAR; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::DOUBLE::FLOAT; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::DOUBLE::VARCHAR; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::VARCHAR::FLOAT; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +SELECT '-inf'::VARCHAR::DOUBLE; + ++--------------+ +| Utf8("-inf") | ++--------------+ +| -inf | ++--------------+ + +-- Test TRY_CAST for invalid conversions (should return NULL) +SELECT TRY_CAST('nan'::FLOAT AS INTEGER); + ++-------------+ +| Utf8("nan") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('inf'::FLOAT AS INTEGER); + ++-------------+ +| Utf8("inf") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('-inf'::FLOAT AS INTEGER); + ++--------------+ +| Utf8("-inf") | ++--------------+ +| | ++--------------+ + +SELECT TRY_CAST('nan'::DOUBLE AS BIGINT); + ++-------------+ +| Utf8("nan") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('inf'::DOUBLE AS BIGINT); + ++-------------+ +| Utf8("inf") | ++-------------+ +| | ++-------------+ + +SELECT TRY_CAST('-inf'::DOUBLE AS BIGINT); + ++--------------+ +| Utf8("-inf") | ++--------------+ +| | ++--------------+ + +-- Test with table data +CREATE TABLE cast_test(f FLOAT, d DOUBLE, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO cast_test VALUES + ('nan'::FLOAT, 'nan'::DOUBLE, 'nan', 1000), + ('inf'::FLOAT, 'inf'::DOUBLE, 'inf', 2000), + ('-inf'::FLOAT, '-inf'::DOUBLE, '-inf', 3000), + (1.5, 2.5, '3.5', 4000); + +Affected Rows: 4 + +-- Cast between float types +SELECT f, f::DOUBLE AS fd, d, d::FLOAT AS df FROM cast_test ORDER BY ts; + 
++------+------+------+------+ +| f | fd | d | df | ++------+------+------+------+ +| NaN | NaN | NaN | NaN | +| inf | inf | inf | inf | +| -inf | -inf | -inf | -inf | +| 1.5 | 1.5 | 2.5 | 2.5 | ++------+------+------+------+ + +-- Cast to string +SELECT f::VARCHAR, d::VARCHAR FROM cast_test ORDER BY ts; + ++-------------+-------------+ +| cast_test.f | cast_test.d | ++-------------+-------------+ +| NaN | NaN | +| inf | inf | +| -inf | -inf | +| 1.5 | 2.5 | ++-------------+-------------+ + +-- Cast from string +SELECT s, TRY_CAST(s AS FLOAT) AS sf, TRY_CAST(s AS DOUBLE) AS sd FROM cast_test ORDER BY ts; + ++------+------+------+ +| s | sf | sd | ++------+------+------+ +| nan | NaN | NaN | +| inf | inf | inf | +| -inf | -inf | -inf | +| 3.5 | 3.5 | 3.5 | ++------+------+------+ + +DROP TABLE cast_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/float/nan_cast_extended.sql b/tests/cases/standalone/common/types/float/nan_cast_extended.sql new file mode 100644 index 000000000000..5c5caeeec9c6 --- /dev/null +++ b/tests/cases/standalone/common/types/float/nan_cast_extended.sql @@ -0,0 +1,76 @@ +-- Migrated from DuckDB test: test/sql/types/float/nan_cast.test +-- Test casting of NaN and inf values + +-- Test valid casts between FLOAT, DOUBLE, and VARCHAR + +-- NaN casts +SELECT 'nan'::FLOAT::DOUBLE; + +SELECT 'nan'::FLOAT::VARCHAR; + +SELECT 'nan'::DOUBLE::FLOAT; + +SELECT 'nan'::DOUBLE::VARCHAR; + +SELECT 'nan'::VARCHAR::FLOAT; + +SELECT 'nan'::VARCHAR::DOUBLE; + +-- Infinity casts +SELECT 'inf'::FLOAT::DOUBLE; + +SELECT 'inf'::FLOAT::VARCHAR; + +SELECT 'inf'::DOUBLE::FLOAT; + +SELECT 'inf'::DOUBLE::VARCHAR; + +SELECT 'inf'::VARCHAR::FLOAT; + +SELECT 'inf'::VARCHAR::DOUBLE; + +-- Negative infinity casts +SELECT '-inf'::FLOAT::DOUBLE; + +SELECT '-inf'::FLOAT::VARCHAR; + +SELECT '-inf'::DOUBLE::FLOAT; + +SELECT '-inf'::DOUBLE::VARCHAR; + +SELECT '-inf'::VARCHAR::FLOAT; + +SELECT '-inf'::VARCHAR::DOUBLE; + +-- Test TRY_CAST for invalid 
conversions (should return NULL) +SELECT TRY_CAST('nan'::FLOAT AS INTEGER); + +SELECT TRY_CAST('inf'::FLOAT AS INTEGER); + +SELECT TRY_CAST('-inf'::FLOAT AS INTEGER); + +SELECT TRY_CAST('nan'::DOUBLE AS BIGINT); + +SELECT TRY_CAST('inf'::DOUBLE AS BIGINT); + +SELECT TRY_CAST('-inf'::DOUBLE AS BIGINT); + +-- Test with table data +CREATE TABLE cast_test(f FLOAT, d DOUBLE, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO cast_test VALUES + ('nan'::FLOAT, 'nan'::DOUBLE, 'nan', 1000), + ('inf'::FLOAT, 'inf'::DOUBLE, 'inf', 2000), + ('-inf'::FLOAT, '-inf'::DOUBLE, '-inf', 3000), + (1.5, 2.5, '3.5', 4000); + +-- Cast between float types +SELECT f, f::DOUBLE AS fd, d, d::FLOAT AS df FROM cast_test ORDER BY ts; + +-- Cast to string +SELECT f::VARCHAR, d::VARCHAR FROM cast_test ORDER BY ts; + +-- Cast from string +SELECT s, TRY_CAST(s AS FLOAT) AS sf, TRY_CAST(s AS DOUBLE) AS sd FROM cast_test ORDER BY ts; + +DROP TABLE cast_test; diff --git a/tests/cases/standalone/common/types/null/null_handling.result b/tests/cases/standalone/common/types/null/null_handling.result new file mode 100644 index 000000000000..320ced424ccb --- /dev/null +++ b/tests/cases/standalone/common/types/null/null_handling.result @@ -0,0 +1,171 @@ +-- Migrated from DuckDB test: test/sql/types/null/test_null.test +-- Test NULL value handling across different contexts +-- Test NULL in basic operations +CREATE TABLE null_test(i INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO null_test VALUES + (1, 'hello', 1000), + (NULL, 'world', 2000), + (3, NULL, 3000), + (NULL, NULL, 4000); + +Affected Rows: 4 + +-- Test NULL comparisons +SELECT i, s FROM null_test WHERE i IS NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| | world | +| | | ++---+-------+ + +SELECT i, s FROM null_test WHERE i IS NOT NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| 1 | hello | +| 3 | | ++---+-------+ + +SELECT i, s FROM null_test WHERE s IS NULL ORDER BY ts; + ++---+---+ 
+| i | s | ++---+---+ +| 3 | | +| | | ++---+---+ + +SELECT i, s FROM null_test WHERE s IS NOT NULL ORDER BY ts; + ++---+-------+ +| i | s | ++---+-------+ +| 1 | hello | +| | world | ++---+-------+ + +-- Test NULL in arithmetic +SELECT i, i + 1, i * 2, i - 5 FROM null_test ORDER BY ts; + ++---+------------------------+------------------------+------------------------+ +| i | null_test.i + Int64(1) | null_test.i * Int64(2) | null_test.i - Int64(5) | ++---+------------------------+------------------------+------------------------+ +| 1 | 2 | 2 | -4 | +| | | | | +| 3 | 4 | 6 | -2 | +| | | | | ++---+------------------------+------------------------+------------------------+ + +-- Test NULL in string operations +SELECT s, CONCAT(s, ' test'), UPPER(s), LENGTH(s) FROM null_test ORDER BY ts; + ++-------+-----------------------------------+--------------------+---------------------+ +| s | concat(null_test.s,Utf8(" test")) | upper(null_test.s) | length(null_test.s) | ++-------+-----------------------------------+--------------------+---------------------+ +| hello | hello test | HELLO | 5 | +| world | world test | WORLD | 5 | +| | test | | | +| | test | | | ++-------+-----------------------------------+--------------------+---------------------+ + +-- Test NULL with COALESCE +SELECT i, s, COALESCE(i, -1), COALESCE(s, 'missing') FROM null_test ORDER BY ts; + ++---+-------+---------------------------------+---------------------------------------+ +| i | s | coalesce(null_test.i,Int64(-1)) | coalesce(null_test.s,Utf8("missing")) | ++---+-------+---------------------------------+---------------------------------------+ +| 1 | hello | 1 | hello | +| | world | -1 | world | +| 3 | | 3 | missing | +| | | -1 | missing | ++---+-------+---------------------------------+---------------------------------------+ + +-- Test NULL in aggregates +SELECT COUNT(*), COUNT(i), COUNT(s) FROM null_test; + ++----------+--------------------+--------------------+ +| count(*) | count(null_test.i) | 
count(null_test.s) | ++----------+--------------------+--------------------+ +| 4 | 2 | 2 | ++----------+--------------------+--------------------+ + +SELECT SUM(i), AVG(i), MAX(i), MIN(i) FROM null_test; + ++------------------+------------------+------------------+------------------+ +| sum(null_test.i) | avg(null_test.i) | max(null_test.i) | min(null_test.i) | ++------------------+------------------+------------------+------------------+ +| 4 | 2.0 | 3 | 1 | ++------------------+------------------+------------------+------------------+ + +-- Test NULL in CASE expressions +SELECT i, s, + CASE + WHEN i IS NULL THEN 'no number' + WHEN i > 2 THEN 'big number' + ELSE 'small number' + END as category +FROM null_test ORDER BY ts; + ++---+-------+--------------+ +| i | s | category | ++---+-------+--------------+ +| 1 | hello | small number | +| | world | no number | +| 3 | | big number | +| | | no number | ++---+-------+--------------+ + +-- Test NULL in GROUP BY +SELECT i, COUNT(*) FROM null_test GROUP BY i ORDER BY i; + ++---+----------+ +| i | count(*) | ++---+----------+ +| 1 | 1 | +| 3 | 1 | +| | 2 | ++---+----------+ + +SELECT s, COUNT(*) FROM null_test GROUP BY s ORDER BY s; + ++-------+----------+ +| s | count(*) | ++-------+----------+ +| hello | 1 | +| world | 1 | +| | 2 | ++-------+----------+ + +-- Test NULLIF function +SELECT i, NULLIF(i, 1) FROM null_test ORDER BY ts; + ++---+------------------------------+ +| i | nullif(null_test.i,Int64(1)) | ++---+------------------------------+ +| 1 | | +| | | +| 3 | 3 | +| | | ++---+------------------------------+ + +SELECT s, NULLIF(s, 'hello') FROM null_test ORDER BY ts; + ++-------+-----------------------------------+ +| s | nullif(null_test.s,Utf8("hello")) | ++-------+-----------------------------------+ +| hello | | +| world | world | +| | | +| | | ++-------+-----------------------------------+ + +DROP TABLE null_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/null/null_handling.sql 
b/tests/cases/standalone/common/types/null/null_handling.sql new file mode 100644 index 000000000000..e0fb4607784c --- /dev/null +++ b/tests/cases/standalone/common/types/null/null_handling.sql @@ -0,0 +1,49 @@ +-- Migrated from DuckDB test: test/sql/types/null/test_null.test +-- Test NULL value handling across different contexts + +-- Test NULL in basic operations +CREATE TABLE null_test(i INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO null_test VALUES + (1, 'hello', 1000), + (NULL, 'world', 2000), + (3, NULL, 3000), + (NULL, NULL, 4000); + +-- Test NULL comparisons +SELECT i, s FROM null_test WHERE i IS NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE i IS NOT NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE s IS NULL ORDER BY ts; +SELECT i, s FROM null_test WHERE s IS NOT NULL ORDER BY ts; + +-- Test NULL in arithmetic +SELECT i, i + 1, i * 2, i - 5 FROM null_test ORDER BY ts; + +-- Test NULL in string operations +SELECT s, CONCAT(s, ' test'), UPPER(s), LENGTH(s) FROM null_test ORDER BY ts; + +-- Test NULL with COALESCE +SELECT i, s, COALESCE(i, -1), COALESCE(s, 'missing') FROM null_test ORDER BY ts; + +-- Test NULL in aggregates +SELECT COUNT(*), COUNT(i), COUNT(s) FROM null_test; +SELECT SUM(i), AVG(i), MAX(i), MIN(i) FROM null_test; + +-- Test NULL in CASE expressions +SELECT i, s, + CASE + WHEN i IS NULL THEN 'no number' + WHEN i > 2 THEN 'big number' + ELSE 'small number' + END as category +FROM null_test ORDER BY ts; + +-- Test NULL in GROUP BY +SELECT i, COUNT(*) FROM null_test GROUP BY i ORDER BY i; +SELECT s, COUNT(*) FROM null_test GROUP BY s ORDER BY s; + +-- Test NULLIF function +SELECT i, NULLIF(i, 1) FROM null_test ORDER BY ts; +SELECT s, NULLIF(s, 'hello') FROM null_test ORDER BY ts; + +DROP TABLE null_test; diff --git a/tests/cases/standalone/common/types/string/big_strings.result b/tests/cases/standalone/common/types/string/big_strings.result new file mode 100644 index 000000000000..a81ff17cf5af --- /dev/null +++ 
b/tests/cases/standalone/common/types/string/big_strings.result @@ -0,0 +1,116 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_big_strings.test +-- Test handling of large strings +-- Test large string creation and manipulation +CREATE TABLE big_strings("id" INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Insert strings of various sizes +INSERT INTO big_strings VALUES + (1, REPEAT('a', 100), 1000), + (2, REPEAT('Hello World! ', 50), 2000), + (3, REPEAT('Unicode 世界 ', 100), 3000), + (4, REPEAT('x', 1000), 4000); + +Affected Rows: 4 + +-- Test length of big strings +SELECT "id", LENGTH(s) FROM big_strings ORDER BY "id"; + ++----+-----------------------+ +| id | length(big_strings.s) | ++----+-----------------------+ +| 1 | 100 | +| 2 | 650 | +| 3 | 1100 | +| 4 | 1000 | ++----+-----------------------+ + +-- Test substring operations on big strings +SELECT "id", SUBSTRING(s, 1, 20) FROM big_strings ORDER BY "id"; + ++----+------------------------------------------+ +| id | substr(big_strings.s,Int64(1),Int64(20)) | ++----+------------------------------------------+ +| 1 | aaaaaaaaaaaaaaaaaaaa | +| 2 | Hello World! Hello W | +| 3 | Unicode 世界 Unicode 世 | +| 4 | xxxxxxxxxxxxxxxxxxxx | ++----+------------------------------------------+ + +SELECT "id", RIGHT(s, 10) FROM big_strings ORDER BY "id"; + ++----+--------------------------------+ +| id | right(big_strings.s,Int64(10)) | ++----+--------------------------------+ +| 1 | aaaaaaaaaa | +| 2 | lo World! 
| +| 3 | nicode 世界 | +| 4 | xxxxxxxxxx | ++----+--------------------------------+ + +-- Test concatenation with big strings +SELECT "id", LENGTH(s || s) FROM big_strings WHERE "id" = 1; + ++----+----------------------------------------+ +| id | length(big_strings.s || big_strings.s) | ++----+----------------------------------------+ +| 1 | 200 | ++----+----------------------------------------+ + +-- Test pattern matching on big strings +SELECT "id", s LIKE '%World%' FROM big_strings ORDER BY "id"; + ++----+------------------------------------+ +| id | big_strings.s LIKE Utf8("%World%") | ++----+------------------------------------+ +| 1 | false | +| 2 | true | +| 3 | false | +| 4 | false | ++----+------------------------------------+ + +-- Test comparison with big strings +SELECT COUNT(*) FROM big_strings WHERE s = REPEAT('a', 100); + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +-- Test UPPER/LOWER on big strings +SELECT "id", LENGTH(UPPER(s)) FROM big_strings WHERE "id" <= 2 ORDER BY "id"; + ++----+------------------------------+ +| id | length(upper(big_strings.s)) | ++----+------------------------------+ +| 1 | 100 | +| 2 | 650 | ++----+------------------------------+ + +-- Test trimming big strings +CREATE TABLE padded_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO padded_strings VALUES (CONCAT(' ', REPEAT('test', 100), ' '), 1000); + +Affected Rows: 1 + +SELECT LENGTH(s), LENGTH(TRIM(s)) FROM padded_strings; + ++--------------------------+---------------------------------+ +| length(padded_strings.s) | length(btrim(padded_strings.s)) | ++--------------------------+---------------------------------+ +| 406 | 400 | ++--------------------------+---------------------------------+ + +DROP TABLE big_strings; + +Affected Rows: 0 + +DROP TABLE padded_strings; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/string/big_strings.sql b/tests/cases/standalone/common/types/string/big_strings.sql new 
file mode 100644 index 000000000000..0c654ecb01cd --- /dev/null +++ b/tests/cases/standalone/common/types/string/big_strings.sql @@ -0,0 +1,43 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_big_strings.test +-- Test handling of large strings + +-- Test large string creation and manipulation +CREATE TABLE big_strings("id" INTEGER, s VARCHAR, ts TIMESTAMP TIME INDEX); + +-- Insert strings of various sizes +INSERT INTO big_strings VALUES + (1, REPEAT('a', 100), 1000), + (2, REPEAT('Hello World! ', 50), 2000), + (3, REPEAT('Unicode 世界 ', 100), 3000), + (4, REPEAT('x', 1000), 4000); + +-- Test length of big strings +SELECT "id", LENGTH(s) FROM big_strings ORDER BY "id"; + +-- Test substring operations on big strings +SELECT "id", SUBSTRING(s, 1, 20) FROM big_strings ORDER BY "id"; + +SELECT "id", RIGHT(s, 10) FROM big_strings ORDER BY "id"; + +-- Test concatenation with big strings +SELECT "id", LENGTH(s || s) FROM big_strings WHERE "id" = 1; + +-- Test pattern matching on big strings +SELECT "id", s LIKE '%World%' FROM big_strings ORDER BY "id"; + +-- Test comparison with big strings +SELECT COUNT(*) FROM big_strings WHERE s = REPEAT('a', 100); + +-- Test UPPER/LOWER on big strings +SELECT "id", LENGTH(UPPER(s)) FROM big_strings WHERE "id" <= 2 ORDER BY "id"; + +-- Test trimming big strings +CREATE TABLE padded_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO padded_strings VALUES (CONCAT(' ', REPEAT('test', 100), ' '), 1000); + +SELECT LENGTH(s), LENGTH(TRIM(s)) FROM padded_strings; + +DROP TABLE big_strings; + +DROP TABLE padded_strings; diff --git a/tests/cases/standalone/common/types/string/unicode_extended.result b/tests/cases/standalone/common/types/string/unicode_extended.result new file mode 100644 index 000000000000..6a1ad83b8579 --- /dev/null +++ b/tests/cases/standalone/common/types/string/unicode_extended.result @@ -0,0 +1,103 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_unicode.test +-- Test Unicode string handling 
+-- Test basic Unicode strings +CREATE TABLE unicode_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO unicode_test VALUES + ('Hello 世界', 1000), + ('Ññññ', 2000), + ('🚀🎉🌟', 3000), + ('Здравствуй мир', 4000), + ('مرحبا بالعالم', 5000), + ('こんにちは世界', 6000); + +Affected Rows: 6 + +-- Test basic selection +SELECT s FROM unicode_test ORDER BY ts; + ++----------------+ +| s | ++----------------+ +| Hello 世界 | +| Ññññ | +| 🚀🎉🌟 | +| Здравствуй мир | +| مرحبا بالعالم | +| こんにちは世界 | ++----------------+ + +-- Test length function with Unicode +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM unicode_test ORDER BY ts; + ++----------------+----+----+ +| s | a | b | ++----------------+----+----+ +| Hello 世界 | 8 | 8 | +| Ññññ | 4 | 4 | +| 🚀🎉🌟 | 3 | 3 | +| Здравствуй мир | 14 | 14 | +| مرحبا بالعالم | 13 | 13 | +| こんにちは世界 | 7 | 7 | ++----------------+----+----+ + +-- Test substring with Unicode +SELECT s, SUBSTRING(s, 1, 5) FROM unicode_test ORDER BY ts; + ++----------------+------------------------------------------+ +| s | substr(unicode_test.s,Int64(1),Int64(5)) | ++----------------+------------------------------------------+ +| Hello 世界 | Hello | +| Ññññ | Ññññ | +| 🚀🎉🌟 | 🚀🎉🌟 | +| Здравствуй мир | Здрав | +| مرحبا بالعالم | مرحبا | +| こんにちは世界 | こんにちは | ++----------------+------------------------------------------+ + +-- Test UPPER/LOWER with Unicode +SELECT s, UPPER(s), LOWER(s) FROM unicode_test WHERE s = 'Hello 世界'; + ++------------+-----------------------+-----------------------+ +| s | upper(unicode_test.s) | lower(unicode_test.s) | ++------------+-----------------------+-----------------------+ +| Hello 世界 | HELLO 世界 | hello 世界 | ++------------+-----------------------+-----------------------+ + +-- Test comparison with Unicode +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%世界%'; + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%🚀%'; + ++----------+ +| count(*) | 
++----------+ +| 1 | ++----------+ + +-- Test concatenation with Unicode +SELECT CONCAT(s, ' - test') FROM unicode_test ORDER BY ts; + ++----------------------------------------+ +| concat(unicode_test.s,Utf8(" - test")) | ++----------------------------------------+ +| Hello 世界 - test | +| Ññññ - test | +| 🚀🎉🌟 - test | +| Здравствуй мир - test | +| مرحبا بالعالم - test | +| こんにちは世界 - test | ++----------------------------------------+ + +DROP TABLE unicode_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/types/string/unicode_extended.sql b/tests/cases/standalone/common/types/string/unicode_extended.sql new file mode 100644 index 000000000000..3e6f47f3c811 --- /dev/null +++ b/tests/cases/standalone/common/types/string/unicode_extended.sql @@ -0,0 +1,35 @@ +-- Migrated from DuckDB test: test/sql/types/string/test_unicode.test +-- Test Unicode string handling + +-- Test basic Unicode strings +CREATE TABLE unicode_test(s VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO unicode_test VALUES + ('Hello 世界', 1000), + ('Ññññ', 2000), + ('🚀🎉🌟', 3000), + ('Здравствуй мир', 4000), + ('مرحبا بالعالم', 5000), + ('こんにちは世界', 6000); + +-- Test basic selection +SELECT s FROM unicode_test ORDER BY ts; + +-- Test length function with Unicode +SELECT s, LENGTH(s) AS a, CHAR_LENGTH(s) AS b FROM unicode_test ORDER BY ts; + +-- Test substring with Unicode +SELECT s, SUBSTRING(s, 1, 5) FROM unicode_test ORDER BY ts; + +-- Test UPPER/LOWER with Unicode +SELECT s, UPPER(s), LOWER(s) FROM unicode_test WHERE s = 'Hello 世界'; + +-- Test comparison with Unicode +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%世界%'; + +SELECT COUNT(*) FROM unicode_test WHERE s LIKE '%🚀%'; + +-- Test concatenation with Unicode +SELECT CONCAT(s, ' - test') FROM unicode_test ORDER BY ts; + +DROP TABLE unicode_test;