From 80d89460475e599a0b76b0531fe64838efc45326 Mon Sep 17 00:00:00 2001 From: Shiv Bhatia Date: Thu, 25 Sep 2025 15:43:04 +0100 Subject: [PATCH 1/4] Extend datatype semantic equality to include timestamps --- datafusion/common/src/dfschema.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 7c481f66d9a8..a9a0dfc431e7 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -747,7 +747,8 @@ impl DFSchema { } /// Returns true of two [`DataType`]s are semantically equal (same - /// name and type), ignoring both metadata and nullability, and decimal precision/scale. + /// name and type), ignoring both metadata and nullability, decimal precision/scale, + /// and timestamp time units/timezones. /// /// request to upstream: pub fn datatype_is_semantically_equal(dt1: &DataType, dt2: &DataType) -> bool { @@ -814,6 +815,10 @@ impl DFSchema { DataType::Decimal256(_l_precision, _l_scale), DataType::Decimal256(_r_precision, _r_scale), ) => true, + ( + DataType::Timestamp(_l_time_unit, _l_timezone), + DataType::Timestamp(_r_time_unit, _r_timezone), + ) => true, _ => dt1 == dt2, } } From c80e96b4ea93f6044c88278fad1f9d0e7b0f647a Mon Sep 17 00:00:00 2001 From: Shiv Bhatia Date: Thu, 25 Sep 2025 17:29:43 +0100 Subject: [PATCH 2/4] test --- datafusion/common/src/dfschema.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index a9a0dfc431e7..1244b65999f3 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1332,6 +1332,7 @@ pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String #[cfg(test)] mod tests { + use arrow::datatypes::TimeUnit; use crate::assert_contains; use super::*; @@ -1816,6 +1817,12 @@ mod tests { &DataType::Decimal256(2, 1), )); + // Any two timestamp types should match + 
assert!(DFSchema::datatype_is_semantically_equal( + &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + &DataType::Timestamp(TimeUnit::Millisecond, None), + )); + // Test lists // Succeeds if both have the same element type, disregards names and nullability @@ -2111,7 +2118,7 @@ mod tests { Field::new( "timestamp_field", DataType::Timestamp( - arrow::datatypes::TimeUnit::Microsecond, + TimeUnit::Microsecond, Some("UTC".into()), ), false, @@ -2407,12 +2414,12 @@ mod tests { Field::new("date64", DataType::Date64, false), Field::new( "time32_seconds", - DataType::Time32(arrow::datatypes::TimeUnit::Second), + DataType::Time32(TimeUnit::Second), true, ), Field::new( "time64_nanoseconds", - DataType::Time64(arrow::datatypes::TimeUnit::Nanosecond), + DataType::Time64(TimeUnit::Nanosecond), false, ), ] From 1acefca6f53582ca025c3f335aa40e6bc63d54c9 Mon Sep 17 00:00:00 2001 From: Shiv Bhatia Date: Fri, 26 Sep 2025 11:44:24 +0100 Subject: [PATCH 3/4] Respond to comments --- datafusion/common/src/dfschema.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 1244b65999f3..2e0b1aa657af 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1332,7 +1332,6 @@ pub fn qualified_name(qualifier: Option<&TableReference>, name: &str) -> String #[cfg(test)] mod tests { - use arrow::datatypes::TimeUnit; use crate::assert_contains; use super::*; @@ -1819,8 +1818,11 @@ mod tests { // Any two timestamp types should match assert!(DFSchema::datatype_is_semantically_equal( - &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), - &DataType::Timestamp(TimeUnit::Millisecond, None), + &DataType::Timestamp( + arrow::datatypes::TimeUnit::Microsecond, + Some("UTC".into()) + ), + &DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None), )); // Test lists @@ -2117,10 +2119,7 @@ mod tests { map_field, Field::new( 
"timestamp_field", - DataType::Timestamp( - TimeUnit::Microsecond, - Some("UTC".into()), - ), + DataType::Timestamp(arrow::datatypes::TimeUnit::Microsecond, Some("UTC".into())), false, ), ] @@ -2412,14 +2411,10 @@ mod tests { Field::new("decimal256", DataType::Decimal256(38, 10), false), Field::new("date32", DataType::Date32, true), Field::new("date64", DataType::Date64, false), - Field::new( - "time32_seconds", - DataType::Time32(TimeUnit::Second), - true, - ), + Field::new("time32_seconds", DataType::Time32(arrow::datatypes::TimeUnit::Second), true), Field::new( "time64_nanoseconds", - DataType::Time64(TimeUnit::Nanosecond), + DataType::Time64(arrow::datatypes::TimeUnit::Nanosecond), false, ), ] From d73620061659e3c98066a1e6fdacb01b8a5ed7a2 Mon Sep 17 00:00:00 2001 From: Shiv Bhatia Date: Fri, 26 Sep 2025 11:47:31 +0100 Subject: [PATCH 4/4] cargo fmt --- datafusion/common/src/dfschema.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 2e0b1aa657af..b195b1d4a184 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -2119,7 +2119,10 @@ mod tests { map_field, Field::new( "timestamp_field", - DataType::Timestamp(arrow::datatypes::TimeUnit::Microsecond, Some("UTC".into())), + DataType::Timestamp( + arrow::datatypes::TimeUnit::Microsecond, + Some("UTC".into()), + ), false, ), ] @@ -2411,7 +2414,11 @@ mod tests { Field::new("decimal256", DataType::Decimal256(38, 10), false), Field::new("date32", DataType::Date32, true), Field::new("date64", DataType::Date64, false), - Field::new("time32_seconds", DataType::Time32(arrow::datatypes::TimeUnit::Second), true), + Field::new( + "time32_seconds", + DataType::Time32(arrow::datatypes::TimeUnit::Second), + true, + ), Field::new( "time64_nanoseconds", DataType::Time64(arrow::datatypes::TimeUnit::Nanosecond),