Skip to content

Commit e003775

Browse files
committed
fixup! [Variant] Support typed access for timestamp(micro&nano)
1 parent 0748c0e commit e003775

File tree

3 files changed: 156 additions & 83 deletions

parquet-variant-compute/src/variant_array.rs

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -633,27 +633,40 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: usize) -> Variant<'_, '
633633
DataType::Float64 => {
634634
primitive_conversion_single_value!(Float64Type, typed_value, index)
635635
}
636-
DataType::Timestamp(timeunit, _) => {
637-
match timeunit {
638-
TimeUnit::Microsecond => {
636+
DataType::Timestamp(timeunit, tz) => {
637+
match (timeunit, tz) {
638+
(TimeUnit::Microsecond, Some(_)) => {
639639
generic_conversion_single_value!(
640640
TimestampMicrosecondType,
641641
as_primitive,
642-
|v| DateTime::from_timestamp(v, 0).unwrap(),
642+
|v| DateTime::from_timestamp_micros(v).unwrap(),
643643
typed_value,
644644
index
645645
)
646646
}
647-
TimeUnit::Nanosecond => {
647+
(TimeUnit::Microsecond, None) => {
648+
generic_conversion_single_value!(
649+
TimestampMicrosecondType,
650+
as_primitive,
651+
|v| DateTime::from_timestamp_micros(v).unwrap().naive_utc(),
652+
typed_value,
653+
index
654+
)
655+
}
656+
(TimeUnit::Nanosecond, Some(_)) => {
657+
generic_conversion_single_value!(
658+
TimestampNanosecondType,
659+
as_primitive,
660+
DateTime::from_timestamp_nanos,
661+
typed_value,
662+
index
663+
)
664+
}
665+
(TimeUnit::Nanosecond, None) => {
648666
generic_conversion_single_value!(
649667
TimestampNanosecondType,
650668
as_primitive,
651-
|v| {
652-
// make nano positive
653-
let nano = ((v % 1000) + 1000i64) as u32 % 1000;
654-
let sec = (v - nano as i64) / 1000;
655-
DateTime::from_timestamp(sec, nano).unwrap()
656-
},
669+
|v| DateTime::from_timestamp_nanos(v).naive_utc(),
657670
typed_value,
658671
index
659672
)

parquet-variant-compute/src/variant_get.rs

Lines changed: 124 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ pub(crate) enum ShreddedPathStep<'a> {
3333
/// Path step succeeded, return the new shredding state
3434
Success(&'a ShreddingState),
3535
/// The path element is not present in the `typed_value` column and there is no `value` column,
36-
/// so we we know it does not exist. It, and all paths under it, are all-NULL.
36+
/// so we know it does not exist. It, and all paths under it, are all-NULL.
3737
Missing,
3838
/// The path element is not present in the `typed_value` column and must be retrieved from the `value`
3939
/// column instead. The caller should be prepared to handle any value, including the requested
@@ -930,68 +930,154 @@ mod test {
930930
f64
931931
);
932932

933+
macro_rules! assert_variant_get_as_variant_array_with_default_option {
934+
($variant_array: expr, $array_expected: expr) => {{
935+
let options = GetOptions::new();
936+
let array = $variant_array;
937+
let result = variant_get(&array, options).unwrap();
938+
939+
// expect the result to be a VariantArray
940+
let result: &VariantArray = result.as_any().downcast_ref().unwrap();
941+
942+
assert_eq!(result.len(), $array_expected.len());
943+
944+
for (idx, item) in $array_expected.into_iter().enumerate() {
945+
match item {
946+
Some(item) => assert_eq!(result.value(idx), item),
947+
None => assert!(result.is_null(idx)),
948+
}
949+
}
950+
}};
951+
}
952+
953+
partially_shredded_variant_array_gen!(
954+
partially_shredded_timestamp_micro_ntz_variant_array,
955+
|| {
956+
arrow::array::TimestampMicrosecondArray::from(vec![
957+
Some(-456000),
958+
None,
959+
None,
960+
Some(1758602096000000),
961+
])
962+
}
963+
);
964+
965+
#[test]
966+
fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
967+
let array = partially_shredded_timestamp_micro_ntz_variant_array();
968+
assert_variant_get_as_variant_array_with_default_option!(
969+
array,
970+
vec![
971+
Some(Variant::from(
972+
DateTime::from_timestamp_micros(-456000i64)
973+
.unwrap()
974+
.naive_utc(),
975+
)),
976+
None,
977+
Some(Variant::from("n/a")),
978+
Some(Variant::from(
979+
DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
980+
.unwrap()
981+
.naive_utc(),
982+
)),
983+
]
984+
)
985+
}
986+
933987
partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
934988
arrow::array::TimestampMicrosecondArray::from(vec![
935-
Some(-456),
989+
Some(-456000),
936990
None,
937991
None,
938-
Some(1758375670),
992+
Some(1758602096000000),
939993
])
994+
.with_timezone("+00:00")
940995
});
941996

942997
#[test]
943998
fn get_variant_partial_shredded_timestamp_micro_as_variant() {
944999
let array = partially_shredded_timestamp_micro_variant_array();
945-
let options = GetOptions::new();
946-
let result = variant_get(&array, options).unwrap();
1000+
assert_variant_get_as_variant_array_with_default_option!(
1001+
array,
1002+
vec![
1003+
Some(Variant::from(
1004+
DateTime::from_timestamp_micros(-456000i64)
1005+
.unwrap()
1006+
.to_utc(),
1007+
)),
1008+
None,
1009+
Some(Variant::from("n/a")),
1010+
Some(Variant::from(
1011+
DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1012+
.unwrap()
1013+
.to_utc(),
1014+
)),
1015+
]
1016+
)
1017+
}
9471018

948-
// expect the result is a VariantArray
949-
let result: &VariantArray = result.as_any().downcast_ref().unwrap();
950-
assert_eq!(result.len(), 4);
1019+
partially_shredded_variant_array_gen!(
1020+
partially_shredded_timestamp_nano_ntz_variant_array,
1021+
|| {
1022+
arrow::array::TimestampNanosecondArray::from(vec![
1023+
Some(-4999999561),
1024+
None,
1025+
None,
1026+
Some(1758602096000000000),
1027+
])
1028+
}
1029+
);
9511030

952-
// Expect the values are the same as the original values
953-
assert_eq!(
954-
result.value(0),
955-
Variant::from(DateTime::from_timestamp(-456i64, 0).unwrap())
956-
);
957-
assert!(result.is_null(1));
958-
assert_eq!(result.value(2), Variant::from("n/a"));
959-
assert_eq!(
960-
result.value(3),
961-
Variant::from(DateTime::from_timestamp(1758375670i64, 0).unwrap())
962-
);
1031+
#[test]
1032+
fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
1033+
let array = partially_shredded_timestamp_nano_ntz_variant_array();
1034+
1035+
assert_variant_get_as_variant_array_with_default_option!(
1036+
array,
1037+
vec![
1038+
Some(Variant::from(
1039+
DateTime::from_timestamp(-5, 439).unwrap().naive_utc()
1040+
)),
1041+
None,
1042+
Some(Variant::from("n/a")),
1043+
Some(Variant::from(
1044+
DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1045+
.unwrap()
1046+
.naive_utc()
1047+
)),
1048+
]
1049+
)
9631050
}
9641051

9651052
partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
9661053
arrow::array::TimestampNanosecondArray::from(vec![
967-
Some(-4561),
1054+
Some(-4999999561),
9681055
None,
9691056
None,
970-
Some(1758375670123),
1057+
Some(1758602096000000000),
9711058
])
1059+
.with_timezone("+00:00")
9721060
});
9731061

9741062
#[test]
9751063
fn get_variant_partial_shredded_timestamp_nano_as_variant() {
9761064
let array = partially_shredded_timestamp_nano_variant_array();
977-
let options = GetOptions::new();
978-
let result = variant_get(&array, options).unwrap();
9791065

980-
// expect the result is a VariantArray
981-
let result: &VariantArray = result.as_any().downcast_ref().unwrap();
982-
assert_eq!(result.len(), 4);
983-
984-
// Expect the values are the same as the original values
985-
assert_eq!(
986-
result.value(0),
987-
Variant::from(DateTime::from_timestamp(-5i64, 439).unwrap())
988-
);
989-
assert!(result.is_null(1));
990-
assert_eq!(result.value(2), Variant::from("n/a"));
991-
assert_eq!(
992-
result.value(3),
993-
Variant::from(DateTime::from_timestamp(1758375670, 123).unwrap())
994-
);
1066+
assert_variant_get_as_variant_array_with_default_option!(
1067+
array,
1068+
vec![
1069+
Some(Variant::from(
1070+
DateTime::from_timestamp(-5, 439).unwrap().to_utc()
1071+
)),
1072+
None,
1073+
Some(Variant::from("n/a")),
1074+
Some(Variant::from(
1075+
DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
1076+
.unwrap()
1077+
.to_utc()
1078+
)),
1079+
]
1080+
)
9951081
}
9961082

9971083
/// Return a VariantArray that represents a normal "shredded" variant

parquet/tests/variant_integration.rs

Lines changed: 8 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -94,23 +94,10 @@ variant_test_case!(16);
9494
variant_test_case!(17);
9595
variant_test_case!(18);
9696
variant_test_case!(19);
97-
// https://github.com/apache/arrow-rs/issues/8331
98-
variant_test_case!(
99-
20,
100-
"Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
101-
);
102-
variant_test_case!(
103-
21,
104-
"Unsupported typed_value type: Timestamp(Microsecond, Some(\"UTC\"))"
105-
);
106-
variant_test_case!(
107-
22,
108-
"Unsupported typed_value type: Timestamp(Microsecond, None)"
109-
);
110-
variant_test_case!(
111-
23,
112-
"Unsupported typed_value type: Timestamp(Microsecond, None)"
113-
);
97+
variant_test_case!(20);
98+
variant_test_case!(21);
99+
variant_test_case!(22);
100+
variant_test_case!(23);
114101
// https://github.com/apache/arrow-rs/issues/8332
115102
variant_test_case!(24, "Unsupported typed_value type: Decimal128(9, 4)");
116103
variant_test_case!(25, "Unsupported typed_value type: Decimal128(9, 4)");
@@ -122,23 +109,10 @@ variant_test_case!(30);
122109
variant_test_case!(31);
123110
// https://github.com/apache/arrow-rs/issues/8334
124111
variant_test_case!(32, "Unsupported typed_value type: Time64(Microsecond)");
125-
// https://github.com/apache/arrow-rs/issues/8331
126-
variant_test_case!(
127-
33,
128-
"Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
129-
);
130-
variant_test_case!(
131-
34,
132-
"Unsupported typed_value type: Timestamp(Nanosecond, Some(\"UTC\"))"
133-
);
134-
variant_test_case!(
135-
35,
136-
"Unsupported typed_value type: Timestamp(Nanosecond, None)"
137-
);
138-
variant_test_case!(
139-
36,
140-
"Unsupported typed_value type: Timestamp(Nanosecond, None)"
141-
);
112+
variant_test_case!(33);
113+
variant_test_case!(34);
114+
variant_test_case!(35);
115+
variant_test_case!(36);
142116
variant_test_case!(37);
143117
// https://github.com/apache/arrow-rs/issues/8336
144118
variant_test_case!(38, "Unsupported typed_value type: Struct(");

0 commit comments

Comments
 (0)