-
Notifications
You must be signed in to change notification settings - Fork 1k
[WIP] Support Shredded Lists/Array in variant_get
#8354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
9c25cc4
ed961a4
03ecb95
158d6d7
d53c831
174e429
69de7d7
cc6d787
8f6ad1b
bc8abd9
c0d2065
85aaa3f
40b6311
f6e88ef
61ed178
1fb612d
2b6d280
398b52d
defa07b
5022acd
ed66007
196b5d4
642d192
76b3c80
35785d6
5914218
216d401
3aa6cf3
15fc8be
04b9941
857f0e2
1edfeca
6d6793d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1010,7 +1010,101 @@ mod test { | |
| let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(42)])); | ||
| assert_eq!(&result, &expected); | ||
| } | ||
/// Reads list element 0 from every row of the shredded list fixture and
/// checks that `variant_get` returns the elements as a `VariantArray`.
///
/// The fixture rows are documented as ["comedy", "drama"], ["horror", null]
/// and ["comedy", "drama", "romance"].
#[test]
fn test_shredded_list_field_access() {
    let input = shredded_list_variant_array();

    // Path `0` selects the first list element; no target type is requested,
    // so the output is downcast to a VariantArray below.
    let first_element = GetOptions::new_with_path(VariantPath::from(0));
    let output = variant_get(&input, first_element).unwrap();

    let variants: &VariantArray = output.as_any().downcast_ref().unwrap();
    assert_eq!(variants.len(), 3);

    // Expected element 0 of rows 0, 1 and 2, in row order.
    let expected_first = ["comedy", "horror", "comedy"];
    for (row, text) in expected_first.iter().copied().enumerate() {
        assert_eq!(variants.value(row), Variant::String(text));
    }
}
/// Test extracting element 0 of every shredded list row with a type
/// conversion to `Utf8`.
///
/// The fixture has three rows, so the result must contain three strings: the
/// first element of each row, i.e. the same values that
/// `test_shredded_list_field_access` asserts as variants.
#[test]
fn test_shredded_list_as_string() {
    let array = shredded_list_variant_array();

    // Test: Extract the 0 index values as StringArray (type conversion)
    let field = Field::new("typed_value", DataType::Utf8, false);
    let options = GetOptions::new_with_path(VariantPath::from(0))
        .with_as_type(Some(FieldRef::from(field)));
    let result = variant_get(&array, options).unwrap();

    // One output value per input row. The previous expectation listed the
    // contents of row 0 (["comedy", "drama"]) instead of element 0 of each row.
    let expected: ArrayRef = Arc::new(StringArray::from(vec![
        Some("comedy"),
        Some("horror"),
        Some("comedy"),
    ]));
    assert_eq!(&result, &expected);
}
| /// Helper function to create a shredded variant array representing lists | ||
| /// | ||
| /// This creates an array that represents: | ||
| /// Row 0: ["comedy", "drama"] ([0] is shredded, [1] is shredded - perfectly shredded) | ||
| /// Row 1: ["horror", null] ([0] is shredded, [1] is binary null - partially shredded) | ||
| /// Row 2: ["comedy", "drama", "romance"] (perfectly shredded) | ||
sdf-jkl marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| /// | ||
| /// The physical layout follows the shredding spec where: | ||
| /// - metadata: contains list metadata | ||
| /// - typed_value: StructArray with 0 index value | ||
| /// - value: contains fallback for | ||
| fn shredded_list_variant_array() -> ArrayRef { | ||
| // Create the base metadata for lists | ||
|
|
||
| // Could add this as an api for VariantList, like VariantList::from() | ||
| fn build_list_metadata(vector: Vec<Variant>) -> (Vec<u8>, Vec<u8>) { | ||
| let mut builder = parquet_variant::VariantBuilder::new(); | ||
| let mut list = builder.new_list(); | ||
| for value in vector { | ||
| list.append_value(value); | ||
| } | ||
| list.finish(); | ||
| builder.finish() | ||
| } | ||
| let (metadata1, _) = | ||
| build_list_metadata(vec![Variant::String("comedy"), Variant::String("drama")]); | ||
sdf-jkl marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| let (metadata2, _) = build_list_metadata(vec![Variant::String("horror"), Variant::Null]); | ||
|
|
||
| let (metadata3, _) = build_list_metadata(vec![ | ||
| Variant::String("comedy"), | ||
| Variant::String("drama"), | ||
| Variant::String("romance"), | ||
| ]); | ||
|
|
||
| // Create metadata array | ||
| let metadata_array = | ||
| BinaryViewArray::from_iter_values(vec![metadata1, metadata2, metadata3]); | ||
|
|
||
| // Create the untyped value array | ||
| let value_array = BinaryViewArray::from(vec![Variant::Null.as_u8_slice()]); | ||
| // Maybe I should try with an actual primitive array | ||
| let typed_value_array = StringArray::from(vec![ | ||
| Some("comedy"), | ||
| Some("drama"), | ||
| Some("horror"), | ||
| Some("comedy"), | ||
| Some("drama"), | ||
| Some("romance"), | ||
| ]); | ||
| // Build the main VariantArray | ||
| let main_struct = crate::variant_array::StructArrayBuilder::new() | ||
| .with_field("metadata", Arc::new(metadata_array)) | ||
| .with_field("value", Arc::new(value_array)) | ||
| .with_field("typed_value", Arc::new(typed_value_array)) | ||
|
||
| .build(); | ||
|
|
||
| Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array")) | ||
| } | ||
| /// Helper function to create a shredded variant array representing objects | ||
| /// | ||
| /// This creates an array that represents: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.