Skip to content

Commit 010d0e7

Browse files
alamb, scovich, mbrobbel
authored
Add Arrow Variant Extension Type, remove Array impl for VariantArray and ShreddedVariantFieldArray (#8392)
# Which issue does this PR close? - closes #8319 - closes #8296 # Rationale for this change This is needed to [read/write the Variant Parquet logical type](#8370) and work with the rest of the Arrow Ecosystem. Note: this is broken out of the larger PR here: - from #8365 We need a way to read and write Variant-encoded data from/to Parquet, and the current way the VariantArray is implemented doesn't work (it panics when writing to Parquet). See the tickets above. Instead of an `impl Array`, it seems the better way to do this is to use an Arrow Extension Type. See #8319 (comment) for more details # What changes are included in this PR? 1. Remove the `Array` impl for `VariantArray` (this forces explicit conversions back/forth when reading/writing) 2. Remove the `Array` impl for `ShreddedVariantFieldArray` (this forces explicit conversions back/forth when reading/writing) 3. Add the `VariantType` extension type # Are these changes tested? Yes, with new code and tests # Are there any user-facing changes? Yes, but this is not yet stable / released, so these changes have no impact on the releasability of this code --------- Co-authored-by: Ryan Johnson <[email protected]> Co-authored-by: Matthijs Brobbel <[email protected]>
1 parent 28ac449 commit 010d0e7

File tree

8 files changed

+472
-318
lines changed

8 files changed

+472
-318
lines changed

parquet-variant-compute/benches/variant_kernels.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
8484

8585
pub fn variant_get_bench(c: &mut Criterion) {
8686
let variant_array = create_primitive_variant_array(8192);
87-
let input: ArrayRef = Arc::new(variant_array);
87+
let input = ArrayRef::from(variant_array);
8888

8989
let options = GetOptions {
9090
path: vec![].into(),

parquet-variant-compute/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ mod variant_array_builder;
4646
pub mod variant_get;
4747
mod variant_to_arrow;
4848

49-
pub use variant_array::{ShreddingState, VariantArray};
49+
pub use variant_array::{ShreddingState, VariantArray, VariantType};
5050
pub use variant_array_builder::{VariantArrayBuilder, VariantValueArrayBuilder};
5151

5252
pub use cast_to_variant::{cast_to_variant, cast_to_variant_with_options};

parquet-variant-compute/src/shred_variant.rs

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::variant_to_arrow::{
2222
make_primitive_variant_to_arrow_row_builder, PrimitiveVariantToArrowRowBuilder,
2323
};
2424
use crate::{VariantArray, VariantValueArrayBuilder};
25-
use arrow::array::{Array as _, ArrayRef, BinaryViewArray, NullBufferBuilder};
25+
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
2626
use arrow::buffer::NullBuffer;
2727
use arrow::compute::CastOptions;
2828
use arrow::datatypes::{DataType, Fields};
@@ -310,7 +310,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
310310
let (value, typed_value, nulls) = typed_value_builder.finish()?;
311311
let array =
312312
ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
313-
builder = builder.with_field(field_name, Arc::new(array), false);
313+
builder = builder.with_field(field_name, ArrayRef::from(array), false);
314314
}
315315
if let Some(nulls) = self.typed_value_nulls.finish() {
316316
builder = builder.with_nulls(nulls);
@@ -327,7 +327,7 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
327327
mod tests {
328328
use super::*;
329329
use crate::VariantArrayBuilder;
330-
use arrow::array::{Float64Array, Int64Array};
330+
use arrow::array::{Array, Float64Array, Int64Array};
331331
use arrow::datatypes::{DataType, Field, Fields};
332332
use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt as _};
333333
use std::sync::Arc;
@@ -556,18 +556,11 @@ mod tests {
556556
.unwrap();
557557

558558
// Extract score and age fields from typed_value struct
559-
let score_field = typed_value
560-
.column_by_name("score")
561-
.unwrap()
562-
.as_any()
563-
.downcast_ref::<crate::variant_array::ShreddedVariantFieldArray>()
564-
.unwrap();
565-
let age_field = typed_value
566-
.column_by_name("age")
567-
.unwrap()
568-
.as_any()
569-
.downcast_ref::<crate::variant_array::ShreddedVariantFieldArray>()
570-
.unwrap();
559+
let score_field =
560+
ShreddedVariantFieldArray::try_new(typed_value.column_by_name("score").unwrap())
561+
.unwrap();
562+
let age_field =
563+
ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap();
571564

572565
let score_value = score_field
573566
.value_field()

0 commit comments

Comments
 (0)