Skip to content

Commit afa37a5

Browse files
Impl Array for VariantArray
1 parent 77ca6dc commit afa37a5

File tree

6 files changed

+73
-47
lines changed

6 files changed

+73
-47
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use crate::variant_to_arrow::{
2222
PrimitiveVariantToArrowRowBuilder, make_primitive_variant_to_arrow_row_builder,
2323
};
2424
use crate::{VariantArray, VariantValueArrayBuilder};
25-
use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
25+
use arrow::array::{Array, ArrayRef, BinaryViewArray, NullBufferBuilder};
2626
use arrow::buffer::NullBuffer;
2727
use arrow::compute::CastOptions;
2828
use arrow::datatypes::{DataType, Fields};

parquet-variant-compute/src/variant_array.rs

Lines changed: 46 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,7 @@ impl ExtensionType for VariantType {
159159
///
160160
/// ```
161161
/// # use arrow_schema::{Schema, Field, DataType};
162+
/// # use arrow::array::Array;
162163
/// # use parquet_variant::Variant;
163164
/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
164165
/// # fn get_variant_array() -> VariantArray {
@@ -385,54 +386,74 @@ impl VariantArray {
385386
.with_extension_type(VariantType)
386387
}
387388

388-
/// Returns a new DataType representing this VariantArray's inner type
389-
pub fn data_type(&self) -> &DataType {
389+
/// Is the element at index null?
390+
pub fn is_null(&self, index: usize) -> bool {
391+
self.nulls().is_some_and(|n| n.is_null(index))
392+
}
393+
394+
/// Is the element at index valid (not null)?
395+
pub fn is_valid(&self, index: usize) -> bool {
396+
!self.is_null(index)
397+
}
398+
}
399+
400+
impl From<VariantArray> for StructArray {
401+
fn from(variant_array: VariantArray) -> Self {
402+
variant_array.into_inner()
403+
}
404+
}
405+
406+
impl Array for VariantArray {
407+
fn as_any(&self) -> &dyn std::any::Any {
408+
self
409+
}
410+
411+
fn to_data(&self) -> arrow::array::ArrayData {
412+
self.inner.to_data()
413+
}
414+
415+
fn into_data(self) -> arrow::array::ArrayData {
416+
self.inner.into_data()
417+
}
418+
419+
fn data_type(&self) -> &DataType {
390420
self.inner.data_type()
391421
}
392422

393-
pub fn slice(&self, offset: usize, length: usize) -> Self {
423+
fn slice(&self, offset: usize, length: usize) -> ArrayRef {
394424
let inner = self.inner.slice(offset, length);
395425
let metadata = self.metadata.slice(offset, length);
396426
let shredding_state = self.shredding_state.slice(offset, length);
397-
Self {
427+
428+
Arc::new(Self {
398429
inner,
399430
metadata,
400431
shredding_state,
401-
}
432+
})
402433
}
403434

404-
pub fn len(&self) -> usize {
435+
fn len(&self) -> usize {
405436
self.inner.len()
406437
}
407438

408-
pub fn is_empty(&self) -> bool {
439+
fn is_empty(&self) -> bool {
409440
self.inner.is_empty()
410441
}
411442

412-
pub fn nulls(&self) -> Option<&NullBuffer> {
413-
self.inner.nulls()
414-
}
415-
416-
/// Is the element at index null?
417-
pub fn is_null(&self, index: usize) -> bool {
418-
self.nulls().is_some_and(|n| n.is_null(index))
443+
fn offset(&self) -> usize {
444+
self.inner.offset()
419445
}
420446

421-
/// Is the element at index valid (not null)?
422-
pub fn is_valid(&self, index: usize) -> bool {
423-
!self.is_null(index)
447+
fn nulls(&self) -> Option<&NullBuffer> {
448+
self.inner.nulls()
424449
}
425-
}
426450

427-
impl From<VariantArray> for StructArray {
428-
fn from(variant_array: VariantArray) -> Self {
429-
variant_array.into_inner()
451+
fn get_buffer_memory_size(&self) -> usize {
452+
self.inner.get_buffer_memory_size()
430453
}
431-
}
432454

433-
impl From<VariantArray> for ArrayRef {
434-
fn from(variant_array: VariantArray) -> Self {
435-
Arc::new(variant_array.into_inner())
455+
fn get_array_memory_size(&self) -> usize {
456+
std::mem::size_of::<Self>() + self.inner.get_buffer_memory_size()
436457
}
437458
}
438459

parquet-variant-compute/src/variant_get.rs

Lines changed: 22 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ fn shredded_get_path(
196196

197197
// If our caller did not request any specific type, we can just return whatever we landed on.
198198
let Some(as_field) = as_field else {
199-
return Ok(ArrayRef::from(target));
199+
return Ok(Arc::new(target) as ArrayRef);
200200
};
201201

202202
// Structs are special. Recurse into each field separately, hoping to follow the shredding even
@@ -314,7 +314,8 @@ mod test {
314314
fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
315315
// Create input array from JSON string
316316
let input_array_ref: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
317-
let input_variant_array_ref = ArrayRef::from(json_to_variant(&input_array_ref).unwrap());
317+
let input_variant_array_ref =
318+
Arc::new(json_to_variant(&input_array_ref).unwrap()) as ArrayRef;
318319

319320
let result =
320321
variant_get(&input_variant_array_ref, GetOptions::new_with_path(path)).unwrap();
@@ -452,9 +453,9 @@ mod test {
452453
.with_field("value", Arc::new(values), true)
453454
.with_nulls(nulls)
454455
.build();
455-
ArrayRef::from(
456-
VariantArray::try_new(&struct_array).expect("should create variant array"),
457-
)
456+
457+
Arc::new(VariantArray::try_new(&struct_array).expect("should create variant array"))
458+
as ArrayRef
458459
}
459460
};
460461
}
@@ -779,9 +780,10 @@ mod test {
779780
.with_field("typed_value", Arc::new(typed_value), true)
780781
.build();
781782

782-
VariantArray::try_new(&struct_array)
783-
.expect("should create variant array")
784-
.into()
783+
let variant_array =
784+
VariantArray::try_new(&struct_array).expect("should create variant array");
785+
786+
Arc::new(variant_array) as ArrayRef
785787
}
786788
};
787789
}
@@ -1189,9 +1191,10 @@ mod test {
11891191
.with_nulls(nulls)
11901192
.build();
11911193

1192-
ArrayRef::from(
1193-
VariantArray::try_new(&struct_array).expect("should create variant array"),
1194-
)
1194+
let variant_array =
1195+
VariantArray::try_new(&struct_array).expect("should create variant array");
1196+
1197+
Arc::new(variant_array) as ArrayRef
11951198
}
11961199
};
11971200
}
@@ -1642,7 +1645,8 @@ mod test {
16421645
}
16431646
}
16441647

1645-
ArrayRef::from(builder.build())
1648+
let variant_array = builder.build();
1649+
Arc::new(variant_array) as _
16461650
}
16471651

16481652
/// Create test data for depth 1 (single nested field)
@@ -1672,7 +1676,7 @@ mod test {
16721676
}
16731677
}
16741678

1675-
ArrayRef::from(builder.build())
1679+
Arc::new(builder.build()) as _
16761680
}
16771681

16781682
/// Create test data for depth 2 (double nested field)
@@ -1713,7 +1717,7 @@ mod test {
17131717
}
17141718
}
17151719

1716-
ArrayRef::from(builder.build())
1720+
Arc::new(builder.build()) as _
17171721
}
17181722

17191723
/// Create simple shredded test data for depth 0 using a simplified working pattern
@@ -2182,7 +2186,7 @@ mod test {
21822186
cast_options: CastOptions::default(),
21832187
};
21842188

2185-
let variant_array_ref = ArrayRef::from(variant_array);
2189+
let variant_array_ref = Arc::new(variant_array) as ArrayRef;
21862190
let result = variant_get(&variant_array_ref, options).unwrap();
21872191

21882192
// Verify the result is a StructArray
@@ -2260,7 +2264,7 @@ mod test {
22602264
cast_options: CastOptions::default(),
22612265
};
22622266

2263-
let variant_array_ref = ArrayRef::from(variant_array);
2267+
let variant_array_ref = Arc::new(variant_array) as ArrayRef;
22642268
let result_nullable = variant_get(&variant_array_ref, options_nullable).unwrap();
22652269

22662270
// Verify we get an Int32Array with nulls for cast failures
@@ -2315,7 +2319,7 @@ mod test {
23152319

23162320
// Create variant array again since we moved it
23172321
let variant_array_2 = json_to_variant(&string_array).unwrap();
2318-
let variant_array_ref_2 = ArrayRef::from(variant_array_2);
2322+
let variant_array_ref_2 = Arc::new(variant_array_2) as ArrayRef;
23192323
let result_non_nullable = variant_get(&variant_array_ref_2, options_non_nullable).unwrap();
23202324
let int32_result_2 = result_non_nullable
23212325
.as_any()
@@ -2632,7 +2636,7 @@ mod test {
26322636
cast_options: CastOptions::default(),
26332637
};
26342638

2635-
let variant_array_ref = ArrayRef::from(variant_array);
2639+
let variant_array_ref = Arc::new(variant_array) as ArrayRef;
26362640
let result = variant_get(&variant_array_ref, options);
26372641

26382642
// Should fail with NotYetImplemented when the row builder tries to handle struct type

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -469,6 +469,6 @@ impl VariantToBinaryVariantArrowRowBuilder {
469469
self.nulls.finish(),
470470
);
471471

472-
Ok(ArrayRef::from(variant_array))
472+
Ok(Arc::new(variant_array) as ArrayRef)
473473
}
474474
}

parquet/src/variant.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ mod tests {
147147
use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
148148
use crate::file::reader::ChunkReader;
149149
use arrow::util::test_util::parquet_test_data;
150-
use arrow_array::{ArrayRef, RecordBatch};
150+
use arrow_array::{Array, ArrayRef, RecordBatch};
151151
use arrow_schema::Schema;
152152
use bytes::Bytes;
153153
use parquet_variant::{Variant, VariantBuilderExt};
@@ -237,7 +237,7 @@ mod tests {
237237
fn variant_array_to_batch(array: VariantArray) -> RecordBatch {
238238
let field = array.field("data");
239239
let schema = Schema::new(vec![field]);
240-
RecordBatch::try_new(Arc::new(schema), vec![ArrayRef::from(array)]).unwrap()
240+
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array) as ArrayRef]).unwrap()
241241
}
242242

243243
/// writes a RecordBatch to memory buffer and returns the buffer

parquet/tests/variant_integration.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
//!
2424
//! Inspired by the arrow-go implementation: <https://github.com/apache/arrow-go/pull/455/files>
2525
26+
use arrow::array::Array;
2627
use arrow::util::test_util::parquet_test_data;
2728
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
2829
use parquet_variant::{Variant, VariantMetadata};

0 commit comments

Comments
 (0)