Skip to content

Commit 0a76ed6

Browse files
committed
Merge remote-tracking branch 'apache/main' into variant-value-builder
2 parents 474fa31 + 7696432 commit 0a76ed6

File tree

6 files changed

+99
-93
lines changed

6 files changed

+99
-93
lines changed

arrow-cast/src/cast/decimal.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ impl DecimalCast for i64 {
8181
}
8282

8383
fn from_f64(n: f64) -> Option<Self> {
84-
n.to_i64()
84+
// Call implementation explicitly otherwise this resolves to `to_i64`
85+
// in arrow-buffer that behaves differently.
86+
num::traits::ToPrimitive::to_i64(&n)
8587
}
8688
}
8789

arrow-cast/src/cast/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3101,6 +3101,35 @@ mod tests {
31013101
);
31023102
}
31033103

3104+
#[test]
3105+
fn test_cast_floating_to_decimals() {
3106+
for output_type in [
3107+
DataType::Decimal32(9, 3),
3108+
DataType::Decimal64(9, 3),
3109+
DataType::Decimal128(9, 3),
3110+
DataType::Decimal256(9, 3),
3111+
] {
3112+
let input_type = DataType::Float64;
3113+
assert!(can_cast_types(&input_type, &output_type));
3114+
3115+
let array = vec![Some(1.1_f64)];
3116+
let array = PrimitiveArray::<Float64Type>::from_iter(array);
3117+
let result = cast_with_options(
3118+
&array,
3119+
&output_type,
3120+
&CastOptions {
3121+
safe: false,
3122+
format_options: FormatOptions::default(),
3123+
},
3124+
);
3125+
assert!(
3126+
result.is_ok(),
3127+
"Failed to cast to {output_type} with: {}",
3128+
result.unwrap_err()
3129+
);
3130+
}
3131+
}
3132+
31043133
#[test]
31053134
fn test_cast_decimal128_to_decimal128_overflow() {
31063135
let input_type = DataType::Decimal128(38, 3);

parquet-testing

Submodule parquet-testing updated 276 files

parquet-variant-compute/src/variant_array.rs

Lines changed: 27 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ impl VariantArray {
129129
Ok(Self {
130130
inner: inner.clone(),
131131
metadata: metadata.clone(),
132-
shredding_state: ShreddingState::try_new(metadata.clone(), value, typed_value)?,
132+
shredding_state: ShreddingState::try_new(value, typed_value)?,
133133
})
134134
}
135135

@@ -154,8 +154,7 @@ impl VariantArray {
154154
// This would be a lot simpler if ShreddingState were just a pair of Option... we already
155155
// have everything we need.
156156
let inner = builder.build();
157-
let shredding_state =
158-
ShreddingState::try_new(metadata.clone(), value, typed_value).unwrap(); // valid by construction
157+
let shredding_state = ShreddingState::try_new(value, typed_value).unwrap(); // valid by construction
159158
Self {
160159
inner,
161160
metadata,
@@ -222,7 +221,7 @@ impl VariantArray {
222221
typed_value_to_variant(typed_value, index)
223222
}
224223
}
225-
ShreddingState::AllNull { .. } => {
224+
ShreddingState::AllNull => {
226225
// AllNull case: neither value nor typed_value fields exist
227226
// NOTE: This handles the case where neither value nor typed_value fields exist.
228227
// For top-level variants, this returns Variant::Null (JSON null).
@@ -325,14 +324,11 @@ impl ShreddedVariantFieldArray {
325324
.and_then(|col| col.as_binary_view_opt().cloned());
326325
let typed_value = inner_struct.column_by_name("typed_value").cloned();
327326

328-
// Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
329-
let dummy_metadata = arrow::array::BinaryViewArray::new_null(inner_struct.len());
330-
331327
// Note this clone is cheap, it just bumps the ref count
332328
let inner = inner_struct.clone();
333329
Ok(Self {
334330
inner: inner.clone(),
335-
shredding_state: ShreddingState::try_new(dummy_metadata, value, typed_value)?,
331+
shredding_state: ShreddingState::try_new(value, typed_value)?,
336332
})
337333
}
338334

@@ -432,16 +428,10 @@ impl Array for ShreddedVariantFieldArray {
432428
#[derive(Debug)]
433429
pub enum ShreddingState {
434430
/// This variant has no typed_value field
435-
Unshredded {
436-
metadata: BinaryViewArray,
437-
value: BinaryViewArray,
438-
},
431+
Unshredded { value: BinaryViewArray },
439432
/// This variant has a typed_value field and no value field
440433
/// meaning it is the shredded type
441-
Typed {
442-
metadata: BinaryViewArray,
443-
typed_value: ArrayRef,
444-
},
434+
Typed { typed_value: ArrayRef },
445435
/// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
446436
/// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
447437
/// values have NULL `typed_value` and `Variant::Null` in `value`.
@@ -453,7 +443,6 @@ pub enum ShreddingState {
453443
/// the `value` is a variant object containing the subset of fields for which shredding was
454444
/// not even attempted.
455445
PartiallyShredded {
456-
metadata: BinaryViewArray,
457446
value: BinaryViewArray,
458447
typed_value: ArrayRef,
459448
},
@@ -463,38 +452,20 @@ pub enum ShreddingState {
463452
/// Note: By strict spec interpretation, this should only be valid for shredded object fields,
464453
/// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
465454
/// handling of missing data.
466-
AllNull { metadata: BinaryViewArray },
455+
AllNull,
467456
}
468457

469458
impl ShreddingState {
470459
/// try to create a new `ShreddingState` from the given fields
471460
pub fn try_new(
472-
metadata: BinaryViewArray,
473461
value: Option<BinaryViewArray>,
474462
typed_value: Option<ArrayRef>,
475463
) -> Result<Self, ArrowError> {
476-
match (metadata, value, typed_value) {
477-
(metadata, Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded {
478-
metadata,
479-
value,
480-
typed_value,
481-
}),
482-
(metadata, Some(value), None) => Ok(Self::Unshredded { metadata, value }),
483-
(metadata, None, Some(typed_value)) => Ok(Self::Typed {
484-
metadata,
485-
typed_value,
486-
}),
487-
(metadata, None, None) => Ok(Self::AllNull { metadata }),
488-
}
489-
}
490-
491-
/// Return a reference to the metadata field
492-
pub fn metadata_field(&self) -> &BinaryViewArray {
493-
match self {
494-
ShreddingState::Unshredded { metadata, .. } => metadata,
495-
ShreddingState::Typed { metadata, .. } => metadata,
496-
ShreddingState::PartiallyShredded { metadata, .. } => metadata,
497-
ShreddingState::AllNull { metadata } => metadata,
464+
match (value, typed_value) {
465+
(Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded { value, typed_value }),
466+
(Some(value), None) => Ok(Self::Unshredded { value }),
467+
(None, Some(typed_value)) => Ok(Self::Typed { typed_value }),
468+
(None, None) => Ok(Self::AllNull),
498469
}
499470
}
500471

@@ -504,7 +475,7 @@ impl ShreddingState {
504475
ShreddingState::Unshredded { value, .. } => Some(value),
505476
ShreddingState::Typed { .. } => None,
506477
ShreddingState::PartiallyShredded { value, .. } => Some(value),
507-
ShreddingState::AllNull { .. } => None,
478+
ShreddingState::AllNull => None,
508479
}
509480
}
510481

@@ -514,36 +485,26 @@ impl ShreddingState {
514485
ShreddingState::Unshredded { .. } => None,
515486
ShreddingState::Typed { typed_value, .. } => Some(typed_value),
516487
ShreddingState::PartiallyShredded { typed_value, .. } => Some(typed_value),
517-
ShreddingState::AllNull { .. } => None,
488+
ShreddingState::AllNull => None,
518489
}
519490
}
520491

521492
/// Slice all the underlying arrays
522493
pub fn slice(&self, offset: usize, length: usize) -> Self {
523494
match self {
524-
ShreddingState::Unshredded { metadata, value } => ShreddingState::Unshredded {
525-
metadata: metadata.slice(offset, length),
495+
ShreddingState::Unshredded { value } => ShreddingState::Unshredded {
526496
value: value.slice(offset, length),
527497
},
528-
ShreddingState::Typed {
529-
metadata,
530-
typed_value,
531-
} => ShreddingState::Typed {
532-
metadata: metadata.slice(offset, length),
533-
typed_value: typed_value.slice(offset, length),
534-
},
535-
ShreddingState::PartiallyShredded {
536-
metadata,
537-
value,
538-
typed_value,
539-
} => ShreddingState::PartiallyShredded {
540-
metadata: metadata.slice(offset, length),
541-
value: value.slice(offset, length),
498+
ShreddingState::Typed { typed_value } => ShreddingState::Typed {
542499
typed_value: typed_value.slice(offset, length),
543500
},
544-
ShreddingState::AllNull { metadata } => ShreddingState::AllNull {
545-
metadata: metadata.slice(offset, length),
546-
},
501+
ShreddingState::PartiallyShredded { value, typed_value } => {
502+
ShreddingState::PartiallyShredded {
503+
value: value.slice(offset, length),
504+
typed_value: typed_value.slice(offset, length),
505+
}
506+
}
507+
ShreddingState::AllNull => ShreddingState::AllNull,
547508
}
548509
}
549510
}
@@ -744,7 +705,7 @@ mod test {
744705
// Verify the shredding state is AllNull
745706
assert!(matches!(
746707
variant_array.shredding_state(),
747-
ShreddingState::AllNull { .. }
708+
ShreddingState::AllNull
748709
));
749710

750711
// Verify that value() returns Variant::Null (compensating for spec violation)
@@ -801,17 +762,10 @@ mod test {
801762

802763
#[test]
803764
fn all_null_shredding_state() {
804-
let metadata = BinaryViewArray::from(vec![b"test" as &[u8]]);
805-
let shredding_state = ShreddingState::try_new(metadata.clone(), None, None).unwrap();
765+
let shredding_state = ShreddingState::try_new(None, None).unwrap();
806766

807767
// Verify the shredding state is AllNull
808-
assert!(matches!(shredding_state, ShreddingState::AllNull { .. }));
809-
810-
// Verify metadata is preserved correctly
811-
if let ShreddingState::AllNull { metadata: m } = shredding_state {
812-
assert_eq!(m.len(), metadata.len());
813-
assert_eq!(m.value(0), metadata.value(0));
814-
}
768+
assert!(matches!(shredding_state, ShreddingState::AllNull));
815769
}
816770

817771
#[test]
@@ -827,7 +781,7 @@ mod test {
827781
// Verify the shredding state is AllNull
828782
assert!(matches!(
829783
variant_array.shredding_state(),
830-
ShreddingState::AllNull { .. }
784+
ShreddingState::AllNull
831785
));
832786

833787
// Verify all values are null

parquet-variant-compute/src/variant_get.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ fn shredded_get_path(
135135
let shred_basic_variant =
136136
|target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
137137
let as_type = as_field.map(|f| f.data_type());
138-
let mut builder = make_variant_to_arrow_row_builder(path, as_type, cast_options)?;
138+
let mut builder =
139+
make_variant_to_arrow_row_builder(path, as_type, cast_options, target.len())?;
139140
for i in 0..target.len() {
140141
if target.is_null(i) {
141142
builder.append_null()?;

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ impl<'a> VariantToArrowRowBuilder<'a> {
7676
}
7777
}
7878

79-
pub fn finish(&mut self) -> Result<ArrayRef> {
79+
pub fn finish(self) -> Result<ArrayRef> {
8080
use VariantToArrowRowBuilder::*;
8181
match self {
8282
Int8(b) => b.finish(),
@@ -97,19 +97,41 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>(
9797
path: VariantPath<'a>,
9898
data_type: Option<&'a DataType>,
9999
cast_options: &'a CastOptions,
100+
capacity: usize,
100101
) -> Result<VariantToArrowRowBuilder<'a>> {
101102
use VariantToArrowRowBuilder::*;
102103

103104
let mut builder = match data_type {
104105
// If no data type was requested, build an unshredded VariantArray.
105-
None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(16)),
106-
Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
107-
Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
108-
Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
109-
Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
110-
Some(DataType::Float16) => Float16(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
111-
Some(DataType::Float32) => Float32(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
112-
Some(DataType::Float64) => Float64(VariantToPrimitiveArrowRowBuilder::new(cast_options)),
106+
None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(capacity)),
107+
Some(DataType::Int8) => Int8(VariantToPrimitiveArrowRowBuilder::new(
108+
cast_options,
109+
capacity,
110+
)),
111+
Some(DataType::Int16) => Int16(VariantToPrimitiveArrowRowBuilder::new(
112+
cast_options,
113+
capacity,
114+
)),
115+
Some(DataType::Int32) => Int32(VariantToPrimitiveArrowRowBuilder::new(
116+
cast_options,
117+
capacity,
118+
)),
119+
Some(DataType::Int64) => Int64(VariantToPrimitiveArrowRowBuilder::new(
120+
cast_options,
121+
capacity,
122+
)),
123+
Some(DataType::Float16) => Float16(VariantToPrimitiveArrowRowBuilder::new(
124+
cast_options,
125+
capacity,
126+
)),
127+
Some(DataType::Float32) => Float32(VariantToPrimitiveArrowRowBuilder::new(
128+
cast_options,
129+
capacity,
130+
)),
131+
Some(DataType::Float64) => Float64(VariantToPrimitiveArrowRowBuilder::new(
132+
cast_options,
133+
capacity,
134+
)),
113135
_ => {
114136
return Err(ArrowError::NotYetImplemented(format!(
115137
"variant_get with path={:?} and data_type={:?} not yet implemented",
@@ -150,7 +172,7 @@ impl<'a> VariantPathRowBuilder<'a> {
150172
}
151173
}
152174

153-
fn finish(&mut self) -> Result<ArrayRef> {
175+
fn finish(self) -> Result<ArrayRef> {
154176
self.builder.finish()
155177
}
156178
}
@@ -180,9 +202,9 @@ pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T: ArrowPrimitiveType> {
180202
}
181203

182204
impl<'a, T: ArrowPrimitiveType> VariantToPrimitiveArrowRowBuilder<'a, T> {
183-
fn new(cast_options: &'a CastOptions<'a>) -> Self {
205+
fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
184206
Self {
185-
builder: PrimitiveBuilder::<T>::new(),
207+
builder: PrimitiveBuilder::<T>::with_capacity(capacity),
186208
cast_options,
187209
}
188210
}
@@ -217,7 +239,7 @@ where
217239
}
218240
}
219241

220-
fn finish(&mut self) -> Result<ArrayRef> {
242+
fn finish(mut self) -> Result<ArrayRef> {
221243
Ok(Arc::new(self.builder.finish()))
222244
}
223245
}
@@ -253,9 +275,7 @@ impl VariantToBinaryVariantArrowRowBuilder {
253275
Ok(true)
254276
}
255277

256-
fn finish(&mut self) -> Result<ArrayRef> {
257-
// VariantArrayBuilder::build takes ownership, so we need to replace it
258-
let builder = std::mem::replace(&mut self.builder, VariantArrayBuilder::new(0));
259-
Ok(Arc::new(builder.build()))
278+
fn finish(self) -> Result<ArrayRef> {
279+
Ok(Arc::new(self.builder.build()))
260280
}
261281
}

0 commit comments

Comments
 (0)