|
18 | 18 | use std::collections::HashMap; |
19 | 19 | use std::sync::Arc; |
20 | 20 |
|
| 21 | +use crate::arrow::schema::extension::add_extension_type; |
21 | 22 | use crate::arrow::schema::primitive::convert_primitive; |
22 | 23 | use crate::arrow::{ProjectionMask, PARQUET_FIELD_ID_META_KEY}; |
23 | 24 | use crate::basic::{ConvertedType, Repetition}; |
@@ -172,7 +173,7 @@ impl Visitor { |
172 | 173 |
|
173 | 174 | let parquet_fields = struct_type.get_fields(); |
174 | 175 |
|
175 | | - // Extract the arrow fields |
| 176 | + // Extract any arrow fields from the hints |
176 | 177 | let arrow_fields = match &context.data_type { |
177 | 178 | Some(DataType::Struct(fields)) => { |
178 | 179 | if fields.len() != parquet_fields.len() { |
@@ -220,10 +221,10 @@ impl Visitor { |
220 | 221 | data_type, |
221 | 222 | }; |
222 | 223 |
|
223 | | - if let Some(child) = self.dispatch(parquet_field, child_ctx)? { |
| 224 | + if let Some(mut child) = self.dispatch(parquet_field, child_ctx)? { |
224 | 225 | // The child type returned may be different from what is encoded in the arrow |
225 | 226 | // schema in the event of a mismatch or a projection |
226 | | - child_fields.push(convert_field(parquet_field, &child, arrow_field)); |
| 227 | + child_fields.push(convert_field(parquet_field, &mut child, arrow_field)); |
227 | 228 | children.push(child); |
228 | 229 | } |
229 | 230 | } |
@@ -352,13 +353,13 @@ impl Visitor { |
352 | 353 |
|
353 | 354 | // Need both columns to be projected |
354 | 355 | match (maybe_key, maybe_value) { |
355 | | - (Some(key), Some(value)) => { |
| 356 | + (Some(mut key), Some(mut value)) => { |
356 | 357 | let key_field = Arc::new( |
357 | | - convert_field(map_key, &key, arrow_key) |
| 358 | + convert_field(map_key, &mut key, arrow_key) |
358 | 359 | // The key is always non-nullable (#5630) |
359 | 360 | .with_nullable(false), |
360 | 361 | ); |
361 | | - let value_field = Arc::new(convert_field(map_value, &value, arrow_value)); |
| 362 | + let value_field = Arc::new(convert_field(map_value, &mut value, arrow_value)); |
362 | 363 | let field_metadata = match arrow_map { |
363 | 364 | Some(field) => field.metadata().clone(), |
364 | 365 | _ => HashMap::default(), |
@@ -495,8 +496,8 @@ impl Visitor { |
495 | 496 | }; |
496 | 497 |
|
497 | 498 | match self.dispatch(item_type, new_context) { |
498 | | - Ok(Some(item)) => { |
499 | | - let item_field = Arc::new(convert_field(item_type, &item, arrow_field)); |
| 499 | + Ok(Some(mut item)) => { |
| 500 | + let item_field = Arc::new(convert_field(item_type, &mut item, arrow_field)); |
500 | 501 |
|
501 | 502 | // Use arrow type as hint for index size |
502 | 503 | let arrow_type = match context.data_type { |
@@ -540,11 +541,15 @@ impl Visitor { |
540 | 541 | } |
541 | 542 | } |
542 | 543 |
|
543 | | -/// Computes the [`Field`] for a child column |
| 544 | +/// Computes the Arrow [`Field`] for a child column |
544 | 545 | /// |
545 | | -/// The resulting [`Field`] will have the type dictated by `field`, a name |
| 546 | +/// The resulting Arrow [`Field`] will have the type dictated by the Parquet `field`, a name |
546 | 547 | /// dictated by the `parquet_type`, and any metadata from `arrow_hint` |
547 | | -fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&Field>) -> Field { |
| 548 | +fn convert_field( |
| 549 | + parquet_type: &Type, |
| 550 | + field: &mut ParquetField, |
| 551 | + arrow_hint: Option<&Field>, |
| 552 | +) -> Field { |
548 | 553 | let name = parquet_type.name(); |
549 | 554 | let data_type = field.arrow_type.clone(); |
550 | 555 | let nullable = field.nullable; |
@@ -575,7 +580,7 @@ fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<& |
575 | 580 | ); |
576 | 581 | ret.set_metadata(meta); |
577 | 582 | } |
578 | | - ret |
| 583 | + add_extension_type(ret, parquet_type) |
579 | 584 | } |
580 | 585 | } |
581 | 586 | } |
|
0 commit comments