@@ -113,7 +113,7 @@ void LevelBuilder::_write_column_chunk(const LevelBuilderContext& ctx, const Typ
113
113
break ;
114
114
}
115
115
case TYPE_ARRAY: {
116
- _write_array_column_chunk_branchless (ctx, type_desc, node, col, write_leaf_callback);
116
+ _write_array_column_chunk (ctx, type_desc, node, col, write_leaf_callback);
117
117
break ;
118
118
}
119
119
case TYPE_MAP: {
@@ -399,101 +399,6 @@ void LevelBuilder::_write_array_column_chunk(const LevelBuilderContext& ctx, con
399
399
_write_column_chunk (derived_ctx, type_desc.children [0 ], inner_node, elements, write_leaf_callback);
400
400
}
401
401
402
- void LevelBuilder::_write_array_column_chunk_branchless (const LevelBuilderContext& ctx, const TypeDescriptor& type_desc,
403
- const ::parquet::schema::NodePtr& node, const ColumnPtr& col,
404
- const CallbackFunction& write_leaf_callback) {
405
- DCHECK (type_desc.type == TYPE_ARRAY);
406
- auto outer_node = std::static_pointer_cast<::parquet::schema::GroupNode>(node);
407
- auto mid_node = std::static_pointer_cast<::parquet::schema::GroupNode>(outer_node->field (0 ));
408
- auto inner_node = mid_node->field (0 );
409
-
410
- auto * null_col = get_raw_null_column (col);
411
- auto * array_col = down_cast<ArrayColumn*>(ColumnHelper::get_data_column (col.get ()));
412
- const auto & elements = array_col->elements_column ();
413
- const auto & offsets = array_col->offsets_column ()->get_data ();
414
-
415
- size_t num_levels_upper_bound = ctx._num_levels + elements->size ();
416
- auto def_levels = std::make_shared<std::vector<int16_t >>(num_levels_upper_bound,
417
- ctx._max_def_level + node->is_optional () + 1 );
418
- auto rep_levels = std::make_shared<std::vector<int16_t >>(num_levels_upper_bound, ctx._max_rep_level + 1 );
419
-
420
- size_t num_levels = 0 ; // pointer to def/rep levels
421
- int offset = 0 ; // pointer to current column
422
- int ctx_ptr = 0 ; // pointer to levels in context
423
-
424
- if (null_col != nullptr ) {
425
- while (ctx_ptr < ctx._num_levels && offset < col->size ()) {
426
- auto def_level = ctx._def_levels ? (*ctx._def_levels )[ctx_ptr] : 0 ;
427
- auto rep_level = ctx._rep_levels ? (*ctx._rep_levels )[ctx_ptr] : 0 ;
428
-
429
- uint8_t null_in_parent_column = def_level != ctx._max_def_level ;
430
- uint8_t null_in_current_column = null_col[offset] == 1 ;
431
- uint8_t empty_array = offsets[offset + 1 ] == offsets[offset];
432
- uint8_t is_optional = node->is_optional ();
433
- auto array_size = offsets[offset + 1 ] - offsets[offset];
434
-
435
- // set def_level back for null in parent column
436
- (*def_levels)[num_levels] += null_in_parent_column * (def_level - (*def_levels)[num_levels]);
437
- // decrement 1 for empty array
438
- (*def_levels)[num_levels] -= ((1 - null_in_parent_column) & (1 - null_in_current_column) & empty_array);
439
- // decrement node->is_optional for null in current column
440
- (*def_levels)[num_levels] -= ((1 - null_in_parent_column) & null_in_current_column & is_optional);
441
- (*rep_levels)[num_levels] = rep_level;
442
-
443
- // update pointers
444
- uint8_t advance_one_step = null_in_parent_column | null_in_current_column | empty_array;
445
- num_levels += advance_one_step + (1 - advance_one_step) * array_size;
446
- offset += (1 - null_in_parent_column);
447
- ctx_ptr++;
448
- }
449
- } else {
450
- while (ctx_ptr < ctx._num_levels && offset < col->size ()) {
451
- auto def_level = ctx._def_levels ? (*ctx._def_levels )[ctx_ptr] : 0 ;
452
- auto rep_level = ctx._rep_levels ? (*ctx._rep_levels )[ctx_ptr] : 0 ;
453
-
454
- uint8_t null_in_parent_column = def_level != ctx._max_def_level ;
455
- uint8_t empty_array = offsets[offset + 1 ] == offsets[offset];
456
- auto array_size = offsets[offset + 1 ] - offsets[offset];
457
-
458
- // set def_level back for null in parent column
459
- (*def_levels)[num_levels] =
460
- null_in_parent_column * def_level + (1 - null_in_parent_column) * (*def_levels)[num_levels];
461
- // decrement 1 for empty array
462
- (*def_levels)[num_levels] -= ((1 - null_in_parent_column) & empty_array);
463
- (*rep_levels)[num_levels] = rep_level;
464
-
465
- // update pointers
466
- uint8_t advance_one_step = null_in_parent_column | empty_array;
467
- num_levels += advance_one_step + (1 - advance_one_step) * array_size;
468
- offset += (1 - null_in_parent_column);
469
- ctx_ptr++;
470
- }
471
- }
472
- DCHECK (col->size () == offset);
473
-
474
- // handle remaining undefined entries
475
- if (ctx_ptr < ctx._num_levels ) {
476
- if (ctx._def_levels == nullptr ) {
477
- memset (def_levels->data () + num_levels, 0 , ctx._num_levels - ctx_ptr);
478
- } else {
479
- memcpy (def_levels->data () + num_levels, ctx._def_levels ->data () + ctx_ptr, ctx._num_levels - ctx_ptr);
480
- }
481
-
482
- if (ctx._rep_levels == nullptr ) {
483
- memset (rep_levels->data () + num_levels, 0 , ctx._num_levels - ctx_ptr);
484
- } else {
485
- memcpy (rep_levels->data () + num_levels, ctx._rep_levels ->data () + ctx_ptr, ctx._num_levels - ctx_ptr);
486
- }
487
- }
488
-
489
- def_levels->resize (num_levels);
490
- rep_levels->resize (num_levels);
491
- LevelBuilderContext derived_ctx (def_levels->size (), def_levels, ctx._max_def_level + node->is_optional () + 1 ,
492
- rep_levels, ctx._max_rep_level + 1 );
493
-
494
- _write_column_chunk (derived_ctx, type_desc.children [0 ], inner_node, elements, write_leaf_callback);
495
- }
496
-
497
402
void LevelBuilder::_write_map_column_chunk (const LevelBuilderContext& ctx, const TypeDescriptor& type_desc,
498
403
const ::parquet::schema::NodePtr& node, const ColumnPtr& col,
499
404
const CallbackFunction& write_leaf_callback) {
0 commit comments