-
Notifications
You must be signed in to change notification settings - Fork 1k
Open
Labels
Description
Describe the bug
Existing functions such as `interleave`
or `concat` panic when working with nested data types that contain a dictionary.
To Reproduce
/// Reproduction for the reported panic: `interleave` and `concat` over a
/// list-of-struct array whose struct holds a dictionary-encoded column.
///
/// Both input arrays are the same `Arc`, so they share one dictionary.
#[test]
fn test_nested_dictionary_lists() {
    // Schema of the struct: one non-nullable Dictionary<UInt8, UInt8> column.
    let dict_type = DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::UInt8));
    let fields = Fields::from(vec![Field::new("dict_col", dict_type, false)]);

    // The dictionary uses every one of the 256 possible u8 keys.
    // NOTE(review): presumably the panic comes from a merged dictionary needing
    // more keys than u8 can address; confirm against the kernel code.
    let struct_arr = {
        let keys = UInt8Array::from_iter_values(u8::MIN..=u8::MAX);
        let values = UInt8Array::from_iter_values(u8::MIN..=u8::MAX);
        let dictionary = DictionaryArray::new(keys, Arc::new(values));
        StructArray::try_new(fields.clone(), vec![Arc::new(dictionary)], None).unwrap()
    };

    // 256 lists, each containing exactly one struct row.
    let unit_lengths = repeat(1).take(256);
    let offset_buffer = OffsetBuffer::<i32>::from_lengths(unit_lengths);

    // Build the element field before `struct_arr` is moved into the list below.
    let element_field = Arc::new(Field::new_struct(
        "element",
        struct_arr.fields().clone(),
        false,
    ));
    let struct_list_arr = GenericListArray::new(
        element_field,
        offset_buffer,
        Arc::new(struct_arr) as ArrayRef,
        None,
    );

    let arr1 = Arc::new(struct_list_arr) as ArrayRef;
    let arr2 = Arc::clone(&arr1);

    // Per the report, both calls below panic.
    interleave(
        &[&arr1, &arr2],
        &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)],
    )
    .unwrap();
    concat::concat(&[&arr1, &arr2]).unwrap();
}
Expected behavior
The functions return correctly and, if possible, avoid using MutableArrayData::new
and instead reuse any shared dictionary (detected either via the ptr_eq function, or as a logically equivalent dictionary if there is a feasible way to check that).
Additional context
This was reported in a DataFusion issue: apache/datafusion#17445