Open
Description
Describe the enhancement requested
The current implementation of the asof join operation does not support the fixed-size list data type (`fixed_size_list`). When a table containing a column of this type is used in an asof join, the following error occurs:
Invalid: Unsupported data type fixed_size_list<item: int32>[3] for field List
Example code that reproduces the error:
const int32_t list_size = 3;
const int num_rows = 5;
auto list_type = arrow::fixed_size_list(arrow::int32(), list_size);
auto inner_builder = std::make_shared<arrow::Int32Builder>(arrow::default_memory_pool());
arrow::Int16Builder idx_builder;
arrow::FixedSizeListBuilder builder(arrow::default_memory_pool(), inner_builder, list_type);
ARROW_RETURN_NOT_OK(builder.Reserve(num_rows));
for (int i = 0; i < num_rows; ++i) {
ARROW_RETURN_NOT_OK(inner_builder->AppendValues({i * 10, i * 10 + 1, i * 10 + 2}));
ARROW_RETURN_NOT_OK(builder.Append());
ARROW_RETURN_NOT_OK(idx_builder.Append(i));
}
ARROW_ASSIGN_OR_RAISE(auto idx_list, idx_builder.Finish());
ARROW_ASSIGN_OR_RAISE(auto result1, builder.Finish());
for (int j = 0; j < num_rows; ++j) {
ARROW_RETURN_NOT_OK(inner_builder->AppendValues({j * 20, j * 20 + 4, j * 20 + 5}));
ARROW_RETURN_NOT_OK(builder.Append());
ARROW_RETURN_NOT_OK(idx_builder.Append(j*2));
}
ARROW_ASSIGN_OR_RAISE(auto idx_list2, idx_builder.Finish());
ARROW_ASSIGN_OR_RAISE(auto result2, builder.Finish());
//first table
std::shared_ptr<arrow::Schema> schema1;
schema1=arrow::schema({arrow::field("List", list_type), arrow::field("idx", arrow::int16())});
std::shared_ptr<arrow::Table> l_table;
l_table = arrow::Table::Make(schema1, {result1, idx_list}, num_rows);
//second table
std::shared_ptr<arrow::Schema> schema2;
schema2=arrow::schema({arrow::field("List", list_type), arrow::field("idx", arrow::int16())});
std::shared_ptr<arrow::Table> r_table;
r_table = arrow::Table::Make(schema2, {result2, idx_list2}, num_rows);
auto table_source_options_l = ac::TableSourceNodeOptions{l_table};
ac::Declaration left{"table_source", std::move(table_source_options_l), "lTable"};
auto table_source_options_r = ac::TableSourceNodeOptions{r_table};
ac::Declaration right{"table_source", std::move(table_source_options_r),"rTable"};
arrow::acero::AsofJoinNodeOptions::Keys left_keys;
left_keys.on_key = arrow::FieldRef("idx");
arrow::acero::AsofJoinNodeOptions::Keys right_keys;
right_keys.on_key = arrow::FieldRef("idx");
ac::AsofJoinNodeOptions asof_opt{{left_keys, right_keys}, 3};
arrow::acero::Declaration asof{"asofjoin", {std::move(left), std::move(right)}, std::move(asof_opt),};
ARROW_ASSIGN_OR_RAISE(auto output_table, ac::DeclarationToTable(std::move(asof)));
Component(s)
C++