Skip to content

Commit 4c5b644

Browse files
Implemented casting for RunEnd Encoding
1 parent a19fc62 commit 4c5b644

File tree

2 files changed

+632
-0
lines changed

2 files changed

+632
-0
lines changed

arrow-cast/src/cast/mod.rs

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,13 @@ mod decimal;
4141
mod dictionary;
4242
mod list;
4343
mod map;
44+
mod run_array;
4445
mod string;
4546
use crate::cast::decimal::*;
4647
use crate::cast::dictionary::*;
4748
use crate::cast::list::*;
4849
use crate::cast::map::*;
50+
use crate::cast::run_array::{cast_to_run_end_encoded, run_end_encoded_cast};
4951
use crate::cast::string::*;
5052

5153
use arrow_buffer::IntervalMonthDayNano;
@@ -137,6 +139,10 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
137139
can_cast_types(from_value_type, to_value_type)
138140
}
139141
(Dictionary(_, value_type), _) => can_cast_types(value_type, to_type),
142+
(RunEndEncoded(_, value_type), _) => can_cast_types(value_type.data_type(), to_type),
143+
(_, RunEndEncoded(_, _value_type)) => true,
144+
145+
140146
(_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type),
141147
(List(list_from) | LargeList(list_from), List(list_to) | LargeList(list_to)) => {
142148
can_cast_types(list_from.data_type(), list_to.data_type())
@@ -739,6 +745,28 @@ pub fn cast_with_options(
739745
| Map(_, _)
740746
| Dictionary(_, _),
741747
) => Ok(new_null_array(to_type, array.len())),
748+
(RunEndEncoded(index_type, _), _) => match index_type.data_type() {
749+
Int16 => run_end_encoded_cast::<Int16Type>(array, to_type, cast_options),
750+
Int32 => run_end_encoded_cast::<Int32Type>(array, to_type, cast_options),
751+
Int64 => run_end_encoded_cast::<Int64Type>(array, to_type, cast_options),
752+
_ => Err(ArrowError::CastError(format!(
753+
"Casting from run end encoded type {from_type:?} to {to_type:?} not supported",
754+
))),
755+
},
756+
(_, RunEndEncoded(index_type, value_type)) => match index_type.data_type() {
757+
Int16 => {
758+
cast_to_run_end_encoded::<Int16Type>(array, value_type.data_type(), cast_options)
759+
}
760+
Int32 => {
761+
cast_to_run_end_encoded::<Int32Type>(array, value_type.data_type(), cast_options)
762+
}
763+
Int64 => {
764+
cast_to_run_end_encoded::<Int64Type>(array, value_type.data_type(), cast_options)
765+
}
766+
_ => Err(ArrowError::CastError(format!(
767+
"Casting from type {from_type:?} to run end encoded type {to_type:?} not supported",
768+
))),
769+
},
742770
(Dictionary(index_type, _), _) => match **index_type {
743771
Int8 => dictionary_cast::<Int8Type>(array, to_type, cast_options),
744772
Int16 => dictionary_cast::<Int16Type>(array, to_type, cast_options),
@@ -10684,4 +10712,251 @@ mod tests {
1068410712
)) as ArrayRef;
1068510713
assert_eq!(*fixed_array, *r);
1068610714
}
10715+
#[cfg(test)]
10716+
mod run_end_encoded_tests {
10717+
use super::*;
10718+
use arrow_schema::{DataType, Field};
10719+
use std::sync::Arc;
10720+
10721+
/// Test casting FROM RunEndEncoded to primitive types.
///
/// Builds a `RunArray<Int32Type>` from run ends `[2, 5, 6]` and values `[1, 2, 3]`,
/// casts it to `DataType::Int64`, and asserts that the run ends are unchanged while
/// the values child now has type `Int64` and holds `[1, 2, 3]`.
///
/// NOTE(review): the requested target is plain `Int64`, yet the test downcasts the
/// result to `RunArray<Int32Type>`, i.e. it expects the output to stay run-end
/// encoded rather than match the requested type — confirm this is the intended contract.
10722+
#[test]
10723+
fn test_run_end_encoded_to_primitive() {
10724+
// Run-end-encoded form of the logical sequence [1, 1, 2, 2, 2, 3]
10725+
let run_ends = Int32Array::from(vec![2, 5, 6]);
10726+
let values = Int32Array::from(vec![1, 2, 3]);
10727+
let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
10728+
let array_ref = Arc::new(run_array) as ArrayRef;
10729+
10730+
// Cast to Int64
10731+
let cast_result = cast(&array_ref, &DataType::Int64).unwrap();
10732+
10733+
// The result must still downcast to a RunArray with Int32 run ends
// (the unwrap below panics otherwise)
10734+
let result_run_array = cast_result
10735+
.as_any()
10736+
.downcast_ref::<RunArray<Int32Type>>()
10737+
.unwrap();
10738+
10739+
// Check that the values child was cast to Int64
10740+
assert_eq!(result_run_array.values().data_type(), &DataType::Int64);
10741+
10742+
// Check that run structure is preserved (same run ends as the input)
10743+
assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]);
10744+
10745+
// Check that the per-run values are correct after the cast
10746+
let values_array = result_run_array.values().as_primitive::<Int64Type>();
10747+
assert_eq!(values_array.values(), &[1i64, 2i64, 3i64]);
10748+
}
10749+
10750+
/// Test casting FROM RunEndEncoded to string.
///
/// Builds a `RunArray<Int32Type>` from run ends `[2, 3, 5]` and Int32 values
/// `[10, 20, 30]`, casts it to `DataType::Utf8`, and asserts that the run ends are
/// unchanged while the values child becomes `Utf8` holding `"10"`, `"20"`, `"30"`.
///
/// NOTE(review): the requested target is plain `Utf8`, yet the test downcasts the
/// result to `RunArray<Int32Type>`, i.e. it expects the output to stay run-end
/// encoded rather than match the requested type — confirm this is the intended contract.
10751+
#[test]
10752+
fn test_run_end_encoded_to_string() {
10753+
// Run-end-encoded form of the logical Int32 sequence [10, 10, 20, 30, 30]
10754+
let run_ends = Int32Array::from(vec![2, 3, 5]);
10755+
let values = Int32Array::from(vec![10, 20, 30]);
10756+
let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
10757+
let array_ref = Arc::new(run_array) as ArrayRef;
10758+
10759+
// Cast to String
10760+
let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
10761+
10762+
// The result must still downcast to a RunArray with Int32 run ends
// (the unwrap below panics otherwise)
10763+
let result_run_array = cast_result
10764+
.as_any()
10765+
.downcast_ref::<RunArray<Int32Type>>()
10766+
.unwrap();
10767+
10768+
// Check that the values child was cast to String
10769+
assert_eq!(result_run_array.values().data_type(), &DataType::Utf8);
10770+
10771+
// Check that run structure is preserved (same run ends as the input)
10772+
assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]);
10773+
10774+
// Check that each run's value is the decimal string of the original integer
10775+
let values_array = result_run_array.values().as_string::<i32>();
10776+
assert_eq!(values_array.value(0), "10");
10777+
assert_eq!(values_array.value(1), "20");
10778+
assert_eq!(values_array.value(2), "30");
10779+
}
10780+
10781+
/// Test casting TO RunEndEncoded from primitive types.
///
/// Casts the Int32 array `[1, 1, 2, 2, 2, 3]` to `RunEndEncoded<Int32, Int32>` and
/// asserts that consecutive equal values are collapsed into runs: run ends
/// `[2, 5, 6]` with values `[1, 2, 3]`.
10782+
#[test]
10783+
fn test_primitive_to_run_end_encoded() {
10784+
// Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3]
10785+
let source_array = Int32Array::from(vec![1, 1, 2, 2, 2, 3]);
10786+
let array_ref = Arc::new(source_array) as ArrayRef;
10787+
10788+
// Cast to RunEndEncoded<Int32, Int32> (non-nullable run ends, nullable values)
10789+
let target_type = DataType::RunEndEncoded(
10790+
Arc::new(Field::new("run_ends", DataType::Int32, false)),
10791+
Arc::new(Field::new("values", DataType::Int32, true)),
10792+
);
10793+
let cast_result = cast(&array_ref, &target_type).unwrap();
10794+
10795+
// Verify the result is a RunArray with Int32 run ends (unwrap panics otherwise)
10796+
let result_run_array = cast_result
10797+
.as_any()
10798+
.downcast_ref::<RunArray<Int32Type>>()
10799+
.unwrap();
10800+
10801+
// Check run structure: runs should end at positions [2, 5, 6]
10802+
assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]);
10803+
10804+
// Check values: one entry per run, should be [1, 2, 3]
10805+
let values_array = result_run_array.values().as_primitive::<Int32Type>();
10806+
assert_eq!(values_array.values(), &[1, 2, 3]);
10807+
}
10808+
10809+
/// Test casting TO RunEndEncoded from string.
///
/// Casts the string array `["a", "a", "b", "c", "c"]` to
/// `RunEndEncoded<Int32, Utf8>` and asserts that consecutive equal strings are
/// collapsed into runs: run ends `[2, 3, 5]` with values `"a"`, `"b"`, `"c"`.
10810+
#[test]
10811+
fn test_string_to_run_end_encoded() {
10812+
// Create a String array with repeated values: ["a", "a", "b", "c", "c"]
10813+
let source_array = StringArray::from(vec!["a", "a", "b", "c", "c"]);
10814+
let array_ref = Arc::new(source_array) as ArrayRef;
10815+
10816+
// Cast to RunEndEncoded<Int32, String> (non-nullable run ends, nullable values)
10817+
let target_type = DataType::RunEndEncoded(
10818+
Arc::new(Field::new("run_ends", DataType::Int32, false)),
10819+
Arc::new(Field::new("values", DataType::Utf8, true)),
10820+
);
10821+
let cast_result = cast(&array_ref, &target_type).unwrap();
10822+
10823+
// Verify the result is a RunArray with Int32 run ends (unwrap panics otherwise)
10824+
let result_run_array = cast_result
10825+
.as_any()
10826+
.downcast_ref::<RunArray<Int32Type>>()
10827+
.unwrap();
10828+
10829+
// Check run structure: runs should end at positions [2, 3, 5]
10830+
assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]);
10831+
10832+
// Check values: one entry per run, should be ["a", "b", "c"]
10833+
let values_array = result_run_array.values().as_string::<i32>();
10834+
assert_eq!(values_array.value(0), "a");
10835+
assert_eq!(values_array.value(1), "b");
10836+
assert_eq!(values_array.value(2), "c");
10837+
}
10838+
10839+
/// Test casting with type conversion (Int32 -> RunEndEncoded<Int32, String>).
///
/// The source is a plain Int32 array `[1, 1, 2, 2, 3]` and the target's values
/// field is `Utf8`, so the test expects both run-end encoding and an
/// integer-to-string conversion: run ends `[2, 4, 5]`, values `"1"`, `"2"`, `"3"`.
10840+
#[test]
10841+
fn test_cast_with_type_conversion() {
10842+
// Create an Int32 array: [1, 1, 2, 2, 3]
10843+
let source_array = Int32Array::from(vec![1, 1, 2, 2, 3]);
10844+
let array_ref = Arc::new(source_array) as ArrayRef;
10845+
10846+
// Cast to RunEndEncoded<Int32, String> (values get converted to strings)
10847+
let target_type = DataType::RunEndEncoded(
10848+
Arc::new(Field::new("run_ends", DataType::Int32, false)),
10849+
Arc::new(Field::new("values", DataType::Utf8, true)),
10850+
);
10851+
let cast_result = cast(&array_ref, &target_type).unwrap();
10852+
10853+
// Verify the result is a RunArray with Int32 run ends (unwrap panics otherwise)
10854+
let result_run_array = cast_result
10855+
.as_any()
10856+
.downcast_ref::<RunArray<Int32Type>>()
10857+
.unwrap();
10858+
10859+
// Check that the values child has the requested Utf8 type
10860+
assert_eq!(result_run_array.values().data_type(), &DataType::Utf8);
10861+
10862+
// Check run structure: runs should end at positions [2, 4, 5]
10863+
assert_eq!(result_run_array.run_ends().values(), &[2, 4, 5]);
10864+
10865+
// Check values: one string per run, should be ["1", "2", "3"]
10866+
let values_array = result_run_array.values().as_string::<i32>();
10867+
assert_eq!(values_array.value(0), "1");
10868+
assert_eq!(values_array.value(1), "2");
10869+
assert_eq!(values_array.value(2), "3");
10870+
}
10871+
10872+
/// Test casting empty array to RunEndEncoded.
///
/// Casts a zero-length Int32 array to `RunEndEncoded<Int32, Int32>` and asserts
/// that the cast succeeds and that both children of the resulting `RunArray`
/// (run ends and values) have length 0.
10873+
#[test]
10874+
fn test_empty_array_to_run_end_encoded() {
10875+
// Create an empty Int32 array
10876+
let source_array = Int32Array::from(Vec::<i32>::new());
10877+
let array_ref = Arc::new(source_array) as ArrayRef;
10878+
10879+
// Cast to RunEndEncoded<Int32, Int32>
10880+
let target_type = DataType::RunEndEncoded(
10881+
Arc::new(Field::new("run_ends", DataType::Int32, false)),
10882+
Arc::new(Field::new("values", DataType::Int32, true)),
10883+
);
10884+
let cast_result = cast(&array_ref, &target_type).unwrap();
10885+
10886+
// Verify the result is a RunArray with Int32 run ends (unwrap panics otherwise)
10887+
let result_run_array = cast_result
10888+
.as_any()
10889+
.downcast_ref::<RunArray<Int32Type>>()
10890+
.unwrap();
10891+
10892+
// Check that both run_ends and values are empty
10893+
assert_eq!(result_run_array.run_ends().len(), 0);
10894+
assert_eq!(result_run_array.values().len(), 0);
10895+
}
10896+
10897+
/// Test casting RunEndEncoded with nulls.
///
/// Builds a `RunArray<Int32Type>` whose middle run has a null value, casts it to
/// `DataType::Utf8`, and asserts that the values child holds `"1"`, null, `"2"`.
///
/// NOTE(review): only the values child is asserted; the run ends of the result
/// are not checked in this test.
10898+
#[test]
10899+
fn test_run_end_encoded_with_nulls() {
10900+
// Run-end-encoded form of the logical sequence [1, 1, null, 2, 2]
10901+
let run_ends = Int32Array::from(vec![2, 3, 5]);
10902+
let values = Int32Array::from(vec![Some(1), None, Some(2)]);
10903+
let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
10904+
let array_ref = Arc::new(run_array) as ArrayRef;
10905+
10906+
// Cast to String
10907+
let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
10908+
10909+
// The result must still downcast to a RunArray with Int32 run ends
// (the unwrap below panics otherwise)
10910+
let result_run_array = cast_result
10911+
.as_any()
10912+
.downcast_ref::<RunArray<Int32Type>>()
10913+
.unwrap();
10914+
10915+
// The null must stay at values index 1, between the converted "1" and "2"
let values_array = result_run_array.values().as_string::<i32>();
10916+
assert_eq!(values_array.value(0), "1");
10917+
assert!(values_array.is_null(1));
10918+
assert_eq!(values_array.value(2), "2");
10919+
}
10920+
10921+
/// Test different index types (Int16, Int64).
///
/// Casts the same Int32 source array to `RunEndEncoded` targets whose run-end
/// field is `Int16` and then `Int64`, asserting in each case that the cast
/// succeeds and that the result's `data_type()` equals the requested target.
///
/// NOTE(review): only the resulting data type is checked; run ends and values
/// are not inspected for these index types.
10922+
#[test]
10923+
fn test_different_index_types() {
10924+
// Test with Int16 index type
10925+
let source_array = Int32Array::from(vec![1, 1, 2, 3, 3]);
10926+
let array_ref = Arc::new(source_array) as ArrayRef;
10927+
10928+
let target_type = DataType::RunEndEncoded(
10929+
Arc::new(Field::new("run_ends", DataType::Int16, false)),
10930+
Arc::new(Field::new("values", DataType::Int32, true)),
10931+
);
10932+
let cast_result = cast(&array_ref, &target_type).unwrap();
10933+
assert_eq!(cast_result.data_type(), &target_type);
10934+
10935+
// Test with Int64 index type (same source array, new target type)
10936+
let target_type = DataType::RunEndEncoded(
10937+
Arc::new(Field::new("run_ends", DataType::Int64, false)),
10938+
Arc::new(Field::new("values", DataType::Int32, true)),
10939+
);
10940+
let cast_result = cast(&array_ref, &target_type).unwrap();
10941+
assert_eq!(cast_result.data_type(), &target_type);
10942+
}
10943+
/// Checks that casting a single-field `StructArray` to `FixedSizeBinary(10)`
/// returns an error.
///
/// NOTE(review): despite the test name, no `RunEndEncoded` type appears on either
/// side of this cast, so the run-end-encoded cast paths are not exercised here.
/// Consider renaming the test or changing the target type — confirm intent.
#[test]
10944+
fn test_unsupported_cast_to_run_end_encoded() {
10945+
// Create a Struct array with one non-nullable Int32 field named "item"
10946+
let field = Field::new("item", DataType::Int32, false);
10947+
let struct_array = StructArray::from(vec![(
10948+
Arc::new(field),
10949+
Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
10950+
)]);
10951+
let array_ref = Arc::new(struct_array) as ArrayRef;
10952+
10953+
// The target below is FixedSizeBinary(10), not a RunEndEncoded type.
10954+
// The error is expected because:
10955+
// casting a StructArray to FixedSizeBinary is presumably unsupported (verify against `can_cast_types`)
10956+
let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10));
10957+
10958+
// Expect the cast to return Err rather than panic or succeed
10959+
assert!(cast_result.is_err());
10960+
}
10961+
}
1068710962
}

0 commit comments

Comments
 (0)