@@ -41,11 +41,13 @@ mod decimal;
4141mod dictionary;
4242mod list;
4343mod map;
44+ mod run_array;
4445mod string;
4546use crate :: cast:: decimal:: * ;
4647use crate :: cast:: dictionary:: * ;
4748use crate :: cast:: list:: * ;
4849use crate :: cast:: map:: * ;
50+ use crate :: cast:: run_array:: { cast_to_run_end_encoded, run_end_encoded_cast} ;
4951use crate :: cast:: string:: * ;
5052
5153use arrow_buffer:: IntervalMonthDayNano ;
@@ -137,6 +139,10 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
137139 can_cast_types ( from_value_type, to_value_type)
138140 }
139141 ( Dictionary ( _, value_type) , _) => can_cast_types ( value_type, to_type) ,
142+ ( RunEndEncoded ( _, value_type) , _) => can_cast_types ( value_type. data_type ( ) , to_type) ,
143+ ( _, RunEndEncoded ( _, _value_type) ) => true ,
144+
145+
140146 ( _, Dictionary ( _, value_type) ) => can_cast_types ( from_type, value_type) ,
141147 ( List ( list_from) | LargeList ( list_from) , List ( list_to) | LargeList ( list_to) ) => {
142148 can_cast_types ( list_from. data_type ( ) , list_to. data_type ( ) )
@@ -739,6 +745,28 @@ pub fn cast_with_options(
739745 | Map ( _, _)
740746 | Dictionary ( _, _) ,
741747 ) => Ok ( new_null_array ( to_type, array. len ( ) ) ) ,
748+ ( RunEndEncoded ( index_type, _) , _) => match index_type. data_type ( ) {
749+ Int16 => run_end_encoded_cast :: < Int16Type > ( array, to_type, cast_options) ,
750+ Int32 => run_end_encoded_cast :: < Int32Type > ( array, to_type, cast_options) ,
751+ Int64 => run_end_encoded_cast :: < Int64Type > ( array, to_type, cast_options) ,
752+ _ => Err ( ArrowError :: CastError ( format ! (
753+ "Casting from run end encoded type {from_type:?} to {to_type:?} not supported" ,
754+ ) ) ) ,
755+ } ,
756+ ( _, RunEndEncoded ( index_type, value_type) ) => match index_type. data_type ( ) {
757+ Int16 => {
758+ cast_to_run_end_encoded :: < Int16Type > ( array, value_type. data_type ( ) , cast_options)
759+ }
760+ Int32 => {
761+ cast_to_run_end_encoded :: < Int32Type > ( array, value_type. data_type ( ) , cast_options)
762+ }
763+ Int64 => {
764+ cast_to_run_end_encoded :: < Int64Type > ( array, value_type. data_type ( ) , cast_options)
765+ }
766+ _ => Err ( ArrowError :: CastError ( format ! (
767+ "Casting from type {from_type:?} to run end encoded type {to_type:?} not supported" ,
768+ ) ) ) ,
769+ } ,
742770 ( Dictionary ( index_type, _) , _) => match * * index_type {
743771 Int8 => dictionary_cast :: < Int8Type > ( array, to_type, cast_options) ,
744772 Int16 => dictionary_cast :: < Int16Type > ( array, to_type, cast_options) ,
@@ -10684,4 +10712,251 @@ mod tests {
1068410712 ) ) as ArrayRef ;
1068510713 assert_eq ! ( * fixed_array, * r) ;
1068610714 }
10715+ #[ cfg( test) ]
10716+ mod run_end_encoded_tests {
10717+ use super :: * ;
10718+ use arrow_schema:: { DataType , Field } ;
10719+ use std:: sync:: Arc ;
10720+
/// Casting a run-end encoded `Int32` array to `Int64` is expected to keep the
/// run layout and convert only the values child.
#[test]
fn test_run_end_encoded_to_primitive() {
    // Logical contents of the input: [1, 1, 2, 2, 2, 3]
    let ends = Int32Array::from(vec![2, 5, 6]);
    let vals = Int32Array::from(vec![1, 2, 3]);
    let input: ArrayRef = Arc::new(RunArray::<Int32Type>::try_new(&ends, &vals).unwrap());

    let output = cast(&input, &DataType::Int64).unwrap();

    // The test expects the output to still be a run array keyed by Int32 run ends.
    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // The values child now carries the requested type.
    let converted = encoded.values();
    assert_eq!(converted.data_type(), &DataType::Int64);

    // Run boundaries are the same as in the input.
    assert_eq!(encoded.run_ends().values(), &[2, 5, 6]);

    // The numeric contents are unchanged apart from the wider type.
    let widened = converted.as_primitive::<Int64Type>();
    assert_eq!(widened.values(), &[1i64, 2i64, 3i64]);
}
10749+
/// Casting a run-end encoded `Int32` array to `Utf8` is expected to keep the
/// run layout and render each run value as its decimal string.
#[test]
fn test_run_end_encoded_to_string() {
    // Logical contents of the input: [10, 10, 20, 30, 30]
    let ends = Int32Array::from(vec![2, 3, 5]);
    let vals = Int32Array::from(vec![10, 20, 30]);
    let input: ArrayRef = Arc::new(RunArray::<Int32Type>::try_new(&ends, &vals).unwrap());

    let output = cast(&input, &DataType::Utf8).unwrap();

    // The test expects the output to still be a run array keyed by Int32 run ends.
    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // The values child now carries the requested type.
    assert_eq!(encoded.values().data_type(), &DataType::Utf8);

    // Run boundaries are the same as in the input.
    assert_eq!(encoded.run_ends().values(), &[2, 3, 5]);

    // One string per run, in run order.
    let rendered = encoded.values().as_string::<i32>();
    for (idx, expected) in ["10", "20", "30"].into_iter().enumerate() {
        assert_eq!(rendered.value(idx), expected);
    }
}
10780+
/// Casting a plain `Int32` array to `RunEndEncoded<Int32, Int32>` is expected
/// to collapse adjacent equal values into runs.
#[test]
fn test_primitive_to_run_end_encoded() {
    // Three runs: 1 (x2), 2 (x3), 3 (x1)
    let input: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 2, 2, 2, 3]));

    let ree_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int32, false)),
        Arc::new(Field::new("values", DataType::Int32, true)),
    );
    let output = cast(&input, &ree_type).unwrap();

    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // Each run end is the exclusive end offset of its run.
    assert_eq!(encoded.run_ends().values(), &[2, 5, 6]);

    // One value per run.
    let distinct = encoded.values().as_primitive::<Int32Type>();
    assert_eq!(distinct.values(), &[1, 2, 3]);
}
10808+
/// Casting a `Utf8` array to `RunEndEncoded<Int32, Utf8>` is expected to
/// collapse adjacent equal strings into runs.
#[test]
fn test_string_to_run_end_encoded() {
    // Three runs: "a" (x2), "b" (x1), "c" (x2)
    let input: ArrayRef = Arc::new(StringArray::from(vec!["a", "a", "b", "c", "c"]));

    let ree_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int32, false)),
        Arc::new(Field::new("values", DataType::Utf8, true)),
    );
    let output = cast(&input, &ree_type).unwrap();

    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // Each run end is the exclusive end offset of its run.
    assert_eq!(encoded.run_ends().values(), &[2, 3, 5]);

    // One string per run, in run order.
    let distinct = encoded.values().as_string::<i32>();
    for (idx, expected) in ["a", "b", "c"].into_iter().enumerate() {
        assert_eq!(distinct.value(idx), expected);
    }
}
10838+
/// Casting `Int32` to `RunEndEncoded<Int32, Utf8>` is expected to both
/// run-encode the input and convert the values to strings.
#[test]
fn test_cast_with_type_conversion() {
    // Three runs: 1 (x2), 2 (x2), 3 (x1)
    let input: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 2, 2, 3]));

    // The values field asks for Utf8 although the input is Int32.
    let ree_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int32, false)),
        Arc::new(Field::new("values", DataType::Utf8, true)),
    );
    let output = cast(&input, &ree_type).unwrap();

    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // The values child carries the requested value type.
    assert_eq!(encoded.values().data_type(), &DataType::Utf8);

    // Each run end is the exclusive end offset of its run.
    assert_eq!(encoded.run_ends().values(), &[2, 4, 5]);

    // One stringified value per run, in run order.
    let rendered = encoded.values().as_string::<i32>();
    for (idx, expected) in ["1", "2", "3"].into_iter().enumerate() {
        assert_eq!(rendered.value(idx), expected);
    }
}
10871+
/// Casting a zero-length `Int32` array to `RunEndEncoded<Int32, Int32>` is
/// expected to succeed and produce a run array with no runs.
#[test]
fn test_empty_array_to_run_end_encoded() {
    let input: ArrayRef = Arc::new(Int32Array::from(Vec::<i32>::new()));

    let ree_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int32, false)),
        Arc::new(Field::new("values", DataType::Int32, true)),
    );
    let output = cast(&input, &ree_type).unwrap();

    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // Neither child array may contain any entries.
    assert_eq!(encoded.run_ends().len(), 0);
    assert_eq!(encoded.values().len(), 0);
}
10896+
/// A null run in a run-end encoded input is expected to stay null after the
/// values are cast to `Utf8`.
#[test]
fn test_run_end_encoded_with_nulls() {
    // Logical contents of the input: [1, 1, null, 2, 2]
    let ends = Int32Array::from(vec![2, 3, 5]);
    let vals = Int32Array::from(vec![Some(1), None, Some(2)]);
    let input: ArrayRef = Arc::new(RunArray::<Int32Type>::try_new(&ends, &vals).unwrap());

    let output = cast(&input, &DataType::Utf8).unwrap();

    let encoded = output
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // The middle run was null on input and must be null on output;
    // its neighbours are rendered as strings.
    let rendered = encoded.values().as_string::<i32>();
    assert_eq!(rendered.value(0), "1");
    assert!(rendered.is_null(1));
    assert_eq!(rendered.value(2), "2");
}
10920+
/// Casting to run-end encoded types keyed by `Int16` and by `Int64` run ends
/// is expected to yield an array of exactly the requested data type.
#[test]
fn test_different_index_types() {
    let input: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 2, 3, 3]));

    // Same input and value type each time; only the run-end type varies.
    for run_end_type in [DataType::Int16, DataType::Int64] {
        let requested = DataType::RunEndEncoded(
            Arc::new(Field::new("run_ends", run_end_type, false)),
            Arc::new(Field::new("values", DataType::Int32, true)),
        );
        let output = cast(&input, &requested).unwrap();
        assert_eq!(output.data_type(), &requested);
    }
}
/// Unsupported casts must be reported as errors.
///
/// Covers two cases:
/// 1. A run-end encoded target whose run-end type is not `Int16`, `Int32` or
///    `Int64`, which the cast dispatch rejects with a `CastError`.
/// 2. A cast between two unrelated types (`Struct` -> `FixedSizeBinary`),
///    kept from the original version of this test.
#[test]
fn test_unsupported_cast_to_run_end_encoded() {
    // Case 1: Int8 is not an accepted run-end type, so the cast must be rejected
    // rather than producing an array.
    let source: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 2]));
    let bad_run_end_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int8, false)),
        Arc::new(Field::new("values", DataType::Int32, true)),
    );
    let rejected = cast(&source, &bad_run_end_type);
    assert!(
        matches!(rejected, Err(ArrowError::CastError(_))),
        "expected a CastError for Int8 run ends, got {rejected:?}"
    );

    // Case 2: a struct array cannot be cast to a fixed-size binary array.
    let field = Field::new("item", DataType::Int32, false);
    let struct_array = StructArray::from(vec![(
        Arc::new(field),
        Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
    )]);
    let array_ref = Arc::new(struct_array) as ArrayRef;

    let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10));
    assert!(cast_result.is_err());
}
10961+ }
1068710962}
0 commit comments