@@ -47,7 +47,9 @@ use crate::cast::decimal::*;
4747use crate :: cast:: dictionary:: * ;
4848use crate :: cast:: list:: * ;
4949use crate :: cast:: map:: * ;
50- use crate :: cast:: run_array:: { cast_to_run_end_encoded, run_end_encoded_cast} ;
50+ use crate :: cast:: run_array:: {
51+ can_cast_run_end_encoded, cast_to_run_end_encoded, run_end_encoded_cast,
52+ } ;
5153use crate :: cast:: string:: * ;
5254
5355use arrow_buffer:: IntervalMonthDayNano ;
@@ -140,9 +142,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
140142 }
141143 ( Dictionary ( _, value_type) , _) => can_cast_types ( value_type, to_type) ,
142144 ( RunEndEncoded ( _, value_type) , _) => can_cast_types ( value_type. data_type ( ) , to_type) ,
143- ( _, RunEndEncoded ( _, _value_type) ) => true ,
144-
145-
145+ ( _, RunEndEncoded ( _, _value_type) ) => can_cast_run_end_encoded ( from_type, to_type) ,
146146 ( _, Dictionary ( _, value_type) ) => can_cast_types ( from_type, value_type) ,
147147 ( List ( list_from) | LargeList ( list_from) , List ( list_to) | LargeList ( list_to) ) => {
148148 can_cast_types ( list_from. data_type ( ) , list_to. data_type ( ) )
@@ -745,14 +745,18 @@ pub fn cast_with_options(
745745 | Map ( _, _)
746746 | Dictionary ( _, _) ,
747747 ) => Ok ( new_null_array ( to_type, array. len ( ) ) ) ,
748- ( RunEndEncoded ( index_type, _) , _) => match index_type. data_type ( ) {
749- Int16 => run_end_encoded_cast :: < Int16Type > ( array, to_type, cast_options) ,
750- Int32 => run_end_encoded_cast :: < Int32Type > ( array, to_type, cast_options) ,
751- Int64 => run_end_encoded_cast :: < Int64Type > ( array, to_type, cast_options) ,
752- _ => Err ( ArrowError :: CastError ( format ! (
753- "Casting from run end encoded type {from_type:?} to {to_type:?} not supported" ,
754- ) ) ) ,
755- } ,
748+ ( RunEndEncoded ( index_type, _) , _) => {
749+ let mut new_cast_options = cast_options. clone ( ) ;
750+ new_cast_options. safe = false ;
751+ match index_type. data_type ( ) {
752+ Int16 => run_end_encoded_cast :: < Int16Type > ( array, to_type, & new_cast_options) ,
753+ Int32 => run_end_encoded_cast :: < Int32Type > ( array, to_type, & new_cast_options) ,
754+ Int64 => run_end_encoded_cast :: < Int64Type > ( array, to_type, & new_cast_options) ,
755+ _ => Err ( ArrowError :: CastError ( format ! (
756+ "Casting from run end encoded type {from_type:?} to {to_type:?} not supported" ,
757+ ) ) ) ,
758+ }
759+ }
756760 ( _, RunEndEncoded ( index_type, value_type) ) => match index_type. data_type ( ) {
757761 Int16 => {
758762 cast_to_run_end_encoded :: < Int16Type > ( array, value_type. data_type ( ) , cast_options)
@@ -10726,16 +10730,14 @@ mod tests {
1072610730 let values = Int32Array :: from ( vec ! [ 1 , 2 , 3 ] ) ;
1072710731 let run_array = RunArray :: < Int32Type > :: try_new ( & run_ends, & values) . unwrap ( ) ;
1072810732 let array_ref = Arc :: new ( run_array) as ArrayRef ;
10729- println ! ( "1" ) ;
1073010733 // Cast to Int64
1073110734 let cast_result = cast ( & array_ref, & DataType :: Int64 ) . unwrap ( ) ;
10732- println ! ( "2" ) ;
1073310735 // Verify the result is a plain Int64Array with the runs expanded
10734- let result_run_array = cast_result
10735- . as_any ( )
10736- . downcast_ref :: < Int64Array > ( )
10737- . unwrap ( ) ;
10738- assert_eq ! ( result_run_array . values ( ) , & [ 1i64 , 1i64 , 2i64 , 2i64 , 2i64 , 3i64 ] ) ;
10736+ let result_run_array = cast_result. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
10737+ assert_eq ! (
10738+ result_run_array . values ( ) ,
10739+ & [ 1i64 , 1i64 , 2i64 , 2i64 , 2i64 , 3i64 ]
10740+ ) ;
1073910741 }
1074010742
1074110743 /// Test casting FROM RunEndEncoded to string
@@ -10751,10 +10753,7 @@ mod tests {
1075110753 let cast_result = cast ( & array_ref, & DataType :: Utf8 ) . unwrap ( ) ;
1075210754
1075310755 // Verify the result is a plain StringArray, not a RunArray
10754- let result_array = cast_result
10755- . as_any ( )
10756- . downcast_ref :: < StringArray > ( )
10757- . unwrap ( ) ;
10756+ let result_array = cast_result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1075810757 // Check that values are correct
1075910758 assert_eq ! ( result_array. value( 0 ) , "10" ) ;
1076010759 assert_eq ! ( result_array. value( 1 ) , "10" ) ;
@@ -10890,10 +10889,7 @@ mod tests {
1089010889 let cast_result = cast ( & array_ref, & DataType :: Utf8 ) . unwrap ( ) ;
1089110890
1089210891 // Verify the result preserves nulls
10893- let result_run_array = cast_result
10894- . as_any ( )
10895- . downcast_ref :: < StringArray > ( )
10896- . unwrap ( ) ;
10892+ let result_run_array = cast_result. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) ;
1089710893 assert_eq ! ( result_run_array. value( 0 ) , "1" ) ;
1089810894 assert ! ( result_run_array. is_null( 2 ) ) ;
1089910895 assert_eq ! ( result_run_array. value( 4 ) , "2" ) ;
@@ -10939,5 +10935,131 @@ mod tests {
1093910935 // Expect this to fail
1094010936 assert ! ( cast_result. is_err( ) ) ;
1094110937 }
/// Narrowing the run ends of a run-end-encoded array from Int64 to Int16 must
/// report an error when a run end does not fit in the target type.
#[test]
fn test_cast_run_end_encoded_int64_to_int16_should_fail() {
    use arrow_array::{Int64Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // Every run end here exceeds i16::MAX (32_767).
    let wide_run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]);
    let labels = StringArray::from(vec!["a", "b", "c"]);
    let source: ArrayRef =
        Arc::new(RunArray::<Int64Type>::try_new(&wide_run_ends, &labels).unwrap());

    // Target type: RunEndEncoded<Int16, Utf8>.
    let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int16, false));
    let values_field = Arc::new(Field::new("values", DataType::Utf8, true));
    let narrow_type = DataType::RunEndEncoded(run_ends_field, values_field);

    // `safe: false` is expected to make the cast fail instead of returning nulls.
    let strict = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    let outcome = cast_with_options(&source, &narrow_type, &strict);
    let message = match outcome {
        Ok(_) => panic!("This should not happen"),
        Err(err) => err.to_string(),
    };

    // The first run end (100000) is the one that must be reported.
    assert!(message.contains("Cast error: Can't cast value 100000 to type Int16"));
}
/// Widening the run ends of a run-end-encoded array from Int16 to Int64 must
/// succeed and leave both the run ends and the values unchanged.
#[test]
fn test_cast_run_end_encoded_int16_to_int64_should_succeed() {
    use arrow_array::{Int16Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // Run ends small enough for Int16, so the source array itself is valid.
    let narrow_run_ends = Int16Array::from(vec![2, 5, 8]);
    let labels = StringArray::from(vec!["a", "b", "c"]);
    let source: ArrayRef =
        Arc::new(RunArray::<Int16Type>::try_new(&narrow_run_ends, &labels).unwrap());

    // Target type: RunEndEncoded<Int64, Utf8>.
    let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int64, false));
    let values_field = Arc::new(Field::new("values", DataType::Utf8, true));
    let wide_type = DataType::RunEndEncoded(run_ends_field, values_field);

    let strict = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    let converted = cast_with_options(&source, &wide_type, &strict)
        .unwrap_or_else(|e| panic!("Cast should have succeeded but failed: {}", e));

    let widened = converted
        .as_any()
        .downcast_ref::<RunArray<Int64Type>>()
        .unwrap();

    // Run ends keep their numeric values, now stored as i64.
    assert_eq!(widened.run_ends().values(), &[2i64, 5i64, 8i64]);

    // The string values are carried over untouched.
    let strings = widened.values().as_string::<i32>();
    for (idx, expected) in ["a", "b", "c"].iter().enumerate() {
        assert_eq!(strings.value(idx), *expected);
    }
}
11023+
/// Narrowing the run ends of a run-end-encoded array from Int32 to Int16 must
/// report an error when at least one run end exceeds the Int16 range.
#[test]
fn test_cast_run_end_encoded_int32_to_int16_should_fail() {
    use arrow_array::{Int32Array, RunArray, StringArray};
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    // 1000 fits in Int16; 50000 and 80000 exceed i16::MAX (32_767).
    let run_ends = Int32Array::from(vec![1000, 50000, 80000]);
    let values = StringArray::from(vec!["x", "y", "z"]);

    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
    let array_ref = Arc::new(ree_array) as ArrayRef;

    // Target type: RunEndEncoded<Int16, Utf8>.
    let target_type = DataType::RunEndEncoded(
        Arc::new(Field::new("run_ends", DataType::Int16, false)),
        Arc::new(Field::new("values", DataType::Utf8, true)),
    );
    let cast_options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };

    let result = cast_with_options(&array_ref, &target_type, &cast_options);

    match result {
        Ok(_) => panic!("Cast should have failed due to overflow but succeeded"),
        // Only the message prefix is checked, not which value is reported.
        Err(e) => assert!(e.to_string().contains("Can't cast value")),
    }
}
1094211064 }
1094311065}
0 commit comments