apache · kazuyukitanimura · Mar 11, 2025 · Feb 6, 2025 · Feb 11, 2025 · Feb 14, 2025
diff --git a/docs/source/user-guide/compatibility.md b/docs/source/user-guide/compatibility.md
@@ -162,6 +162,7 @@ The following cast operations are generally compatible with Spark except for the
 | decimal | long |  |
 | decimal | float |  |
 | decimal | double |  |
+| decimal | decimal |  |
 | decimal | string | There can be formatting differences in some case due to Spark using scientific notation where Comet does not |
 | string | boolean |  |
 | string | byte |  |

diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs
@@ -872,6 +872,13 @@ fn cast_array(
     let array = array_with_timezone(array, cast_options.timezone.clone(), Some(to_type))?;
     let from_type = array.data_type().clone();
 
+    let native_cast_options: CastOptions = CastOptions {
+        safe: !matches!(cast_options.eval_mode, EvalMode::Ansi), // safe mode is derived from the eval mode: disabled only for ANSI
+        format_options: FormatOptions::new()
+            .with_timestamp_tz_format(TIMESTAMP_FORMAT)
+            .with_timestamp_format(TIMESTAMP_FORMAT),
+    };
+
     let array = match &from_type {
         Dictionary(key_type, value_type)
             if key_type.as_ref() == &Int32
@@ -963,7 +970,7 @@ fn cast_array(
             || is_datafusion_spark_compatible(from_type, to_type, cast_options.allow_incompat) =>
         {
             // use DataFusion cast only when we know that it is compatible with Spark
-            Ok(cast_with_options(&array, to_type, &CAST_OPTIONS)?)
+            Ok(cast_with_options(&array, to_type, &native_cast_options)?)
         }
         _ => {
             // we should never reach this code because the Scala code should be checking

diff --git a/spark/src/main/scala/org/apache/comet/GenerateDocs.scala b/spark/src/main/scala/org/apache/comet/GenerateDocs.scala
@@ -69,7 +69,8 @@ object GenerateDocs {
         w.write("|-|-|-|\n".getBytes)
         for (fromType <- CometCast.supportedTypes) {
           for (toType <- CometCast.supportedTypes) {
-            if (Cast.canCast(fromType, toType) && fromType != toType) {
+            if (Cast.canCast(fromType, toType) && (fromType != toType || fromType.typeName
+                .contains("decimal"))) {
               val fromTypeName = fromType.typeName.replace("(10,2)", "")
               val toTypeName = toType.typeName.replace("(10,2)", "")
               CometCast.isSupported(fromType, toType, None, CometEvalMode.LEGACY) match {

diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -70,13 +70,8 @@ object CometCast {
           case _ =>
             Unsupported
         }
-      case (from: DecimalType, to: DecimalType) =>
-        if (to.precision < from.precision) {
-          // https://github.com/apache/datafusion/issues/13492
-          Incompatible(Some("Casting to smaller precision is not supported"))
-        } else {
-          Compatible()
-        }
+      case (_: DecimalType, _: DecimalType) =>
+        Compatible()
       case (DataTypes.StringType, _) =>
         canCastFromString(toType, timeZoneId, evalMode)
       case (_, DataTypes.StringType) =>

diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -25,12 +25,12 @@ import scala.util.Random
 import scala.util.matching.Regex
 
 import org.apache.hadoop.fs.Path
-import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode}
+import org.apache.spark.sql.{CometTestBase, DataFrame, Row, SaveMode}
 import org.apache.spark.sql.catalyst.expressions.Cast
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType}
+import org.apache.spark.sql.types.{DataType, DataTypes, DecimalType, StructField, StructType}
 
 import org.apache.comet.expressions.{CometCast, CometEvalMode, Compatible}
 
@@ -981,12 +981,15 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   test("cast between decimals with different precision and scale") {
-    // cast between default Decimal(38, 18) to Decimal(6,2)
-    val values = Seq(BigDecimal("12345.6789"), BigDecimal("9876.5432"), BigDecimal("123.4567"))
-    val df = withNulls(values)
-      .toDF("b")
-      .withColumn("a", col("b").cast(DecimalType(6, 2)))
-    checkSparkAnswer(df)
+    val rowData = Seq(
+      Row(BigDecimal("12345.6789")),
+      Row(BigDecimal("9876.5432")),
+      Row(BigDecimal("123.4567")))
+    val df = spark.createDataFrame(
+      spark.sparkContext.parallelize(rowData),
+      StructType(Seq(StructField("a", DataTypes.createDecimalType(10, 4)))))
+
+    castTest(df, DecimalType(6, 2))
   }
 
   test("cast between decimals with higher precision than source") {
@@ -1210,27 +1213,33 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
               val cometMessage =
                 if (cometException.getCause != null) cometException.getCause.getMessage
                 else cometException.getMessage
-              if (CometSparkSessionExtensions.isSpark40Plus) {
-                // for Spark 4 we expect to sparkException carries the message
-                assert(
-                  sparkException.getMessage
-                    .replace(".WITH_SUGGESTION] ", "]")
-                    .startsWith(cometMessage))
-              } else if (CometSparkSessionExtensions.isSpark34Plus) {
-                // for Spark 3.4 we expect to reproduce the error message exactly
-                assert(cometMessage == sparkMessage)
+              // This branch covers only decimal-to-decimal casts where Spark reports an overflow of the target precision/scale; Comet words that error differently.
+              if (df.schema("a").dataType.typeName.contains("decimal") && toType.typeName
+                  .contains("decimal") && sparkMessage.contains("cannot be represented as")) {
+                assert(cometMessage.contains("too large to store"))
               } else {
-                // for Spark 3.3 we just need to strip the prefix from the Comet message
-                // before comparing
-                val cometMessageModified = cometMessage
-                  .replace("[CAST_INVALID_INPUT] ", "")
-                  .replace("[CAST_OVERFLOW] ", "")
-                  .replace("[NUMERIC_VALUE_OUT_OF_RANGE] ", "")
-
-                if (sparkMessage.contains("cannot be represented as")) {
-                  assert(cometMessage.contains("cannot be represented as"))
+                if (CometSparkSessionExtensions.isSpark40Plus) {
+                  // for Spark 4 we expect the sparkException to carry the message
+                  assert(
+                    sparkException.getMessage
+                      .replace(".WITH_SUGGESTION] ", "]")
+                      .startsWith(cometMessage))
+                } else if (CometSparkSessionExtensions.isSpark34Plus) {
+                  // for Spark 3.4 we expect to reproduce the error message exactly
+                  assert(cometMessage == sparkMessage)
                 } else {
-                  assert(cometMessageModified == sparkMessage)
+                  // for Spark 3.3 we just need to strip the prefix from the Comet message
+                  // before comparing
+                  val cometMessageModified = cometMessage
+                    .replace("[CAST_INVALID_INPUT] ", "")
+                    .replace("[CAST_OVERFLOW] ", "")
+                    .replace("[NUMERIC_VALUE_OUT_OF_RANGE] ", "")
+
+                  if (sparkMessage.contains("cannot be represented as")) {
+                    assert(cometMessage.contains("cannot be represented as"))
+                  } else {
+                    assert(cometMessageModified == sparkMessage)
+                  }
                 }
               }
           }