Skip to content

Commit 9d377fc

Browse files
committed
fix index build for full table names containing special characters
Signed-off-by: Sean Kao <[email protected]>
1 parent d9c0ba8 commit 9d377fc

File tree

3 files changed

+18
-21
lines changed

3 files changed

+18
-21
lines changed

flint-spark-integration/src/main/scala/org/opensearch/flint/spark/FlintSparkIndex.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ object FlintSparkIndex {
102102
}
103103

104104
/**
105-
* Add backticks to table name to escape special character
105+
* Add backticks to all parts of full table name to escape special characters
106106
*
107107
* @param fullTableName
108108
* source full table name
@@ -113,7 +113,7 @@ object FlintSparkIndex {
113113
require(fullTableName.split('.').length >= 3, s"Table name $fullTableName is not qualified")
114114

115115
val parts = fullTableName.split('.')
116-
s"${parts(0)}.${parts(1)}.`${parts.drop(2).mkString(".")}`"
116+
s"`${parts(0)}`.`${parts(1)}`.`${parts.drop(2).mkString(".")}`"
117117
}
118118

119119
/**

flint-spark-integration/src/test/scala/org/opensearch/flint/spark/covering/FlintSparkCoveringIndexSuite.scala

+8-14
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import org.scalatest.matchers.must.Matchers.contain
99
import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
1010

1111
import org.apache.spark.FlintSuite
12+
import org.apache.spark.sql.AnalysisException
1213

1314
class FlintSparkCoveringIndexSuite extends FlintSuite {
1415

@@ -31,22 +32,15 @@ class FlintSparkCoveringIndexSuite extends FlintSuite {
3132
}
3233
}
3334

34-
test("can build index building job with unique ID column") {
35-
val index =
36-
new FlintSparkCoveringIndex("ci", "spark_catalog.default.test", Map("name" -> "string"))
37-
38-
val df = spark.createDataFrame(Seq(("hello", 20))).toDF("name", "age")
39-
val indexDf = index.build(spark, Some(df))
40-
indexDf.schema.fieldNames should contain only ("name")
41-
}
42-
43-
test("can build index on table name with special characters") {
44-
val testTableSpecial = "spark_catalog.default.test/2023/10"
35+
test("can parse identifier name with special characters during index build") {
36+
val testTableSpecial = "spark_catalog.de-fault.test/2023/10"
4537
val index = new FlintSparkCoveringIndex("ci", testTableSpecial, Map("name" -> "string"))
4638

47-
val df = spark.createDataFrame(Seq(("hello", 20))).toDF("name", "age")
48-
val indexDf = index.build(spark, Some(df))
49-
indexDf.schema.fieldNames should contain only ("name")
39+
val error = intercept[AnalysisException] {
40+
index.build(spark, None)
41+
}
42+
// Reaching this error proves parsing succeeded: the quoted identifier was accepted,
// and resolution failed only because the table does not exist (UnresolvedRelation)
43+
assert(error.getMessage().contains("UnresolvedRelation"))
5044
}
5145

5246
test("should fail if no indexed column given") {

flint-spark-integration/src/test/scala/org/opensearch/flint/spark/skipping/FlintSparkSkippingIndexSuite.scala

+8-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
2020
import org.scalatestplus.mockito.MockitoSugar.mock
2121

2222
import org.apache.spark.FlintSuite
23+
import org.apache.spark.sql.AnalysisException
2324
import org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet
2425
import org.apache.spark.sql.functions.col
2526

@@ -71,17 +72,19 @@ class FlintSparkSkippingIndexSuite extends FlintSuite {
7172
indexDf.schema.fieldNames should contain only ("name", FILE_PATH_COLUMN, ID_COLUMN)
7273
}
7374

74-
test("can build index on table name with special characters") {
75-
val testTableSpecial = "spark_catalog.default.test/2023/10"
75+
test("can parse identifier name with special characters during index build") {
76+
val testTableSpecial = "spark_catalog.de-fault.test/2023/10"
7677
val indexCol = mock[FlintSparkSkippingStrategy]
7778
when(indexCol.outputSchema()).thenReturn(Map("name" -> "string"))
7879
when(indexCol.getAggregators).thenReturn(
7980
Seq(CollectSet(col("name").expr).toAggregateExpression()))
8081
val index = new FlintSparkSkippingIndex(testTableSpecial, Seq(indexCol))
8182

82-
val df = spark.createDataFrame(Seq(("hello", 20))).toDF("name", "age")
83-
val indexDf = index.build(spark, Some(df))
84-
indexDf.schema.fieldNames should contain only ("name", FILE_PATH_COLUMN, ID_COLUMN)
83+
val error = intercept[AnalysisException] {
84+
index.build(spark, None)
85+
}
86+
// Reaching this error proves parsing succeeded: the quoted identifier was accepted,
// and resolution failed only because the table does not exist (UnresolvedRelation)
87+
assert(error.getMessage().contains("UnresolvedRelation"))
8588
}
8689

8790
// Test index build for different column type

0 commit comments

Comments (0)