/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.flint.spark

import java.io.IOException

import org.apache.hadoop.fs.Path
import org.opensearch.flint.spark.covering.FlintSparkCoveringIndex
import org.opensearch.flint.spark.mv.FlintSparkMaterializedView
import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex

import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.execution.streaming.CheckpointFileManager
import org.apache.spark.sql.flint.{loadTable, parseTableName, qualifyTableName}

/**
 * Flint Spark validation helper.
 */
trait FlintSparkValidationHelper extends Logging {

  /**
   * Determines whether any source table of the given Flint index uses an unsupported table
   * provider. Currently, Hive is the only unsupported provider.
   *
   * @param spark
   *   Spark session
   * @param index
   *   Flint index
   * @return
   *   true if any source table is a Hive table (unsupported), otherwise false
   */
  def isTableProviderSupported(spark: SparkSession, index: FlintSparkIndex): Boolean = {
    // Extract source table name (possibly more than one for MV query)
    val tableNames = index match {
      case skipping: FlintSparkSkippingIndex => Seq(skipping.tableName)
      case covering: FlintSparkCoveringIndex => Seq(covering.tableName)
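      // For a materialized view, the source tables must be discovered by parsing
      // its defining query and collecting every unresolved relation it references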
      case mv: FlintSparkMaterializedView =>
        spark.sessionState.sqlParser
          .parsePlan(mv.query)
          .collect { case relation: UnresolvedRelation =>
            qualifyTableName(spark, relation.tableName)
          }
    }

    // Return true if any source table uses an unsupported provider (currently Hive only)
    tableNames.exists { tableName =>
      val (catalog, ident) = parseTableName(spark, tableName)
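      // Assumes the source table still exists: loadTable returns an Option, so
      // .get throws if the table has been dropped since the index was defined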
      val table = loadTable(catalog, ident).get

      // TODO: add allowed table provider list
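      // Note: DDLUtils.isHiveTable returns true only when the provider property is
      // "hive" (case-insensitive); a missing provider property counts as non-Hive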
      DDLUtils.isHiveTable(Option(table.properties().get("provider")))
    }
  }

  /**
   * Checks whether a specified checkpoint location is accessible. Accessibility, in this
   * context, means that the folder exists and the current Spark session has the necessary
   * permissions to access it.
   *
   * @param spark
   *   Spark session
   * @param checkpointLocation
   *   checkpoint location
   * @return
   *   true if accessible, otherwise false
   */
  def isCheckpointLocationAccessible(spark: SparkSession, checkpointLocation: String): Boolean = {
    try {
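      // CheckpointFileManager resolves a concrete implementation (e.g. HDFS or
      // object storage) from the path scheme and the given Hadoop configuration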
      val checkpointManager =
        CheckpointFileManager.create(
          new Path(checkpointLocation),
          spark.sessionState.newHadoopConf())

      checkpointManager.exists(new Path(checkpointLocation))
    } catch {
      case e: IOException =>
        logWarning(s"Failed to check if checkpoint location $checkpointLocation exists", e)
        false
    }
  }
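
  // Usage sketch (hypothetical, for illustration only): a validator mixing in
  // this trait could guard a streaming refresh roughly as below. The validateIndex
  // name and the error messages are assumptions, not actual callers of this trait.
  //
  //   def validateIndex(
  //       spark: SparkSession,
  //       index: FlintSparkIndex,
  //       checkpointLocation: Option[String]): Unit = {
  //     require(
  //       !isTableProviderSupported(spark, index),
  //       "Index refresh doesn't support Hive table")
  //     checkpointLocation.foreach { location =>
  //       require(
  //         isCheckpointLocationAccessible(spark, location),
  //         s"No sufficient permission to access the checkpoint location $location")
  //     }
  //   }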
}