
Commit c6d8793

saranrajnk (Shri Saran Raj N) and Shri Saran Raj N authored
Implement FlintJob to handle all query types in warmpool mode (#979)
* Add FlintJob to support queries in warmpool mode
* Revert error message change
* Refactor JobOperator
* WarmpoolEnabled FlintSparkConf doc

Signed-off-by: Shri Saran Raj N <[email protected]>
Co-authored-by: Shri Saran Raj N <[email protected]>
1 parent 4783f08 commit c6d8793

File tree: 9 files changed, +533 -120 lines changed

flint-core/src/main/java/org/opensearch/flint/core/metrics/MetricConstants.java (+25)
@@ -100,6 +100,11 @@ public final class MetricConstants {
    */
   public static final String RESULT_METADATA_WRITE_METRIC_PREFIX = "result.metadata.write";
 
+  /**
+   * Prefix for metrics related to interactive queries
+   */
+  public static final String STATEMENT = "statement";
+
   /**
    * Metric name for counting the number of statements currently running.
    */
@@ -140,11 +145,31 @@ public final class MetricConstants {
    */
   public static final String STREAMING_HEARTBEAT_FAILED_METRIC = "streaming.heartbeat.failed.count";
 
+  /**
+   * Metric for tracking the count of jobs failed during query execution
+   */
+  public static final String QUERY_EXECUTION_FAILED_METRIC = "execution.failed.count";
+
+  /**
+   * Metric for tracking the count of jobs failed during query result write
+   */
+  public static final String RESULT_WRITER_FAILED_METRIC = "writer.failed.count";
+
   /**
    * Metric for tracking the latency of query execution (start to complete query execution) excluding result write.
    */
   public static final String QUERY_EXECUTION_TIME_METRIC = "query.execution.processingTime";
 
+  /**
+   * Metric for tracking the latency of query result write only (excluding query execution)
+   */
+  public static final String QUERY_RESULT_WRITER_TIME_METRIC = "result.writer.processingTime";
+
+  /**
+   * Metric for tracking the latency of query total execution including result write.
+   */
+  public static final String QUERY_TOTAL_TIME_METRIC = "query.total.processingTime";
+
   /**
    * Metric for query count of each query type (DROP/VACUUM/ALTER/REFRESH/CREATE INDEX)
    */

flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala (+14)
@@ -214,6 +214,13 @@ object FlintSparkConf {
       .doc("Enable external scheduler for index refresh")
       .createWithDefault("false")
 
+  val WARMPOOL_ENABLED =
+    FlintConfig("spark.flint.job.warmpoolEnabled")
+      .doc("Enable warmPool mode for the EMR Job to reduce startup times")
+      .createWithDefault("false")
+
+  val MAX_EXECUTORS_COUNT = FlintConfig("spark.dynamicAllocation.maxExecutors").createOptional()
+
   val EXTERNAL_SCHEDULER_INTERVAL_THRESHOLD =
     FlintConfig("spark.flint.job.externalScheduler.interval")
       .doc("Interval threshold in minutes for external scheduler to trigger index refresh")
@@ -289,6 +296,10 @@ object FlintSparkConf {
     FlintConfig(s"spark.flint.job.requestIndex")
       .doc("Request index")
       .createOptional()
+  val RESULT_INDEX =
+    FlintConfig(s"spark.flint.job.resultIndex")
+      .doc("Result index")
+      .createOptional()
   val EXCLUDE_JOB_IDS =
     FlintConfig(s"spark.flint.deployment.excludeJobs")
       .doc("Exclude job ids")
@@ -314,6 +325,9 @@ object FlintSparkConf {
   val CUSTOM_QUERY_RESULT_WRITER =
     FlintConfig("spark.flint.job.customQueryResultWriter")
       .createOptional()
+  val TERMINATE_JVM = FlintConfig("spark.flint.terminateJVM")
+    .doc("Indicates whether the JVM should be terminated after query execution")
+    .createWithDefault("true")
 }
 
 /**
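
For orientation, the new entries follow the existing FlintConfig pattern: values are read back as strings and parsed by the caller. A minimal sketch of reading them, assuming an already-built SparkSession named `spark` (the key objects come from this diff, the surrounding setup is illustrative):

// Hedged sketch: reading the new settings the same way FlintJob.main does below.
import org.apache.spark.sql.flint.config.FlintSparkConf

val warmpoolEnabled =
  spark.conf.get(FlintSparkConf.WARMPOOL_ENABLED.key, "false").toBoolean // default "false"
val terminateJvm =
  spark.conf.get(FlintSparkConf.TERMINATE_JVM.key, "true").toBoolean     // default "true"
val resultIndex =
  spark.conf.getOption(FlintSparkConf.RESULT_INDEX.key)                   // optional, no default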

integ-test/src/integration/scala/org/apache/spark/sql/FlintJobITSuite.scala (+20 -3)
@@ -16,12 +16,15 @@ import scala.util.{Failure, Success}
 import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest
 import org.opensearch.action.get.GetRequest
 import org.opensearch.client.RequestOptions
+import org.opensearch.flint.common.model.FlintStatement
+import org.opensearch.flint.common.scheduler.model.LangType
 import org.opensearch.flint.core.FlintOptions
 import org.opensearch.flint.spark.{FlintSparkIndexMonitor, FlintSparkSuite}
 import org.opensearch.flint.spark.skipping.FlintSparkSkippingIndex.getSkippingIndexName
 import org.scalatest.matchers.must.Matchers.{contain, defined}
 import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
 
+import org.apache.spark.sql.FlintREPL.currentTimeProvider
 import org.apache.spark.sql.flint.FlintDataSourceV2.FLINT_DATASOURCE
 import org.apache.spark.sql.flint.config.FlintSparkConf
 import org.apache.spark.sql.flint.config.FlintSparkConf._
@@ -39,6 +42,7 @@ class FlintJobITSuite extends FlintSparkSuite with JobTest {
   val appId = "00feq82b752mbt0p"
   val dataSourceName = "my_glue1"
   val queryId = "testQueryId"
+  val requestIndex = "testRequestIndex"
   var osClient: OSClient = _
   val threadLocalFuture = new ThreadLocal[Future[Unit]]()
 
@@ -83,24 +87,37 @@ class FlintJobITSuite extends FlintSparkSuite with JobTest {
 
   def createJobOperator(query: String, jobRunId: String): JobOperator = {
     val streamingRunningCount = new AtomicInteger(0)
+    val statementRunningCount = new AtomicInteger(0)
 
     /*
      * Because we cannot test from FlintJob.main() for the reason below, we have to configure
      * all Spark conf required by Flint code underlying manually.
      */
     spark.conf.set(DATA_SOURCE_NAME.key, dataSourceName)
     spark.conf.set(JOB_TYPE.key, FlintJobType.STREAMING)
+    spark.conf.set(REQUEST_INDEX.key, requestIndex)
+
+    val flintStatement =
+      new FlintStatement(
+        "running",
+        query,
+        "",
+        queryId,
+        LangType.SQL,
+        currentTimeProvider.currentEpochMillis(),
+        Option.empty,
+        Map.empty)
 
     val job = JobOperator(
       appId,
       jobRunId,
       spark,
-      query,
-      queryId,
+      flintStatement,
       dataSourceName,
       resultIndex,
       FlintJobType.STREAMING,
-      streamingRunningCount)
+      streamingRunningCount,
+      statementRunningCount)
     job.terminateJVM = false
     job
   }
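
For reference, the suite drives this helper off the test thread; a hedged sketch of a caller (the query text, jobRunId value, and Future wiring are illustrative, not part of the visible diff):

// Hedged sketch: running the operator asynchronously, as the suite's
// threadLocalFuture field suggests. Query and jobRunId are made up for illustration.
import scala.concurrent.{ExecutionContext, Future}

implicit val ec: ExecutionContext = ExecutionContext.global
val job = createJobOperator("SELECT 1", jobRunId = "00fd777k3k3ls20p")
threadLocalFuture.set(Future(job.start()))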

spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJob.scala (+57 -34)
@@ -8,12 +8,17 @@ package org.apache.spark.sql
 
 import java.util.concurrent.atomic.AtomicInteger
 
+import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}
+
+import org.opensearch.flint.common.model.FlintStatement
+import org.opensearch.flint.common.scheduler.model.LangType
 import org.opensearch.flint.core.logging.CustomLogging
 import org.opensearch.flint.core.metrics.MetricConstants
 import org.opensearch.flint.core.metrics.MetricsUtil.registerGauge
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.flint.config.FlintSparkConf
+import org.apache.spark.util.ThreadUtils
 
 /**
  * Spark SQL Application entrypoint
@@ -26,52 +31,70 @@ import org.apache.spark.sql.flint.config.FlintSparkConf
  * write sql query result to given opensearch index
  */
 object FlintJob extends Logging with FlintJobExecutor {
+  private val streamingRunningCount = new AtomicInteger(0)
+  private val statementRunningCount = new AtomicInteger(0)
+
   def main(args: Array[String]): Unit = {
     val (queryOption, resultIndexOption) = parseArgs(args)
 
     val conf = createSparkConf()
-    val jobType = conf.get("spark.flint.job.type", FlintJobType.BATCH)
-    CustomLogging.logInfo(s"""Job type is: ${jobType}""")
-    conf.set(FlintSparkConf.JOB_TYPE.key, jobType)
-
-    val dataSource = conf.get("spark.flint.datasource.name", "")
-    val query = queryOption.getOrElse(unescapeQuery(conf.get(FlintSparkConf.QUERY.key, "")))
-    if (query.isEmpty) {
-      logAndThrow(s"Query undefined for the ${jobType} job.")
-    }
-    val queryId = conf.get(FlintSparkConf.QUERY_ID.key, "")
-
-    if (resultIndexOption.isEmpty) {
-      logAndThrow("resultIndex is not set")
-    }
-    // https://github.com/opensearch-project/opensearch-spark/issues/138
-    /*
-     * To execute queries such as `CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)`,
-     * it's necessary to set `spark.sql.defaultCatalog=my_glue1`. This is because AWS Glue uses a single database (default) and table (http_logs_plain),
-     * and we need to configure Spark to recognize `my_glue1` as a reference to AWS Glue's database and table.
-     * By doing this, we effectively map `my_glue1` to AWS Glue, allowing Spark to resolve the database and table names correctly.
-     * Without this setup, Spark would not recognize names in the format `my_glue1.default`.
-     */
-    conf.set("spark.sql.defaultCatalog", dataSource)
-    configDYNMaxExecutors(conf, jobType)
-
+    val sparkSession = createSparkSession(conf)
     val applicationId =
       environmentProvider.getEnvVar("SERVERLESS_EMR_VIRTUAL_CLUSTER_ID", "unknown")
     val jobId = environmentProvider.getEnvVar("SERVERLESS_EMR_JOB_ID", "unknown")
+    val isWarmpoolEnabled = conf.get(FlintSparkConf.WARMPOOL_ENABLED.key, "false").toBoolean
+    logInfo(s"isWarmpoolEnabled: ${isWarmpoolEnabled}")
+
+    if (!isWarmpoolEnabled) {
+      val jobType = sparkSession.conf.get("spark.flint.job.type", FlintJobType.BATCH)
+      CustomLogging.logInfo(s"""Job type is: ${jobType}""")
+      sparkSession.conf.set(FlintSparkConf.JOB_TYPE.key, jobType)
+
+      val dataSource = conf.get("spark.flint.datasource.name", "")
+      val query = queryOption.getOrElse(unescapeQuery(conf.get(FlintSparkConf.QUERY.key, "")))
+      if (query.isEmpty) {
+        logAndThrow(s"Query undefined for the ${jobType} job.")
+      }
+      val queryId = conf.get(FlintSparkConf.QUERY_ID.key, "")
 
-    val streamingRunningCount = new AtomicInteger(0)
-    val jobOperator =
-      JobOperator(
+      if (resultIndexOption.isEmpty) {
+        logAndThrow("resultIndex is not set")
+      }
+
+      configDYNMaxExecutors(conf, jobType)
+      val flintStatement =
+        new FlintStatement(
+          "running",
+          query,
+          "",
+          queryId,
+          LangType.SQL,
+          currentTimeProvider.currentEpochMillis(),
+          Option.empty,
+          Map.empty)
+
+      val jobOperator = createJobOperator(
+        sparkSession,
         applicationId,
         jobId,
-        createSparkSession(conf),
-        query,
-        queryId,
+        flintStatement,
         dataSource,
         resultIndexOption.get,
         jobType,
-        streamingRunningCount)
-    registerGauge(MetricConstants.STREAMING_RUNNING_METRIC, streamingRunningCount)
-    jobOperator.start()
+        streamingRunningCount,
+        statementRunningCount)
+      registerGauge(MetricConstants.STREAMING_RUNNING_METRIC, streamingRunningCount)
+      jobOperator.start()
+    } else {
+      // Fetch and execute queries in warm pool mode
+      val warmpoolJob =
+        WarmpoolJob(
+          applicationId,
+          jobId,
+          sparkSession,
+          streamingRunningCount,
+          statementRunningCount)
+      warmpoolJob.start()
+    }
   }
 }
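
The practical effect of the branch above is that only the non-warm-pool path needs a query baked into the job configuration at submit time. A hedged sketch of the two submission shapes, assuming `conf` is the SparkConf handed to the EMR job (only the key objects come from this commit; the sample values are illustrative):

// 1. Classic batch/streaming run: query and query id are fixed at submit time,
//    and resultIndex arrives through the program arguments handled by parseArgs(args).
conf.set(FlintSparkConf.WARMPOOL_ENABLED.key, "false")
conf.set(FlintSparkConf.QUERY.key, "SELECT * FROM my_glue1.default.http_logs LIMIT 10") // sample query
conf.set(FlintSparkConf.QUERY_ID.key, "sample-query-id")                                // illustrative id

// 2. Warm pool run: no query is supplied up front; WarmpoolJob fetches and executes statements itself.
conf.set(FlintSparkConf.WARMPOOL_ENABLED.key, "true")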

spark-sql-application/src/main/scala/org/apache/spark/sql/FlintJobExecutor.scala (+64)
@@ -6,12 +6,15 @@
 package org.apache.spark.sql
 
 import java.util.Locale
+import java.util.concurrent.ThreadPoolExecutor
+import java.util.concurrent.atomic.AtomicInteger
 
 import com.amazonaws.services.glue.model.{AccessDeniedException, AWSGlueException}
 import com.amazonaws.services.s3.model.AmazonS3Exception
 import com.fasterxml.jackson.databind.ObjectMapper
 import org.apache.commons.text.StringEscapeUtils.unescapeJava
 import org.opensearch.common.Strings
+import org.opensearch.flint.common.model.FlintStatement
 import org.opensearch.flint.core.IRestHighLevelClient
 import org.opensearch.flint.core.logging.{CustomLogging, ExceptionMessages, OperationMessage}
 import org.opensearch.flint.core.metrics.MetricConstants
@@ -20,6 +23,7 @@ import play.api.libs.json._
 
 import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.FlintREPL.instantiate
 import org.apache.spark.sql.SparkConfConstants.{DEFAULT_SQL_EXTENSIONS, SQL_EXTENSIONS_KEY}
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.exception.UnrecoverableException
@@ -566,4 +570,64 @@ trait FlintJobExecutor {
       }
     }
   }
+
+  def createJobOperator(
+      spark: SparkSession,
+      applicationId: String,
+      jobId: String,
+      flintStatement: FlintStatement,
+      dataSource: String,
+      resultIndex: String,
+      jobType: String,
+      streamingRunningCount: AtomicInteger,
+      statementRunningCount: AtomicInteger): JobOperator = {
+    // https://github.com/opensearch-project/opensearch-spark/issues/138
+    /*
+     * To execute queries such as `CREATE SKIPPING INDEX ON my_glue1.default.http_logs_plain (`@timestamp` VALUE_SET) WITH (auto_refresh = true)`,
+     * it's necessary to set `spark.sql.defaultCatalog=my_glue1`. This is because AWS Glue uses a single database (default) and table (http_logs_plain),
+     * and we need to configure Spark to recognize `my_glue1` as a reference to AWS Glue's database and table.
+     * By doing this, we effectively map `my_glue1` to AWS Glue, allowing Spark to resolve the database and table names correctly.
+     * Without this setup, Spark would not recognize names in the format `my_glue1.default`.
+     */
+    spark.conf.set("spark.sql.defaultCatalog", dataSource)
+    val jobOperator =
+      JobOperator(
+        applicationId,
+        jobId,
+        spark,
+        flintStatement,
+        dataSource,
+        resultIndex,
+        jobType,
+        streamingRunningCount,
+        statementRunningCount)
+    jobOperator
+  }
+
+  def instantiateQueryResultWriter(
+      spark: SparkSession,
+      commandContext: CommandContext): QueryResultWriter = {
+    instantiate(
+      new QueryResultWriterImpl(commandContext),
+      spark.conf.get(FlintSparkConf.CUSTOM_QUERY_RESULT_WRITER.key, ""))
+  }
+
+  def instantiateStatementExecutionManager(
+      commandContext: CommandContext): StatementExecutionManager = {
+    import commandContext._
+    instantiate(
+      new StatementExecutionManagerImpl(commandContext),
+      spark.conf.get(FlintSparkConf.CUSTOM_STATEMENT_MANAGER.key, ""),
+      spark,
+      sessionId)
+  }
+
+  def instantiateSessionManager(
+      spark: SparkSession,
+      resultIndexOption: Option[String]): SessionManager = {
+    instantiate(
+      new SessionManagerImpl(spark, resultIndexOption),
+      spark.conf.get(FlintSparkConf.CUSTOM_SESSION_MANAGER.key, ""),
+      resultIndexOption.getOrElse(""))
+  }
 }
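
The three instantiate* helpers reuse FlintREPL.instantiate, which, judging from its use here, builds the class named in the corresponding spark.flint.job.custom* setting and otherwise falls back to the supplied default implementation. A hedged usage sketch, assuming `spark` and `commandContext` are in scope (the custom class name is hypothetical):

// Hedged sketch: plugging in a custom result writer via configuration.
// com.example.MyQueryResultWriter is hypothetical; with the setting unset or empty,
// instantiateQueryResultWriter falls back to QueryResultWriterImpl.
spark.conf.set(FlintSparkConf.CUSTOM_QUERY_RESULT_WRITER.key, "com.example.MyQueryResultWriter")
val writer: QueryResultWriter = instantiateQueryResultWriter(spark, commandContext)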
