Commit 9c15194

Fix Session state bug and improve Query Efficiency in REPL (#245)
* Fix Session state bug and improve Query Efficiency in REPL

This PR introduces a bug fix and enhancements to FlintREPL's session management, and optimizes its query execution method. It addresses an issue where marking a session as 'dead' inadvertently triggered the creation of a new, unnecessary session; the new session then entered a spin-wait state, leading to duplicate jobs. The improvements include:

- **Introduction of `earlyExitFlag`**: A new flag, `earlyExitFlag`, is set to `true` under two conditions: when a job is excluded, or when it is not associated with the current session's job run ID. The flag is evaluated in the shutdown hook to determine whether the session state should be marked as 'dead'. This prevents the SQL plugin from creating duplicate sessions, so resources are used more efficiently.
- **Query method optimization**: Query execution has been switched from scrolling to plain search, eliminating the creation of unnecessary scroll contexts and improving the performance and efficiency of query operations.
- **Reversion of a previous commit**: The PR reverts an earlier change (be82024) now that the related issue in the SQL plugin (opensearch-project/sql#2436) has been resolved, further streamlining the operation and maintenance of the system.

**Testing**:

1. Integration tests were added to cover both REPL and streaming job functionality, ensuring the robustness of the fixes.
2. Manual testing was conducted to validate the bug fix.

Signed-off-by: Kaituo Li <[email protected]>

* added Java doc

Signed-off-by: Kaituo Li <[email protected]>

* fix IT by restore env variable change

Signed-off-by: Kaituo Li <[email protected]>

---------

Signed-off-by: Kaituo Li <[email protected]>
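A minimal sketch of the `earlyExitFlag` mechanism described above; all names besides `earlyExitFlag` are illustrative, not the actual FlintREPL code:

```scala
object EarlyExitSketch {
  // Set when this job run should bow out without touching the session record.
  @volatile private var earlyExitFlag = false

  // Stub standing in for the real session-state update against the request index.
  private def markSessionState(state: String): Unit =
    println(s"updating session state to '$state'")

  // The two conditions from the PR description: the job is excluded, or the
  // session's job run ID no longer matches this run.
  def evaluate(jobExcluded: Boolean, sessionJobRunId: String, currentJobRunId: String): Unit =
    if (jobExcluded || sessionJobRunId != currentJobRunId) earlyExitFlag = true

  // Shutdown hook: only mark the session 'dead' when this run still owns it,
  // so the SQL plugin does not spin up a duplicate session that spin-waits.
  def installShutdownHook(): Unit = sys.addShutdownHook {
    if (!earlyExitFlag) markSessionState("dead")
  }
}
```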
1 parent f446de0 commit 9c15194

File tree

23 files changed: 1279 additions, 59 deletions

build.sbt

Lines changed: 6 additions & 3 deletions
```diff
@@ -42,8 +42,9 @@ lazy val commonSettings = Seq(
   testScalastyle := (Test / scalastyle).toTask("").value,
   Test / test := ((Test / test) dependsOn testScalastyle).value)
 
+// running `scalafmtAll` includes all subprojects under root
 lazy val root = (project in file("."))
-  .aggregate(flintCore, flintSparkIntegration, pplSparkIntegration, sparkSqlApplication)
+  .aggregate(flintCore, flintSparkIntegration, pplSparkIntegration, sparkSqlApplication, integtest)
   .disablePlugins(AssemblyPlugin)
   .settings(name := "flint", publish / skip := true)
 
@@ -159,7 +160,7 @@ lazy val flintSparkIntegration = (project in file("flint-spark-integration"))
 
 // Test assembly package with integration test.
 lazy val integtest = (project in file("integ-test"))
-  .dependsOn(flintSparkIntegration % "test->test", pplSparkIntegration % "test->test" )
+  .dependsOn(flintSparkIntegration % "test->test", pplSparkIntegration % "test->test", sparkSqlApplication % "test->test")
   .settings(
     commonSettings,
     name := "integ-test",
@@ -175,7 +176,9 @@ lazy val integtest = (project in file("integ-test"))
       "org.opensearch.client" % "opensearch-java" % "2.6.0" % "test"
         exclude ("com.fasterxml.jackson.core", "jackson-databind")),
     libraryDependencies ++= deps(sparkVersion),
-    Test / fullClasspath ++= Seq((flintSparkIntegration / assembly).value, (pplSparkIntegration / assembly).value))
+    Test / fullClasspath ++= Seq((flintSparkIntegration / assembly).value, (pplSparkIntegration / assembly).value,
+      (sparkSqlApplication / assembly).value
+    ))
 
 lazy val standaloneCosmetic = project
   .settings(
```

flint-core/src/main/scala/org/opensearch/flint/core/FlintOptions.java

Lines changed: 2 additions & 0 deletions
```diff
@@ -77,6 +77,8 @@ public class FlintOptions implements Serializable {
 
   public static final int DEFAULT_SOCKET_TIMEOUT_MILLIS = 60000;
 
+  public static final int DEFAULT_INACTIVITY_LIMIT_MILLIS = 10 * 60 * 1000;
+
   public FlintOptions(Map<String, String> options) {
     this.options = options;
     this.retryOptions = new FlintRetryOptions(options);
```
flint-core/src/main/scala/org/opensearch/flint/core/storage/OpenSearchQueryReader.java

Lines changed: 48 additions & 0 deletions

```diff
@@ -0,0 +1,48 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.flint.core.storage;
+
+import org.opensearch.OpenSearchStatusException;
+import org.opensearch.action.search.ClearScrollRequest;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.action.search.SearchResponse;
+import org.opensearch.action.search.SearchScrollRequest;
+import org.opensearch.client.RequestOptions;
+import org.opensearch.client.RestHighLevelClient;
+import org.opensearch.common.Strings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.flint.core.FlintOptions;
+import org.opensearch.flint.core.IRestHighLevelClient;
+import org.opensearch.search.builder.SearchSourceBuilder;
+
+import java.io.IOException;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * {@link OpenSearchReader} using search. https://opensearch.org/docs/latest/api-reference/search/
+ */
+public class OpenSearchQueryReader extends OpenSearchReader {
+
+  private static final Logger LOG = Logger.getLogger(OpenSearchQueryReader.class.getName());
+
+  public OpenSearchQueryReader(IRestHighLevelClient client, String indexName, SearchSourceBuilder searchSourceBuilder) {
+    super(client, new SearchRequest().indices(indexName).source(searchSourceBuilder));
+  }
+
+  /**
+   * search.
+   */
+  Optional<SearchResponse> search(SearchRequest request) throws IOException {
+    return Optional.of(client.search(request, RequestOptions.DEFAULT));
+  }
+
+  /**
+   * nothing to clean
+   */
+  void clean() throws IOException {}
+}
```
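A sketch of how a caller might use the new reader; the index name and query field are placeholders, not from this PR:

```scala
import org.opensearch.flint.core.IRestHighLevelClient
import org.opensearch.flint.core.storage.OpenSearchQueryReader
import org.opensearch.index.query.QueryBuilders
import org.opensearch.search.builder.SearchSourceBuilder

// Build a one-shot search over a hypothetical request index, filtered to a
// single session. Unlike the scroll-based reader, there is no scroll context
// to create up front or clear afterwards.
def sessionReader(client: IRestHighLevelClient, sessionId: String): OpenSearchQueryReader = {
  val source = new SearchSourceBuilder()
    .query(QueryBuilders.termQuery("sessionId", sessionId))
  new OpenSearchQueryReader(client, "example-request-index", source)
}
```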

flint-core/src/main/scala/org/opensearch/flint/core/storage/OpenSearchReader.java

Lines changed: 8 additions & 0 deletions
```diff
@@ -5,6 +5,7 @@
 
 package org.opensearch.flint.core.storage;
 
+import org.opensearch.OpenSearchStatusException;
 import org.opensearch.action.search.SearchRequest;
 import org.opensearch.action.search.SearchResponse;
 import org.opensearch.flint.core.IRestHighLevelClient;
@@ -48,6 +49,13 @@ public OpenSearchReader(IRestHighLevelClient client, SearchRequest searchRequest
         iterator = searchHits.iterator();
       }
       return iterator.hasNext();
+    } catch (OpenSearchStatusException e) {
+      // e.g., org.opensearch.OpenSearchStatusException: OpenSearch exception [type=index_not_found_exception, reason=no such index [query_results2]]
+      if (e.getMessage() != null && (e.getMessage().contains("index_not_found_exception"))) {
+        return false;
+      } else {
+        throw e;
+      }
     } catch (IOException e) {
       // todo. log error.
       throw new RuntimeException(e);
```
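Assuming this catch sits in the reader's `hasNext`-style iteration method (the enclosing method is not shown in the hunk), the caller-visible effect is that a query against a not-yet-created index reads as an empty result set rather than failing. A sketch:

```scala
import org.opensearch.flint.core.storage.OpenSearchReader

// Drain a reader; if the target index is missing, hasNext now returns false
// immediately, so this simply yields zero documents instead of throwing.
def drain(reader: OpenSearchReader): Int = {
  var count = 0
  while (reader.hasNext) {
    reader.next()
    count += 1
  }
  count
}
```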

flint-core/src/test/java/org/opensearch/flint/core/metrics/reporter/DimensionUtilsTest.java

Lines changed: 15 additions & 8 deletions
```diff
@@ -57,13 +57,20 @@ public void testGetDimensionsFromSystemEnv() throws NoSuchFieldException, Illega
     Field field = classOfMap.getDeclaredField("m");
     field.setAccessible(true);
     Map<String, String> writeableEnvironmentVariables = (Map<String, String>)field.get(System.getenv());
-    writeableEnvironmentVariables.put("TEST_VAR", "dummy1");
-    writeableEnvironmentVariables.put("SERVERLESS_EMR_JOB_ID", "dummy2");
-    Dimension result1 = DimensionUtils.constructDimension("TEST_VAR", parts);
-    assertEquals("TEST_VAR", result1.getName());
-    assertEquals("dummy1", result1.getValue());
-    Dimension result2 = DimensionUtils.constructDimension("jobId", parts);
-    assertEquals("jobId", result2.getName());
-    assertEquals("dummy2", result2.getValue());
+    try {
+      writeableEnvironmentVariables.put("TEST_VAR", "dummy1");
+      writeableEnvironmentVariables.put("SERVERLESS_EMR_JOB_ID", "dummy2");
+      Dimension result1 = DimensionUtils.constructDimension("TEST_VAR", parts);
+      assertEquals("TEST_VAR", result1.getName());
+      assertEquals("dummy1", result1.getValue());
+      Dimension result2 = DimensionUtils.constructDimension("jobId", parts);
+      assertEquals("jobId", result2.getName());
+      assertEquals("dummy2", result2.getValue());
+    } finally {
+      // since system environment is shared by other tests. Make sure to remove them before exiting.
+      writeableEnvironmentVariables.remove("SERVERLESS_EMR_JOB_ID");
+      writeableEnvironmentVariables.remove("TEST_VAR");
+    }
+
   }
 }
```
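The cleanup idiom above matters because `System.getenv()` is shared JVM-wide. The same reflect-mutate-restore pattern, generalized in a sketch (`withEnv` is a hypothetical helper, not code from this PR; it relies on the same assumption as the test, namely that the JVM caches the environment in the `m` field of the unmodifiable map returned by `System.getenv()`):

```scala
import java.util.{Map => JMap}

// Temporarily inject an environment variable for the duration of `body`,
// then remove it so other tests see a clean environment.
def withEnv(key: String, value: String)(body: => Unit): Unit = {
  val env = System.getenv()
  val field = env.getClass.getDeclaredField("m")
  field.setAccessible(true)
  val writable = field.get(env).asInstanceOf[JMap[String, String]]
  writable.put(key, value)
  try body
  finally writable.remove(key) // always clean up the shared state
}

// Usage: withEnv("TEST_VAR", "dummy1") { assert(sys.env("TEST_VAR") == "dummy1") }
```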

flint-spark-integration/src/main/scala/org/apache/spark/sql/flint/config/FlintSparkConf.scala

Lines changed: 33 additions & 3 deletions
```diff
@@ -146,7 +146,30 @@ object FlintSparkConf {
       .datasourceOption()
       .doc("socket duration in milliseconds")
       .createWithDefault(String.valueOf(FlintOptions.DEFAULT_SOCKET_TIMEOUT_MILLIS))
-
+  val DATA_SOURCE_NAME =
+    FlintConfig(s"spark.flint.datasource.name")
+      .doc("data source name")
+      .createOptional()
+  val JOB_TYPE =
+    FlintConfig(s"spark.flint.job.type")
+      .doc("Flint job type. Including interactive and streaming")
+      .createWithDefault("interactive")
+  val SESSION_ID =
+    FlintConfig(s"spark.flint.job.sessionId")
+      .doc("Flint session id")
+      .createOptional()
+  val REQUEST_INDEX =
+    FlintConfig(s"spark.flint.job.requestIndex")
+      .doc("Request index")
+      .createOptional()
+  val EXCLUDE_JOB_IDS =
+    FlintConfig(s"spark.flint.deployment.excludeJobs")
+      .doc("Exclude job ids")
+      .createOptional()
+  val REPL_INACTIVITY_TIMEOUT_MILLIS =
+    FlintConfig(s"spark.flint.job.inactivityLimitMillis")
+      .doc("inactivity timeout")
+      .createWithDefault(String.valueOf(FlintOptions.DEFAULT_INACTIVITY_LIMIT_MILLIS))
 }
 
 /**
@@ -196,11 +219,18 @@ case class FlintSparkConf(properties: JMap[String, String]) extends Serializable
       CUSTOM_AWS_CREDENTIALS_PROVIDER,
       USERNAME,
       PASSWORD,
-      SOCKET_TIMEOUT_MILLIS)
+      SOCKET_TIMEOUT_MILLIS,
+      JOB_TYPE,
+      REPL_INACTIVITY_TIMEOUT_MILLIS)
       .map(conf => (conf.optionKey, conf.readFrom(reader)))
       .toMap
 
-    val optionsWithoutDefault = Seq(RETRYABLE_EXCEPTION_CLASS_NAMES)
+    val optionsWithoutDefault = Seq(
+      RETRYABLE_EXCEPTION_CLASS_NAMES,
+      DATA_SOURCE_NAME,
+      SESSION_ID,
+      REQUEST_INDEX,
+      EXCLUDE_JOB_IDS)
       .map(conf => (conf.optionKey, conf.readFrom(reader)))
       .flatMap {
         case (_, None) => None
```
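For illustration, a job might supply the options added above as follows; the values are placeholders, and `spark.flint.job.inactivityLimitMillis` defaults to `DEFAULT_INACTIVITY_LIMIT_MILLIS` (600000 ms, i.e. 10 minutes):

```scala
import org.apache.spark.SparkConf

object FlintJobConfSketch {
  // Illustrative values only; the keys are the ones introduced in this diff.
  val conf: SparkConf = new SparkConf()
    .set("spark.flint.job.type", "interactive")
    .set("spark.flint.job.sessionId", "example-session-id")
    .set("spark.flint.job.requestIndex", "example-request-index")
    .set("spark.flint.job.inactivityLimitMillis", "600000") // 10 minutes, the default
}
```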

flint-spark-integration/src/main/scala/org/opensearch/flint/app/FlintInstance.scala

Lines changed: 8 additions & 1 deletion
```diff
@@ -25,7 +25,14 @@ class FlintInstance(
     val lastUpdateTime: Long,
     val jobStartTime: Long = 0,
     val excludedJobIds: Seq[String] = Seq.empty[String],
-    val error: Option[String] = None) {}
+    val error: Option[String] = None) {
+  override def toString: String = {
+    val excludedJobIdsStr = excludedJobIds.mkString("[", ", ", "]")
+    val errorStr = error.getOrElse("None")
+    s"FlintInstance(applicationId=$applicationId, jobId=$jobId, sessionId=$sessionId, state=$state, " +
+      s"lastUpdateTime=$lastUpdateTime, jobStartTime=$jobStartTime, excludedJobIds=$excludedJobIdsStr, error=$errorStr)"
+  }
+}
 
 object FlintInstance {
 
```
