
Commit 957de4e

Created docker files for an integ test cluster (#601) (#986)
* Created docker files for an integ test cluster (#601)

  The cluster contains:
  * Spark master
  * Spark worker
  * OpenSearch server
  * OpenSearch dashboards
  * Minio server

* Updated to start Spark Connect on the Spark master container

* Can run integration tests against the docker cluster

  The Python script for integration tests was updated to run queries against the docker cluster. The required indices are created as part of the script.

  The queries for the Python script were likely out of date; they have been updated where the fix was obvious. There are still 6 tests that fail.

* Fixed up the documentation for docker integration tests

* Added a link in the top-level README

* Described creation of test indices

Signed-off-by: Norman Jordan <[email protected]>
1 parent d789848 commit 957de4e

27 files changed: +1748 -476 lines changed

README.md

+2
@@ -90,6 +90,8 @@ bin/spark-shell --packages "org.opensearch:opensearch-spark-ppl_2.12:0.7.0-SNAPS
 ### PPL Run queries on a local spark cluster
 See ppl usage sample on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md)

+### Running integration tests on a local spark cluster
+See integration test documentation [Docker Integration Tests](integ-test/script/README.md)

 ## Code of Conduct

docker/integ-test/.env

+13
@@ -0,0 +1,13 @@
SPARK_VERSION=3.5.3
OPENSEARCH_VERSION=latest
DASHBOARDS_VERSION=latest
MASTER_UI_PORT=8080
MASTER_PORT=7077
UI_PORT=4040
SPARK_CONNECT_PORT=15002
PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar
FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar
OPENSEARCH_NODE_MEMORY=512m
OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple.
OPENSEARCH_PORT=9200
OPENSEARCH_DASHBOARDS_PORT=5601
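The PPL_JAR and FLINT_JAR paths above are bind-mounted into the Spark containers by the compose file below, so the assembly JARs must exist before the cluster is started. A minimal pre-flight check sketch, not part of this commit; it assumes it is run from the docker/integ-test directory next to this .env file:

#!/usr/bin/env python3
# Illustration only (not part of the commit): confirm that the *_JAR paths
# referenced in .env point at real files before running docker compose,
# since the compose file bind-mounts them into the Spark containers.
from pathlib import Path

def jar_paths(env_file=".env"):
    """Read KEY=VALUE pairs from the .env file and return the *_JAR entries."""
    entries = {}
    for line in Path(env_file).read_text().splitlines():
        if "=" in line and not line.lstrip().startswith("#"):
            key, _, value = line.partition("=")
            if key.strip().endswith("_JAR"):
                entries[key.strip()] = Path(value.strip())
    return entries

if __name__ == "__main__":
    for key, path in jar_paths().items():
        status = "found" if path.is_file() else "MISSING"
        print(f"{key}: {path} ({status})")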

docker/integ-test/docker-compose.yml

+143
@@ -0,0 +1,143 @@
services:
  spark:
    image: bitnami/spark:${SPARK_VERSION:-3.5.3}
    container_name: spark
    ports:
      - "${MASTER_UI_PORT:-8080}:8080"
      - "${MASTER_PORT:-7077}:7077"
      - "${UI_PORT:-4040}:4040"
      - "${SPARK_CONNECT_PORT}:15002"
    entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh
    environment:
      - SPARK_MODE=master
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_PUBLIC_DNS=localhost
    volumes:
      - type: bind
        source: ./spark-master-entrypoint.sh
        target: /opt/bitnami/scripts/spark/master-entrypoint.sh
      - type: bind
        source: ./spark-defaults.conf
        target: /opt/bitnami/spark/conf/spark-defaults.conf
      - type: bind
        source: ./log4j2.properties
        target: /opt/bitnami/spark/conf/log4j2.properties
      - type: bind
        source: $PPL_JAR
        target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
      - type: bind
        source: $FLINT_JAR
        target: /opt/bitnami/spark/jars/flint-spark-integration.jar
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/"]
      interval: 1m
      timeout: 5s
      retries: 3
      start_period: 30s
      start_interval: 5s
    networks:
      - opensearch-net

  spark-worker:
    image: bitnami/spark:${SPARK_VERSION:-3.5.3}
    container_name: spark-worker
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark:7077
      - SPARK_WORKER_MEMORY=${WORKER_MEMORY:-1G}
      - SPARK_WORKER_CORES=${WORKER_CORES:-1}
      - SPARK_RPC_AUTHENTICATION_ENABLED=no
      - SPARK_RPC_ENCRYPTION_ENABLED=no
      - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
      - SPARK_SSL_ENABLED=no
      - SPARK_PUBLIC_DNS=localhost
    volumes:
      - type: bind
        source: ./spark-defaults.conf
        target: /opt/bitnami/spark/conf/spark-defaults.conf
      - type: bind
        source: ./log4j2.properties
        target: /opt/bitnami/spark/conf/log4j2.properties
      - type: bind
        source: $PPL_JAR
        target: /opt/bitnami/spark/jars/ppl-spark-integration.jar
      - type: bind
        source: $FLINT_JAR
        target: /opt/bitnami/spark/jars/flint-spark-integration.jar
    networks:
      - opensearch-net
    depends_on:
      - spark

  opensearch:
    image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest}
    container_name: opensearch
    environment:
      - cluster.name=opensearch-cluster
      - node.name=opensearch
      - discovery.seed_hosts=opensearch
      - cluster.initial_cluster_manager_nodes=opensearch
      - bootstrap.memory_lock=true
      - plugins.security.ssl.http.enabled=false
      - OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m}
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD}
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    volumes:
      - opensearch-data:/usr/share/opensearch/data
    ports:
      - ${OPENSEARCH_PORT:-9200}:9200
      - 9600:9600
    expose:
      - "${OPENSEARCH_PORT:-9200}"
    healthcheck:
      test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"]
      interval: 1m
      timeout: 5s
      retries: 3
      start_period: 30s
      start_interval: 5s
    networks:
      - opensearch-net

  opensearch-dashboards:
    image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION}
    container_name: opensearch-dashboards
    ports:
      - ${OPENSEARCH_DASHBOARDS_PORT:-5601}:5601
    expose:
      - "${OPENSEARCH_DASHBOARDS_PORT:-5601}"
    environment:
      OPENSEARCH_HOSTS: '["http://opensearch:9200"]'
    networks:
      - opensearch-net
    depends_on:
      - opensearch

  minio:
    image: minio/minio
    container_name: minio-S3
    # See original entrypoint/command under https://github.com/minio/minio/blob/master/Dockerfile
    entrypoint: sh -c 'mkdir -p /data/test && minio server /data --console-address ":9001"'
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio-data:/data
    networks:
      - opensearch-net

volumes:
  opensearch-data:
  minio-data:

networks:
  opensearch-net:
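Both healthchecks above run curl inside the containers; the same endpoints are also reachable from the host on the published ports, which is a convenient way to confirm the cluster is up before running the integration test script. A minimal readiness-poll sketch (illustrative only, not part of the commit; it assumes the default ports from .env and takes the admin password from the OPENSEARCH_ADMIN_PASSWORD environment variable):

#!/usr/bin/env python3
# Illustration only (not part of the commit): poll the Spark master UI and the
# OpenSearch cluster health endpoint on the host-published ports until both
# respond, mirroring the container healthchecks in docker-compose.yml.
import base64
import os
import time
import urllib.request

def wait_for(url, headers=None, attempts=30):
    """Retry a GET against url every 5 seconds until it returns HTTP 200."""
    for _ in range(attempts):
        try:
            req = urllib.request.Request(url, headers=headers or {})
            with urllib.request.urlopen(req, timeout=5) as resp:
                if resp.status == 200:
                    print(f"ready: {url}")
                    return
        except Exception:
            pass
        time.sleep(5)
    raise RuntimeError(f"gave up waiting for {url}")

if __name__ == "__main__":
    password = os.environ.get("OPENSEARCH_ADMIN_PASSWORD", "C0rrecthorsebatterystaple.")
    auth = base64.b64encode(f"admin:{password}".encode()).decode()
    wait_for("http://localhost:8080/")  # Spark master UI (MASTER_UI_PORT)
    wait_for("http://localhost:9200/_cluster/health",
             headers={"Authorization": f"Basic {auth}"})  # OpenSearch (OPENSEARCH_PORT)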

docker/integ-test/log4j2.properties

+69
@@ -0,0 +1,69 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

# In the pattern layout configuration below, we specify an explicit `%ex` conversion
# pattern for logging Throwables. If this was omitted, then (by default) Log4J would
# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional
# class packaging information. That extra information can sometimes add a substantial
# performance overhead, so we disable it in our default logging config.
# For more information, see SPARK-39361.
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Set the default spark-shell/spark-sql log level to WARN. When running the
# spark-shell/spark-sql, the log level for these classes is used to overwrite
# the root logger's log level, so that the user can have different defaults
# for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
logger.thriftserver.level = warn

# Settings to quiet third party logs that are too verbose
logger.jetty1.name = org.sparkproject.jetty
logger.jetty1.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.replexprTyper.level = info
logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.replSparkILoopInterpreter.level = info
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = error
logger.parquet2.name = parquet
logger.parquet2.level = error

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.RetryingHMSHandler.level = fatal
logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.FunctionRegistry.level = error

# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
appender.console.filter.1.type = RegexFilter
appender.console.filter.1.regex = .*Thrift error occurred during processing of message.*
appender.console.filter.1.onMatch = deny
appender.console.filter.1.onMismatch = neutral
+23
@@ -0,0 +1,23 @@
#!/usr/bin/env python3

import csv

queries = None
with open('../../integ-test/script/test_cases.csv', 'r') as f:
    reader = csv.DictReader(f)
    queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()]

print('try {')
for query in queries:
    query_str = query[0].replace('\n', '').replace('"', '\\"')
    if 'FAILED' == query[2]:
        print('  try {')
        print(f'    spark.sql("{query_str}")')
        print('    throw new Error')
        print('  } catch {')
        print('    case e: Exception => null')
        print('  }\n')
    else:
        print(f'  spark.sql("{query_str}")\n')
print('}')
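For reference, the script above turns each CSV row into a spark.sql(...) call inside one outer Scala try block, wrapping rows whose expected_status is FAILED in an inner try/catch so that the generated code only passes when the query actually throws. A small self-contained sketch of that behaviour on in-memory rows (the sample PPL queries are made up for illustration; only the generation logic mirrors the script):

#!/usr/bin/env python3
# Illustration only: apply the same generation logic as the script above to two
# made-up rows instead of reading integ-test/script/test_cases.csv.
sample = [
    ("source=my_index | fields name", 1, "SUCCESS"),
    ("source=missing_index | fields name", 2, "FAILED"),
]

print('try {')
for query_str, _, status in sample:
    query_str = query_str.replace('\n', '').replace('"', '\\"')
    if status == 'FAILED':
        # Expected failures: the generated Scala throws if spark.sql(...) does not.
        print('  try {')
        print(f'    spark.sql("{query_str}")')
        print('    throw new Error')
        print('  } catch {')
        print('    case e: Exception => null')
        print('  }\n')
    else:
        print(f'  spark.sql("{query_str}")\n')
print('}')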
