Commit 5694c32

Update scalardb-analytics-spark-sample to support 3.14 (#75)

* Update scalardb-analytics-spark-sample to support 3.14
* Remove unnecessary blank line
* Do not specify patch version of image to follow the latest one

1 parent bdedc82 · commit 5694c32

File tree: 14 files changed, +135 -150 lines changed
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+.scala_history

scalardb-analytics-spark-sample/cert.pem

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 60 additions & 58 deletions

@@ -1,50 +1,50 @@
 services:
-  spark-shell:
+  spark-sql:
     build:
       context: ./docker
       dockerfile: Dockerfile.spark
     volumes:
       - ./scalardb.properties:/etc/scalardb.properties
-      - ./cert.pem:/etc/cert.pem
-      - .scala_history_jline3:/root/.scala_history_jline3
+      - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
+      - .scala_history:/root/.scala_history
     networks:
       - scalar-network
     profiles:
       - dev
     depends_on:
-      - backend-postgres
-      - backend-cassandra
-      - backend-dynamodb
+      - scalardb-cassandra
+      - scalardb-mysql
+      - postgres
     command:
-      - "/opt/spark/bin/spark-shell"
+      - "/opt/spark/bin/spark-sql"
       - "--packages"
-      - "com.scalar-labs:scalardb-analytics-spark-3.5_2.12:3.12.0"
+      - "com.scalar-labs:scalardb-analytics-spark-all-3.5_2.12:3.14.0"

-  backend-postgres:
-    image: postgres:15.1
-    ports:
-      - "5432"
+  sample-data-loader:
+    build:
+      context: sample-data-loader
+      dockerfile: Dockerfile
     volumes:
-      - backend-postgres-data:/var/lib/postgresql/data
-    environment:
-      - POSTGRES_USER=postgres
-      - POSTGRES_PASSWORD=postgres
-      - POSTGRES_DB=test
+      - ./scalardb.properties:/etc/scalardb.properties
+      - ./schema.json:/etc/schema.json
+      - ./data:/data
+    working_dir: /sample-data-loader
     networks:
       - scalar-network
-    healthcheck:
-      test: ["CMD", "psql", "-U", "postgres", "-c", "select 1"]
-      interval: 1s
-      timeout: 1s
-      retries: 10
-      start_period: 1s
+    profiles:
+      - dev
+    depends_on:
+      - scalardb-cassandra
+      - scalardb-mysql
+      - postgres
+    command: ["java", "-jar", "/app.jar"]

-  backend-cassandra:
+  scalardb-cassandra:
     image: cassandra:3.11
     ports:
-      - "9042"
+      - 9042
     volumes:
-      - backend-cassandra-data:/var/lib/cassandra
+      - scalardb-cassandra-data:/var/lib/cassandra
     environment:
       - CASSANDRA_DC=dc1
       - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch
@@ -55,50 +55,52 @@ services:
       interval: 1s
       timeout: 1s
       retries: 10
-      start_period: 5s
+      start_period: 10s

-  backend-dynamodb:
-    image: amazon/dynamodb-local:1.21.0
+  scalardb-mysql:
+    image: mysql:8.0
     ports:
-      - "8000"
-    command:
-      [
-        "-jar",
-        "DynamoDBLocal.jar",
-        "-sharedDb",
-        "-dbPath",
-        "/home/dynamodblocal",
-        "-optimizeDbBeforeStartup",
-      ]
+      - 3306
     volumes:
-      - backend-dynamodb-data:/home/dynamodblocal
+      - scalardb-mysql-data:/var/lib/mysql
+    environment:
+      - MYSQL_ROOT_PASSWORD=mysql
+      - MYSQL_DATABASE=sampledb
     networks:
       - scalar-network
+    healthcheck:
+      test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "root"]
+      interval: 1s
+      timeout: 1s
+      retries: 10
+      start_period: 5s

-  sample-data-loader:
-    build:
-      context: sample-data-loader
-      dockerfile: Dockerfile
+  postgres:
+    image: postgres:15.1
+    ports:
+      - 5432
     volumes:
-      - ./scalardb.properties:/etc/scalardb.properties
-      - ./schema.json:/etc/schema.json
-      - ./data:/data
-    working_dir: /sample-data-loader
+      - postgres-data:/var/lib/postgresql/data
+      - ./data/customer.csv:/opt/customer.csv
+      - ./sql/postgres_copy.sql:/docker-entrypoint-initdb.d/postgres_copy.sql
+    environment:
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=postgres
+      - POSTGRES_DB=sampledb
     networks:
       - scalar-network
-    profiles:
-      - dev
-    depends_on:
-      - backend-postgres
-      - backend-cassandra
-      - backend-dynamodb
-    command: ["java", "-jar", "/app.jar"]
+    healthcheck:
+      test: ["CMD", "psql", "-U", "postgres", "-c", "select 1"]
+      interval: 1s
+      timeout: 1s
+      retries: 10
+      start_period: 5s

 volumes:
   analytics-data: {}
-  backend-postgres-data: {}
-  backend-cassandra-data: {}
-  backend-dynamodb-data: {}
+  scalardb-cassandra-data: {}
+  scalardb-mysql-data: {}
+  postgres-data: {}

 networks:
   scalar-network: {}
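The interactive service is now spark-sql, pulling the scalardb-analytics-spark-all 3.14 connector, instead of a spark-shell. For orientation, here is a minimal Java sketch of the query side of this setup; it assumes the ScalarDB Analytics catalog is wired up through the mounted spark-defaults.conf (referenced but not shown in this commit), and the identifier mysqlns.orders only mirrors the namespace the loader writes to below, so treat the qualified table name as an assumption rather than the sample's exact configuration.

import org.apache.spark.sql.SparkSession;

public class SampleQuery {
  public static void main(String[] args) {
    // Catalog and data source settings are assumed to come from the mounted
    // spark-defaults.conf; only the query side is sketched here.
    SparkSession spark =
        SparkSession.builder().appName("scalardb-analytics-sample-query").getOrCreate();

    // o_orderkey and o_custkey are columns the loader populates below.
    spark.sql("SELECT o_orderkey, o_custkey FROM mysqlns.orders LIMIT 10").show();

    spark.stop();
  }
}

This is roughly what one would type at the prompt the spark-sql service opens, expressed through the SparkSession API instead.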

scalardb-analytics-spark-sample/docker/Dockerfile.spark

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@ FROM eclipse-temurin:17-jre-jammy

 WORKDIR /work

-ENV SPARK_VERSION 3.5.1
+ENV SPARK_VERSION 3.5.3

 RUN apt-get update && \
     apt-get install -y --no-install-recommends \

scalardb-analytics-spark-sample/sample-data-loader/build.gradle.kts

Lines changed: 3 additions & 3 deletions

@@ -1,6 +1,6 @@
 plugins {
   application
-  id("com.github.johnrengelman.shadow") version "7.1.2"
+  id("com.gradleup.shadow") version "8.3.5"
   id("com.diffplug.spotless") version "6.24.0"
 }

@@ -9,8 +9,8 @@ repositories {
 }

 dependencies {
-  implementation("com.scalar-labs:scalardb:3.12.1")
-  implementation("com.scalar-labs:scalardb-schema-loader:3.12.1")
+  implementation("com.scalar-labs:scalardb:3.14.0")
+  implementation("com.scalar-labs:scalardb-schema-loader:3.14.0")
   implementation("org.apache.commons:commons-csv:1.10.0")

   implementation("io.netty:netty-transport-native-epoll:4.1.99.Final:linux-x86_64")
scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.jar

Binary file not shown.

scalardb-analytics-spark-sample/sample-data-loader/gradle/wrapper/gradle-wrapper.properties

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
 networkTimeout=10000
 validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME

scalardb-analytics-spark-sample/sample-data-loader/gradlew

Lines changed: 5 additions & 2 deletions

@@ -15,6 +15,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# SPDX-License-Identifier: Apache-2.0
+#

 ##############################################################################
 #
@@ -55,7 +57,7 @@
 #       Darwin, MinGW, and NonStop.
 #
 #   (3) This script is generated from the Groovy template
-#       https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
 #       within the Gradle project.
 #
 #   You can find Gradle at https://github.com/gradle/gradle/.
@@ -84,7 +86,8 @@ done
 # shellcheck disable=SC2034
 APP_BASE_NAME=${0##*/}
 # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
-APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
+APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
+' "$PWD" ) || exit

 # Use the maximum available, or set MAX_FD != -1 to use that value.
 MAX_FD=maximum

scalardb-analytics-spark-sample/sample-data-loader/gradlew.bat

Lines changed: 12 additions & 10 deletions

@@ -13,6 +13,8 @@
 @rem See the License for the specific language governing permissions and
 @rem limitations under the License.
 @rem
+@rem SPDX-License-Identifier: Apache-2.0
+@rem

 @if "%DEBUG%"=="" @echo off
 @rem ##########################################################################
@@ -43,11 +45,11 @@ set JAVA_EXE=java.exe
 %JAVA_EXE% -version >NUL 2>&1
 if %ERRORLEVEL% equ 0 goto execute

-echo.
-echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
+echo. 1>&2
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2

 goto fail

@@ -57,11 +59,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe

 if exist "%JAVA_EXE%" goto execute

-echo.
-echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
-echo.
-echo Please set the JAVA_HOME variable in your environment to match the
-echo location of your Java installation.
+echo. 1>&2
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2

 goto fail

scalardb-analytics-spark-sample/sample-data-loader/src/main/java/sample/data/Loader.java

Lines changed: 5 additions & 32 deletions

@@ -2,6 +2,7 @@

 import com.scalar.db.api.DistributedTransaction;
 import com.scalar.db.api.DistributedTransactionManager;
+import com.scalar.db.api.Mutation;
 import com.scalar.db.api.Put;
 import com.scalar.db.exception.transaction.TransactionException;
 import com.scalar.db.io.Key;
@@ -14,29 +15,18 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVRecord;

 public class Loader implements AutoCloseable {
-  private static final String CUSTOMER_DATA = "/data/customer.csv";
   private static final String ORDERS_DATA = "/data/orders.csv";
   private static final String LINEITEM_DATA = "/data/lineitem.csv";
   private static final String CONFIG_FILE_PATH = "/etc/scalardb.properties";
   private static final String SCHEMA_FILE_PATH = "/etc/schema.json";

-  private static final String[] CUSTOMER_COLUMNS = {
-    "c_custkey",
-    "c_name",
-    "c_address",
-    "c_nationkey",
-    "c_phone",
-    "c_acctbal",
-    "c_mktsegment",
-    "c_comment"
-  };
-
   private static final String[] ORDERS_COLUMNS = {
     "o_orderkey",
     "o_custkey",
@@ -82,8 +72,6 @@ public void close() {
   public void load() throws TransactionException, IOException, SchemaLoaderException {
     loadSchema();

-    loadData(this.manager, CUSTOMER_DATA, CUSTOMER_COLUMNS, this::buildPutCustomer);
-
     loadData(this.manager, ORDERS_DATA, ORDERS_COLUMNS, this::buildPutOrders);

     loadData(this.manager, LINEITEM_DATA, LINEITEM_COLUMNS, this::buildPutLineitem);
@@ -101,25 +89,9 @@ private void loadSchema() throws SchemaLoaderException {
     SchemaLoader.load(configFilePath, schemaFilePath, options, createCoordinatorTables);
   }

-  private Put buildPutCustomer(CSVRecord record) {
-    return Put.newBuilder()
-        .namespace("dynamons")
-        .table("customer")
-        .partitionKey(Key.ofInt("c_custkey", intCol(record, "c_custkey")))
-        .textValue("c_name", stringCol(record, "c_name"))
-        .textValue("c_address", stringCol(record, "c_address"))
-        .intValue("c_nationkey", intCol(record, "c_nationkey"))
-        .textValue("c_phone", stringCol(record, "c_phone"))
-        .doubleValue("c_acctbal", doubleCol(record, "c_acctbal"))
-        .textValue("c_mktsegment", stringCol(record, "c_mktsegment"))
-        .textValue("c_comment", stringCol(record, "c_comment"))
-        .enableImplicitPreRead()
-        .build();
-  }
-
   private Put buildPutOrders(CSVRecord record) {
     return Put.newBuilder()
-        .namespace("postgresns")
+        .namespace("mysqlns")
         .table("orders")
         .partitionKey(Key.ofInt("o_orderkey", intCol(record, "o_orderkey")))
         .intValue("o_custkey", intCol(record, "o_custkey"))
@@ -175,7 +147,8 @@ private void loadData(
       transaction = manager.start();
       for (CSVRecord record : records) {
         Put put = putFunction.apply(record);
-        transaction.put(put);
+        List<Mutation> mutations = List.of(put);
+        transaction.mutate(mutations);
       }
       transaction.commit();
     } catch (TransactionException e) {
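The final hunk swaps the per-record transaction.put(put) for the list-based mutate API. As a self-contained sketch of that pattern, assuming ScalarDB 3.14; the class name and the sample values are illustrative, not taken from this commit:

import com.scalar.db.api.DistributedTransaction;
import com.scalar.db.api.DistributedTransactionManager;
import com.scalar.db.api.Mutation;
import com.scalar.db.api.Put;
import com.scalar.db.exception.transaction.TransactionException;
import com.scalar.db.io.Key;
import java.util.List;

public class MutateExample {
  // Illustrative: write a single orders row the way the loader now does.
  public static void insertOrder(DistributedTransactionManager manager)
      throws TransactionException {
    DistributedTransaction tx = manager.start();
    try {
      Put put =
          Put.newBuilder()
              .namespace("mysqlns")
              .table("orders")
              .partitionKey(Key.ofInt("o_orderkey", 1))
              .intValue("o_custkey", 1)
              .enableImplicitPreRead()
              .build();
      // mutate() takes a list of mutations; a single Put is wrapped with List.of().
      List<Mutation> mutations = List.of(put);
      tx.mutate(mutations);
      tx.commit();
    } catch (TransactionException e) {
      tx.abort();
      throw e;
    }
  }
}

Wrapping each Put in a singleton list preserves the loader's one-mutation-per-record loop; batching several mutations into one mutate call would also be valid.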
