Skip to content

Commit 6a54e54

Browse files
authored
Add support for Java 17 (#346)
* Update Spark to 3.3.3 * Update tests * Update .gitignore * Fix tests * Update dependencies in the template * Update dependencies in
1 parent b616f02 commit 6a54e54

File tree

8 files changed

+54
-35
lines changed

8 files changed

+54
-35
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ spark-warehouses/
66

77
*.DS_Store*
88
.clj-kondo/.cache
9+
.clj-kondo/marick
910

1011
pom.xml
1112
pom.xml.asc

lein-template/resources/leiningen/new/geni/project.clj

+15-6
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
[metosin/jsonista "0.3.3"
99
:exclusions [com.fasterxml.jackson.core/jackson-databind]]
1010
[expound "0.8.9"]
11+
[io.netty/netty-all "4.1.74.Final"]
12+
[com.fasterxml.jackson.core/jackson-core "2.15.3"]
13+
[com.fasterxml.jackson.core/jackson-annotations "2.15.3"]
1114
;; Spark
12-
[org.apache.spark/spark-core_2.12 "3.1.2"]
13-
[org.apache.spark/spark-hive_2.12 "3.1.2"]
14-
[org.apache.spark/spark-mllib_2.12 "3.1.2"]
15-
[org.apache.spark/spark-sql_2.12 "3.1.2"]
16-
[org.apache.spark/spark-streaming_2.12 "3.1.2"]
17-
[org.apache.spark/spark-yarn_2.12 "3.1.2"]
15+
[org.apache.spark/spark-core_2.12 "3.3.3"]
16+
[org.apache.spark/spark-hive_2.12 "3.3.3"]
17+
[org.apache.spark/spark-mllib_2.12 "3.3.3"]
18+
[org.apache.spark/spark-sql_2.12 "3.3.3"]
19+
[org.apache.spark/spark-streaming_2.12 "3.3.3"]
20+
[org.apache.spark/spark-yarn_2.12 "3.3.3"]
1821
[com.github.fommil.netlib/all "1.1.2" :extension "pom"]
1922
; Arrow
2023
[org.apache.arrow/arrow-memory-netty "4.0.0"]
@@ -40,6 +43,12 @@
4043
"--class"
4144
"{{namespace}}.core"
4245
"target/uberjar/{{raw-name}}-standalone.jar"]]}{{/dataproc?}}
46+
:jvm-opts ["--add-opens=java.base/java.io=ALL-UNNAMED"
47+
"--add-opens=java.base/java.nio=ALL-UNNAMED"
48+
"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED"
49+
"--add-opens=java.base/java.util=ALL-UNNAMED"
50+
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"
51+
"--add-opens=java.base/sun.util.calendar=ALL-UNNAMED"]
4352
:profiles {:uberjar {:aot :all}
4453
:dev {:plugins [[lein-ancient "0.7.0"]]}}
4554
:main ^:skip-aot {{namespace}}.core

project.clj

+22-13
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,38 @@
11
(def spark-deps
2-
'[;; Spark
2+
'[[io.netty/netty-all "4.1.74.Final"]
3+
[com.fasterxml.jackson.core/jackson-core "2.15.3"]
4+
[com.fasterxml.jackson.core/jackson-annotations "2.15.3"]
5+
;; Spark
36
; This breaks cljdoc: https://github.com/cljdoc/cljdoc/issues/407
47
; Frozen until issue is resolved.
58
;[com.github.fommil.netlib/all "1.1.2" :extension "pom"]
6-
[org.apache.spark/spark-avro_2.12 "3.1.1"]
7-
[org.apache.spark/spark-core_2.12 "3.1.1"]
8-
[org.apache.spark/spark-hive_2.12 "3.1.1"]
9-
[org.apache.spark/spark-mllib_2.12 "3.1.1"]
10-
[org.apache.spark/spark-sql_2.12 "3.1.1"]
11-
[org.apache.spark/spark-streaming_2.12 "3.1.1"]
9+
[org.apache.spark/spark-avro_2.12 "3.3.3"]
10+
[org.apache.spark/spark-core_2.12 "3.3.3"]
11+
[org.apache.spark/spark-hive_2.12 "3.3.3"]
12+
[org.apache.spark/spark-mllib_2.12 "3.3.3"]
13+
[org.apache.spark/spark-sql_2.12 "3.3.3"]
14+
[org.apache.spark/spark-streaming_2.12 "3.3.3"]
1215
; Arrow
13-
[org.apache.arrow/arrow-memory-netty "3.0.0"]
14-
[org.apache.arrow/arrow-memory-core "3.0.0"]
15-
[org.apache.arrow/arrow-vector "3.0.0"
16+
[org.apache.arrow/arrow-memory-netty "4.0.0"]
17+
[org.apache.arrow/arrow-memory-core "4.0.0"]
18+
[org.apache.arrow/arrow-vector "4.0.0"
1619
:exclusions [commons-codec com.fasterxml.jackson.core/jackson-databind]]
1720
; Databases
18-
[mysql/mysql-connector-java "8.0.23"]
19-
[org.postgresql/postgresql "42.2.19"]
21+
[mysql/mysql-connector-java "8.0.25"]
22+
[org.postgresql/postgresql "42.2.20"]
2023
[org.xerial/sqlite-jdbc "3.34.0"]
2124
;; Optional: Spark XGBoost
2225
[ml.dmlc/xgboost4j-spark_2.12 "1.2.0"]
2326
[ml.dmlc/xgboost4j_2.12 "1.2.0"]])
2427

2528
(defproject zero.one/geni "0.0.40"
26-
:jvm-opts ["-Duser.country=US" "-Duser.language=en"]
29+
:jvm-opts ["-Duser.country=US" "-Duser.language=en"
30+
"--add-opens=java.base/java.io=ALL-UNNAMED"
31+
"--add-opens=java.base/java.nio=ALL-UNNAMED"
32+
"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED"
33+
"--add-opens=java.base/java.util=ALL-UNNAMED"
34+
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"
35+
"--add-opens=java.base/sun.util.calendar=ALL-UNNAMED"]
2736
:description "A Clojure dataframe library that runs on Spark"
2837
:url "https://github.com/zero-one-group/geni"
2938
:license {:name "Apache License"

test/zero_one/geni/data_sources_test.clj

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
(g/dtypes dummy-df) => {:coord "ArrayType(DoubleType,true)"
3131
:prop "MapType(StringType,StringType,true)"
3232
:rooms (str "StructType("
33-
"StructField(rooms,LongType,true), "
33+
"StructField(rooms,LongType,true),"
3434
"StructField(bathroom,DoubleType,true))")})
3535
(fact "correct direct schema option"
3636
(-> (g/read-parquet!
@@ -46,7 +46,7 @@
4646
g/dtypes) => {:coord "ArrayType(LongType,true)"
4747
:prop "MapType(StringType,StringType,true)"
4848
:rooms (str "StructType("
49-
"StructField(rooms,IntegerType,true), "
49+
"StructField(rooms,IntegerType,true),"
5050
"StructField(bathroom,FloatType,true))")})
5151
(fact "correct data-oriented schema option"
5252
(-> (g/read-parquet!
@@ -57,7 +57,7 @@
5757
g/dtypes) => {:coord "ArrayType(ShortType,true)"
5858
:prop "MapType(StringType,StringType,true)"
5959
:rooms (str "StructType("
60-
"StructField(rooms,FloatType,true), "
60+
"StructField(rooms,FloatType,true),"
6161
"StructField(bathroom,LongType,true))")})))
6262

6363
(facts "On binary data" :binary

test/zero_one/geni/dataset_creation_test.clj

+5-5
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@
5151
[(g/row (g/row 27 42))
5252
(g/row (g/row 57 18))]
5353
{:coord {:x :int :y :int}}))
54-
=> {:coord "StructType(StructField(x,IntegerType,true), StructField(y,IntegerType,true))"})
54+
=> {:coord "StructType(StructField(x,IntegerType,true),StructField(y,IntegerType,true))"})
5555
(fact "of struct array fields"
5656
(g/dtypes
5757
(g/create-dataframe
5858
@tr/spark
5959
[(g/row [(g/row 27 42)])
6060
(g/row [(g/row 57 18)])]
6161
{:coords [{:x :int :y :int}]}))
62-
=> {:coords "ArrayType(StructType(StructField(x,IntegerType,true), StructField(y,IntegerType,true)),true)"}))
62+
=> {:coords "ArrayType(StructType(StructField(x,IntegerType,true),StructField(y,IntegerType,true)),true)"}))
6363

6464
(facts "On building blocks"
6565
(fact "can instantiate vectors"
@@ -266,7 +266,7 @@
266266
(instance? Dataset dataset) => true
267267
(g/column-names dataset) => ["a" "b"]
268268
(g/dtypes dataset) => {:a "LongType"
269-
:b "StructType(StructField(z,ArrayType(StringType,true),true), StructField(y,BooleanType,true))"}))
269+
:b "StructType(StructField(z,ArrayType(StringType,true),true),StructField(y,BooleanType,true))"}))
270270
(fact "should create the right schema for list of maps"
271271
(let [dataset (g/table->dataset
272272
@tr/spark
@@ -276,7 +276,7 @@
276276
(instance? Dataset dataset) => true
277277
(g/column-names dataset) => ["a" "b"]
278278
(g/dtypes dataset) => {:a "LongType"
279-
:b "ArrayType(StructType(StructField(z,LongType,true), StructField(y,DoubleType,true)),true)"}))
279+
:b "ArrayType(StructType(StructField(z,LongType,true),StructField(y,DoubleType,true)),true)"}))
280280
(fact "should create the right schema for list of list of maps"
281281
(let [dataset (g/table->dataset
282282
@tr/spark
@@ -286,7 +286,7 @@
286286
(instance? Dataset dataset) => true
287287
(g/column-names dataset) => ["a" "b"]
288288
(g/dtypes dataset) => {:a "LongType"
289-
:b "ArrayType(ArrayType(StructType(StructField(z,LongType,true), StructField(y,BooleanType,true)),true),true)"})))
289+
:b "ArrayType(ArrayType(StructType(StructField(z,LongType,true),StructField(y,BooleanType,true)),true),true)"})))
290290

291291
(facts "On spark range"
292292
(fact "should create simple datasets"

test/zero_one/geni/dataset_test.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@
430430
(-> (df-20)
431431
(g/repartition :Suburb :SellerG)
432432
g/partitions
433-
count) => #(< 1 %))
433+
count) => #(<= 1 %))
434434
(fact "able to repartition by number and columns"
435435
(-> (df-20)
436436
(g/repartition 10 :Suburb :SellerG)

test/zero_one/geni/rdd_test.clj

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
(rdd/resources) => {}
5656
(rdd/spark-home) => (System/getenv "SPARK_HOME")
5757
(rdd/sc) => (partial instance? SparkContext)
58-
(rdd/version) => "3.1.1"))
58+
(rdd/version) => "3.3.3"))
5959

6060
(facts "On repartitioning" :rdd
6161
(fact "partition-by works"

test/zero_one/geni/sql_functions_test.clj

+6-6
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
:to-2 (g/to-json (g/struct {:time (g/to-timestamp (g/lit "2015-08-26") "yyyy-MM-dd")})
2525
{:timestampFormat "dd/MM/yyyy"})})
2626
g/collect
27-
first) => {:schema-1 "ARRAY<STRUCT<`col`: BIGINT>>"
28-
:schema-2 "ARRAY<STRUCT<`col`: BIGINT>>"
27+
first) => {:schema-1 "ARRAY<STRUCT<col: BIGINT>>"
28+
:schema-2 "ARRAY<STRUCT<col: BIGINT>>"
2929
:from-1 {:a 1 :b 0.8}
3030
:from-2 {:time (Timestamp. 1440547200000)}
3131
:to-1 "{\"a\":1,\"b\":2}"
@@ -44,8 +44,8 @@
4444
:to-2 (g/to-csv (g/struct {:time (g/to-timestamp (g/lit "2015-08-26") "yyyy-MM-dd")})
4545
{:timestampFormat "dd/MM/yyyy"})})
4646
g/collect
47-
first) => {:schema-1 "STRUCT<`_c0`: INT, `_c1`: STRING>"
48-
:schema-2 "STRUCT<`_c0`: INT, `_c1`: STRING>"
47+
first) => {:schema-1 "STRUCT<_c0: INT, _c1: STRING>"
48+
:schema-2 "STRUCT<_c0: INT, _c1: STRING>"
4949
:from-1 {:a 1 :b 0.8}
5050
:from-2 {:time (Timestamp. 1440547200000)}
5151
:to-1 "1,2"
@@ -214,7 +214,7 @@
214214
(-> (df-20)
215215
(g/cube :SellerG :Regionname)
216216
(g/agg (g/grouping-id :SellerG :Regionname))
217-
g/first-vals) => ["Nelson" nil 1]
217+
g/first-vals) => ["Biggin" "Northern Metropolitan" 0]
218218
(-> (df-20)
219219
(g/group-by :SellerG)
220220
(g/agg (-> (g/collect-list :Regionname) (g/as :regions)))
@@ -503,7 +503,7 @@
503503
(g/agg
504504
(g/count-distinct {:seller :SellerG
505505
:suburb :Suburb}))
506-
g/column-names) => ["count(SellerG AS `seller`, Suburb AS `suburb`)"])))
506+
g/column-names) => ["count(SellerG AS seller, Suburb AS suburb)"])))
507507

508508
(facts "On window functions" :slow
509509
(let [window (g/window {:partition-by :SellerG :order-by :Price})]

0 commit comments

Comments
 (0)