Skip to content

Commit 108a8b7

Browse files
GitHub migration (#3)
* Cleaned up core * Moved dependencies to :provided to allow clojars deployment + get rid of unit-test CI stage * Added provided dependencies instruction * Added GitHub action * migrate to actions * change logo
1 parent 8d89563 commit 108a8b7

File tree

10 files changed

+114
-80
lines changed

10 files changed

+114
-80
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Continuous Integration
2+
3+
on:
4+
push:
5+
branches:
6+
- develop
7+
pull_request:
8+
branches:
9+
- develop
10+
11+
jobs:
12+
lint:
13+
runs-on: ubuntu-latest
14+
steps:
15+
- uses: actions/checkout@v2
16+
- name: lint
17+
uses: docker://zeroonetechnology/geni:latest
18+
with:
19+
entrypoint: clj-kondo
20+
args: --lint src test
21+
coverage:
22+
runs-on: ubuntu-latest
23+
steps:
24+
- uses: actions/checkout@v2
25+
- name: coverage
26+
uses: docker://zeroonetechnology/geni:latest
27+
with:
28+
entrypoint: sh
29+
args: -c "lein cloverage --fail-threshold 90 --codecov"
30+
- uses: codecov/codecov-action@v1
31+
with:
32+
file: target/coverage/codecov.json
33+
fail_ci_if_error: true

.gitlab-ci.yml

-27
This file was deleted.

Makefile

+9-7
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,19 @@ repl: build
1212
docker run --rm -v $(PWD):/root/geni -w /root/geni -it $(DOCKERNAME) \
1313
lein repl
1414

15-
unit-tests: build
16-
docker run --rm -v $(PWD):/root/geni -w /root/geni -it $(DOCKERNAME) \
17-
lein midje
18-
1915
autotest: build
2016
docker run --rm -v $(PWD):/root/geni -w /root/geni -it $(DOCKERNAME) \
2117
lein midje :autotest
2218

23-
coverage:
19+
docker-push: build
20+
docker push $(DOCKERNAME)
21+
22+
coverage: build
2423
docker run --rm -v $(PWD):/root/geni -w /root/geni -it $(DOCKERNAME) \
2524
lein cloverage --fail-threshold 90
2625

27-
docker-push:
28-
docker push $(DOCKERNAME)
26+
lint: build
27+
docker run --rm -v $(PWD):/root/geni -w /root/geni -it $(DOCKERNAME) \
28+
clj-kondo --lint src test --cache false
29+
30+
ci: coverage lint

README.md

+15-6
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
<img src="logo/geni.png" width="250px">
22

3-
[![pipeline status](https://gitlab.com/zero-one-open-source/geni/badges/develop/pipeline.svg)](https://gitlab.com/zero-one-open-source/geni/-/commits/develop)
4-
[![coverage report](https://gitlab.com/zero-one-open-source/geni/badges/develop/coverage.svg)](https://gitlab.com/zero-one-open-source/geni/-/commits/develop)
5-
<!--[![Clojars Project](https://img.shields.io/clojars/v/zero.one/geni.svg)](http://clojars.org/zero.one/geni)-->
3+
[![Continuous Integration](https://github.com/zero-one-group/geni/workflows/Continuous%20Integration/badge.svg?branch=develop)](https://github.com/zero-one-group/geni/commits/develop)
4+
[![Code Coverage](https://codecov.io/gh/zero-one-group/geni/branch/develop/graph/badge.svg)](https://codecov.io/gh/zero-one-group/geni)
5+
[![Clojars Project](https://img.shields.io/clojars/v/zero.one/geni.svg)](http://clojars.org/zero.one/geni)
66

77
WARNING! This library is still unstable. Some information here may be outdated. Do not use it in production just yet!
88

99
See [Flambo](https://github.com/sorenmacbeth/flambo) and [Sparkling](https://github.com/gorillalabs/sparkling) for more mature alternatives.
1010

11-
[[_TOC_]]
12-
1311
# Introduction
1412

1513
`geni` (*/gɜni/* or "gurney" without the r) is a Clojure library that wraps Apache Spark. The name comes from the Javanese word for fire.
@@ -20,7 +18,18 @@ Note that `geni` wraps Apache Spark 2.4.5, which uses Scala 2.12, which has [inc
2018

2119
Add the following to the `:dependencies` vector of your `project.clj`:
2220

23-
<!--[![Clojars Project](https://clojars.org/zero.one/geni/latest-version.svg)](http://clojars.org/zero.one/geni)-->
21+
[![Clojars Project](https://clojars.org/zero.one/geni/latest-version.svg)](http://clojars.org/zero.one/geni)
22+
23+
You will also need to add the Spark libraries as provided dependencies. For instance, add the following key-value pair to the `:profiles` map:
24+
25+
```clojure
26+
:provided
27+
{:dependencies [[org.apache.spark/spark-core_2.12 "2.4.5"]
28+
[org.apache.spark/spark-hive_2.12 "2.4.5"]
29+
[org.apache.spark/spark-mllib_2.12 "2.4.5"]
30+
[org.apache.spark/spark-sql_2.12 "2.4.5"]
31+
[org.apache.spark/spark-streaming_2.12 "2.4.5"]]}
32+
```
2433

2534
# License
2635

docker/Dockerfile

+5
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@ FROM clojure:openjdk-8-lein-2.9.1
33
ENV USER root
44
ENV HOME /root
55

6+
ADD spark-project.clj /root/spark/project.clj
7+
RUN cd /root/spark && lein with-profile +dev deps
8+
69
ADD project.clj /root/project.clj
710
RUN cd /root && lein with-profile +dev deps
811

912
RUN bash -c "bash <(curl -s https://raw.githubusercontent.com/borkdude/clj-kondo/master/script/install-clj-kondo)"
1013

14+
RUN cd /root/ && lein cloverage || true
15+
1116
ENTRYPOINT []

docker/project.clj

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1-
(defproject geni "0.0.1-SNAPSHOT"
1+
(defproject zero.one/geni "0.0.1-SNAPSHOT"
22
:description "A Clojure library that wraps Apache Spark"
33
:url "https://gitlab.com/zero-one-open-source/geni"
44
:license {:name "Apache License"
55
:url "https://www.apache.org/licenses/LICENSE-2.0"}
6-
:dependencies [[org.apache.spark/spark-core_2.11 "2.4.5"]
7-
[org.apache.spark/spark-hive_2.11 "2.4.5"]
8-
[org.apache.spark/spark-sql_2.11 "2.4.5"]
9-
[org.clojure/clojure "1.10.1"]]
10-
:profiles {:dev {:dependencies [[expound "0.8.4"]
6+
:dependencies [[org.clojure/clojure "1.10.1"]]
7+
:profiles {:provided
8+
{:dependencies [[org.apache.spark/spark-core_2.12 "2.4.5"]
9+
[org.apache.spark/spark-hive_2.12 "2.4.5"]
10+
[org.apache.spark/spark-mllib_2.12 "2.4.5"]
11+
[org.apache.spark/spark-sql_2.12 "2.4.5"]
12+
[org.apache.spark/spark-streaming_2.12 "2.4.5"]]}
13+
:dev {:dependencies [[expound "0.8.4"]
1114
[midje "1.9.9"]]
1215
:plugins [[lein-cloverage "1.1.2"]
1316
[lein-midje "3.2.1"]]}}

docker/spark-project.clj

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
(defproject spark-dummy "spark-dummy"
2+
:dependencies [[org.apache.spark/spark-core_2.12 "2.4.5"]
3+
[org.apache.spark/spark-hive_2.12 "2.4.5"]
4+
[org.apache.spark/spark-mllib_2.12 "2.4.5"]
5+
[org.apache.spark/spark-sql_2.12 "2.4.5"]
6+
[org.apache.spark/spark-streaming_2.12 "2.4.5"]])

logo/geni.png

100755100644
2.87 KB
Loading

project.clj

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1-
(defproject geni "0.0.1-SNAPSHOT"
1+
(defproject zero.one/geni "0.0.1-SNAPSHOT"
22
:description "A Clojure library that wraps Apache Spark"
33
:url "https://gitlab.com/zero-one-open-source/geni"
44
:license {:name "Apache License"
55
:url "https://www.apache.org/licenses/LICENSE-2.0"}
6-
:dependencies [[org.apache.spark/spark-core_2.11 "2.4.5"]
7-
[org.apache.spark/spark-hive_2.11 "2.4.5"]
8-
[org.apache.spark/spark-sql_2.11 "2.4.5"]
9-
[org.clojure/clojure "1.10.1"]]
10-
:profiles {:dev {:dependencies [[expound "0.8.4"]
6+
:dependencies [[org.clojure/clojure "1.10.1"]]
7+
:profiles {:provided
8+
{:dependencies [[org.apache.spark/spark-core_2.12 "2.4.5"]
9+
[org.apache.spark/spark-hive_2.12 "2.4.5"]
10+
[org.apache.spark/spark-mllib_2.12 "2.4.5"]
11+
[org.apache.spark/spark-sql_2.12 "2.4.5"]
12+
[org.apache.spark/spark-streaming_2.12 "2.4.5"]]}
13+
:dev {:dependencies [[expound "0.8.4"]
1114
[midje "1.9.9"]]
1215
:plugins [[lein-cloverage "1.1.2"]
1316
[lein-midje "3.2.1"]]}}

src/geni/core.clj

+28-28
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
(ns geni.core
2-
(:refer-clojure :exclude [map
3-
filter
4-
take
5-
concat
2+
(:refer-clojure :exclude [*
3+
+
4+
-
5+
/
6+
<
7+
<=
8+
>
9+
>=
610
cast
7-
when
8-
drop
9-
second
11+
concat
12+
count
1013
distinct
11-
*
12-
/
13-
not
14-
partition-by
14+
drop
15+
filter
1516
group-by
16-
min
17+
map
1718
max
18-
<=
19-
<
20-
>=
21-
>
22-
+
23-
-
24-
count])
19+
min
20+
not
21+
partition-by
22+
second
23+
take
24+
when])
2525
(:import
2626
(scala.collection JavaConversions Map)
2727
(org.apache.spark.sql SparkSession)
@@ -73,9 +73,7 @@
7373
:or {num-rows 20
7474
truncate 0
7575
vertical false}} options]
76-
(-> dataframe
77-
(.showString num-rows truncate vertical)
78-
println))))
76+
(-> dataframe (.showString num-rows truncate vertical) println))))
7977

8078
(defn show-vertical
8179
([dataframe] (show dataframe {:vertical true}))
@@ -203,7 +201,7 @@
203201
(defn when
204202
([condition if-expr] (functions/when condition (->column if-expr)))
205203
([condition if-expr else-expr]
206-
(.otherwise (when condition if-expr) (->column else-expr))))
204+
(-> (when condition if-expr) (.otherwise (->column else-expr)))))
207205

208206
(defn coalesce [& exprs]
209207
(functions/coalesce (->col-array exprs)))
@@ -247,13 +245,14 @@
247245

248246
(defn concat [& exprs] (functions/concat (->col-array exprs)))
249247

250-
(defn agg-all [dataframe name->col]
251-
(let [agg-cols (clojure.core/map name->col (column-names dataframe))]
248+
(defn agg-all [dataframe agg-fn]
249+
(let [agg-cols (clojure.core/map agg-fn (column-names dataframe))]
252250
(apply agg dataframe agg-cols)))
253251

254252
(defn sample
255253
([dataframe fraction] (.sample dataframe fraction))
256-
([dataframe fraction with-replacement] (.sample dataframe with-replacement fraction)))
254+
([dataframe fraction with-replacement]
255+
(.sample dataframe with-replacement fraction)))
257256

258257
(defn union [left-df right-df] (.union left-df right-df))
259258

@@ -289,10 +288,11 @@
289288
(.setLogLevel context "ERROR")
290289
session)))
291290

292-
293291
(defonce dataframe
294292
(delay
295-
(cache (read-parquet! @spark "test/resources/melbourne_housing_snapshot.parquet"))))
293+
(cache (read-parquet!
294+
@spark
295+
"test/resources/melbourne_housing_snapshot.parquet"))))
296296

297297
(comment
298298

0 commit comments

Comments
 (0)