Commit 6f36415
[SPARK-44168] Add Apache Spark 3.4.1 Dockerfiles
### What changes were proposed in this pull request?

Add Apache Spark 3.4.1 Dockerfiles.
- Add 3.4.1 GPG key
- Add .github/workflows/build_3.4.1.yaml
- ./add-dockerfiles.sh 3.4.1
- Add version and tag info

### Why are the changes needed?

Apache Spark 3.4.1 released: https://spark.apache.org/releases/spark-release-3-4-1.html

### Does this PR introduce _any_ user-facing change?

Docker image will be published.

### How was this patch tested?

Add workflow and CI passed.

Closes #46 from Yikun/3.4.1.

Authored-by: Yikun Jiang <[email protected]>
Signed-off-by: Yikun Jiang <[email protected]>
1 parent c07ae18 commit 6f36415
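
Per the description above, the new files were produced with the repo's `add-dockerfiles.sh` helper rather than written by hand. A minimal sketch of that flow (the script name and version argument come from the commit message; the staging step is an assumption):

```bash
# Sketch, per the commit message: generate the 3.4.1 Dockerfiles, then
# stage them together with the new CI workflow.
./add-dockerfiles.sh 3.4.1
git add 3.4.1/ .github/workflows/build_3.4.1.yaml
```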

File tree: 10 files changed (+372, −9 lines)

.github/workflows/build_3.4.1.yaml

Lines changed: 41 additions & 0 deletions
```yaml
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#

name: "Build and Test (3.4.1)"

on:
  pull_request:
    branches:
      - 'master'
    paths:
      - '3.4.1/**'

jobs:
  run-build:
    strategy:
      matrix:
        image-type: ["all", "python", "scala", "r"]
    name: Run
    secrets: inherit
    uses: ./.github/workflows/main.yml
    with:
      spark: 3.4.1
      scala: 2.12
      java: 11
      image-type: ${{ matrix.image-type }}
```
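
The matrix fans the reusable main.yml workflow out into one build per image type. A rough local equivalent (directory and tag names are assumptions inferred from the `spark:3.4.1-scala2.12-java11-ubuntu` FROM line below; the base image is built first since the variants build FROM it):

```bash
# Hypothetical local equivalent of the matrix builds; paths/tags assumed.
for dir in scala2.12-java11-ubuntu \
           scala2.12-java11-python3-ubuntu \
           scala2.12-java11-r-ubuntu \
           scala2.12-java11-python3-r-ubuntu; do
  docker build -t "spark:3.4.1-${dir}" "3.4.1/${dir}"
done
```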

.github/workflows/publish.yml

Lines changed: 2 additions & 1 deletion
```diff
@@ -25,9 +25,10 @@ on:
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.4.0'
+        default: '3.4.1'
         type: choice
         options:
+        - 3.4.1
         - 3.4.0
         - 3.3.2
         - 3.3.1
```
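
Since these are workflow inputs, the new default can be exercised with a manual dispatch. A hedged example using the GitHub CLI (only the `spark` input appears in this diff; any other inputs are assumed to have usable defaults):

```bash
# Hypothetical manual dispatch; assumes gh is authenticated against the repo.
gh workflow run publish.yml -f spark=3.4.1
```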

.github/workflows/test.yml

Lines changed: 2 additions & 1 deletion
```diff
@@ -25,9 +25,10 @@ on:
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.4.0'
+        default: '3.4.1'
         type: choice
         options:
+        - 3.4.1
         - 3.4.0
         - 3.3.2
         - 3.3.1
```
3.4.1/scala2.12-java11-python3-r-ubuntu/Dockerfile

Lines changed: 30 additions & 0 deletions
```dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.1-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt install -y python3 python3-pip; \
    apt install -y r-base r-base-dev; \
    rm -rf /var/cache/apt/*; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark
```
3.4.1/scala2.12-java11-python3-ubuntu/Dockerfile

Lines changed: 27 additions & 0 deletions
```dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.1-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt install -y python3 python3-pip; \
    rm -rf /var/cache/apt/*; \
    rm -rf /var/lib/apt/lists/*

USER spark
```
3.4.1/scala2.12-java11-r-ubuntu/Dockerfile

Lines changed: 29 additions & 0 deletions
```dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM spark:3.4.1-scala2.12-java11-ubuntu

USER root

RUN set -ex; \
    apt-get update; \
    apt install -y r-base r-base-dev; \
    rm -rf /var/cache/apt/*; \
    rm -rf /var/lib/apt/lists/*

ENV R_HOME /usr/lib/R

USER spark
```
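
The three variants above differ only in which language runtimes they layer on the base image. A hedged smoke test after building them locally (image tags are assumptions following the base image's naming scheme):

```bash
# Hypothetical smoke test of the runtimes in the combined python3 + R variant.
docker run --rm spark:3.4.1-scala2.12-java11-python3-r-ubuntu python3 --version
docker run --rm spark:3.4.1-scala2.12-java11-python3-r-ubuntu R --version
```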
3.4.1/scala2.12-java11-ubuntu/Dockerfile

Lines changed: 81 additions & 0 deletions
```dockerfile
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM eclipse-temurin:11-jre-focal

ARG spark_uid=185

RUN groupadd --system --gid=${spark_uid} spark && \
    useradd --system --uid=${spark_uid} --gid=spark spark

RUN set -ex; \
    apt-get update; \
    ln -s /lib /lib64; \
    apt install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
    mkdir -p /opt/spark; \
    mkdir /opt/spark/python; \
    mkdir -p /opt/spark/examples; \
    mkdir -p /opt/spark/work-dir; \
    chmod g+w /opt/spark/work-dir; \
    touch /opt/spark/RELEASE; \
    chown -R spark:spark /opt/spark; \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
    rm -rf /var/cache/apt/*; \
    rm -rf /var/lib/apt/lists/*

# Install Apache Spark
# https://downloads.apache.org/spark/KEYS
ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz \
    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz.asc \
    GPG_KEY=34F0FC5C

RUN set -ex; \
    export SPARK_TMP="$(mktemp -d)"; \
    cd $SPARK_TMP; \
    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
    export GNUPGHOME="$(mktemp -d)"; \
    gpg --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
    gpg --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
    gpg --batch --verify spark.tgz.asc spark.tgz; \
    gpgconf --kill all; \
    rm -rf "$GNUPGHOME" spark.tgz.asc; \
    \
    tar -xf spark.tgz --strip-components=1; \
    chown -R spark:spark .; \
    mv jars /opt/spark/; \
    mv bin /opt/spark/; \
    mv sbin /opt/spark/; \
    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
    mv examples /opt/spark/; \
    mv kubernetes/tests /opt/spark/; \
    mv data /opt/spark/; \
    mv python/pyspark /opt/spark/python/pyspark/; \
    mv python/lib /opt/spark/python/lib/; \
    mv R /opt/spark/; \
    chmod a+x /opt/decom.sh; \
    cd ..; \
    rm -rf "$SPARK_TMP";

COPY entrypoint.sh /opt/

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir

USER spark

ENTRYPOINT [ "/opt/entrypoint.sh" ]
```
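
The download step verifies the release tarball against the 34F0FC5C key before unpacking. The same check can be reproduced outside the build; a minimal sketch using the URLs and key ID pinned above:

```bash
# Manual re-run of the Dockerfile's signature check (same URLs and key ID).
wget -nv -O spark.tgz https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz
wget -nv -O spark.tgz.asc https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz.asc
export GNUPGHOME="$(mktemp -d)"
gpg --keyserver hkps://keys.openpgp.org --recv-key 34F0FC5C
gpg --batch --verify spark.tgz.asc spark.tgz   # expect "Good signature"
```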
3.4.1/scala2.12-java11-ubuntu/entrypoint.sh

Lines changed: 123 additions & 0 deletions
```bash
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
attempt_setup_fake_passwd_entry() {
  # Check whether there is a passwd entry for the container UID
  local myuid; myuid="$(id -u)"
  # If there is no passwd entry for the container UID, attempt to fake one
  # You can also refer to the https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
  # It's to resolve OpenShift random UID case.
  # See also: https://github.com/docker-library/postgres/pull/448
  if ! getent passwd "$myuid" &> /dev/null; then
      local wrapper
      for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
        if [ -s "$wrapper" ]; then
          NSS_WRAPPER_PASSWD="$(mktemp)"
          NSS_WRAPPER_GROUP="$(mktemp)"
          export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
          local mygid; mygid="$(id -g)"
          printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
          printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
          break
        fi
      done
  fi
}

if [ -z "$JAVA_HOME" ]; then
  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
fi

SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
for v in "${!SPARK_JAVA_OPT_@}"; do
    SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
done

if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi

if ! [ -z ${PYSPARK_PYTHON+x} ]; then
    export PYSPARK_PYTHON
fi
if ! [ -z ${PYSPARK_DRIVER_PYTHON+x} ]; then
    export PYSPARK_DRIVER_PYTHON
fi

# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding customizations of this value from elsewhere e.g. Docker/K8s.
if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
fi

if ! [ -z ${HADOOP_CONF_DIR+x} ]; then
  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
fi

if ! [ -z ${SPARK_CONF_DIR+x} ]; then
  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
elif ! [ -z ${SPARK_HOME+x} ]; then
  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
fi

# Switch to spark if no USER specified (root by default) otherwise use USER directly
switch_spark_if_root() {
  if [ $(id -u) -eq 0 ]; then
    echo gosu spark
  fi
}

case "$1" in
  driver)
    shift 1
    CMD=(
      "$SPARK_HOME/bin/spark-submit"
      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
      --deploy-mode client
      "$@"
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;
  executor)
    shift 1
    CMD=(
      ${JAVA_HOME}/bin/java
      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
      -Xms$SPARK_EXECUTOR_MEMORY
      -Xmx$SPARK_EXECUTOR_MEMORY
      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
      --driver-url $SPARK_DRIVER_URL
      --executor-id $SPARK_EXECUTOR_ID
      --cores $SPARK_EXECUTOR_CORES
      --app-id $SPARK_APPLICATION_ID
      --hostname $SPARK_EXECUTOR_POD_IP
      --resourceProfileId $SPARK_RESOURCE_PROFILE_ID
      --podName $SPARK_EXECUTOR_POD_NAME
    )
    attempt_setup_fake_passwd_entry
    # Execute the container CMD under tini for better hygiene
    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
    ;;

  *)
    # Non-spark-on-k8s command provided, proceeding in pass-through mode...
    exec "$@"
    ;;
esac
```
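
The entrypoint dispatches on its first argument: `driver` and `executor` are the modes Spark on Kubernetes invokes, and anything else is exec'd directly in pass-through mode. A hedged pass-through example (image tag assumed from the FROM line's naming scheme):

```bash
# Pass-through mode: a first argument other than "driver"/"executor" is exec'd as-is.
docker run --rm spark:3.4.1-scala2.12-java11-ubuntu /opt/spark/bin/spark-submit --version
```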

tools/template.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -30,6 +30,8 @@
     "3.3.2": "C56349D886F2B01F8CAE794C653C2301FEA493EE",
     # issuer "[email protected]"
     "3.4.0": "CC68B3D16FE33A766705160BA7E57908C7A4E1B1",
+    # issuer "[email protected]"
+    "3.4.1": "34F0FC5C"
 }
```
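
Unlike the earlier entries, the 3.4.1 mapping stores the short key ID 34F0FC5C rather than a full 40-character fingerprint. If the full fingerprint is wanted for comparison with the longer entries, it can be read back after importing the key:

```bash
# After importing 34F0FC5C (see the Dockerfile above), print its full fingerprint.
gpg --list-keys --fingerprint 34F0FC5C
```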