3
3
# spark-builder: provides client libs for spark-connect
4
4
FROM stackable/image/spark-k8s AS spark-builder
5
5
6
- FROM stackable/image/java-base
7
-
8
6
ARG PRODUCT
9
7
ARG PYTHON
10
8
ARG RELEASE
@@ -18,42 +16,36 @@ LABEL name="Stackable Spark Connect Examples" \
18
16
summary="Spark Connect Examples" \
19
17
description="Spark Connect client libraries for Python and the JVM, including some examples."
20
18
19
+ # Need root to install setuptools
20
+ USER root
21
21
22
- ENV HOME=/stackable
23
-
24
- COPY spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
25
- COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark/connect /stackable/spark/connect
22
+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
23
+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter
26
24
27
25
RUN <<EOF
28
26
microdnf update
29
27
# python{version}-setuptools: needed to build the pyspark[connect] package
30
28
microdnf install --nodocs \
31
- "python${PYTHON}" \
32
- "python${PYTHON}-pip" \
33
29
"python${PYTHON}-setuptools"
34
30
microdnf clean all
35
31
rm -rf /var/cache/yum
36
32
37
- ln -s /usr/bin/python${PYTHON} /usr/bin/python
38
- ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
39
-
40
- # Install python libraries for the spark connect client
41
- # shellcheck disable=SC2102
42
- pip install --no-cache-dir pyspark[connect]==${PRODUCT}
43
-
44
33
# All files and folders owned by root group to support running as arbitrary users.
45
34
# This is best practice as all container users will belong to the root group (0).
46
35
chown -R ${STACKABLE_USER_UID}:0 /stackable
47
36
chmod -R g=u /stackable
48
37
EOF
49
38
50
- # ----------------------------------------
51
- # Attention: We are changing the group of all files in /stackable directly above
52
- # If you do any file based actions (copying / creating etc.) below this comment you
53
- # absolutely need to make sure that the correct permissions are applied!
54
- # chown ${STACKABLE_USER_UID}:0
55
- # ----------------------------------------
56
-
57
39
USER ${STACKABLE_USER_UID}
58
40
41
+ # Install python packages.
42
+ # Packages are intentionally installed in "user mode" to reduce the container attack surface.
43
+ # - pyspark[connect] = spark connect client libs
44
+ # - jupyterlab = notebook client used in demos
45
+ RUN pip install --no-cache-dir --user \
46
+ "pyspark[connect]==${PRODUCT}" \
47
+ "jupyterlab==4.4.1" \
48
+ "scikit-learn==1.3.1" \
49
+ "matplotlib==3.10.1"
50
+
59
51
WORKDIR /stackable/spark-connect-examples/python
0 commit comments