This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark-docker.git
The following commit(s) were added to refs/heads/master by this push:
new 763fbfd [SPARK-54748] Publish Apache Spark `4.1.0` to docker registry
763fbfd is described below
commit 763fbfda79c3bd5e43c68932c6f8bdd8c21d00ab
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Dec 17 23:01:59 2025 -0800
[SPARK-54748] Publish Apache Spark `4.1.0` to docker registry
### What changes were proposed in this pull request?
This PR aims to publish Apache Spark `4.1.0` to the Docker registry.
### Why are the changes needed?
Apache Spark 4.1.0 has been officially released.
- https://github.com/apache/spark/releases/tag/v4.1.0
- https://spark.apache.org/docs/4.1.0/
- https://dist.apache.org/repos/dist/release/spark/spark-4.1.0/
We need to provide a Docker image of Apache Spark `4.1.0`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass the CIs.
Closes #99 from dongjoon-hyun/SPARK-54748.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.github/workflows/build_4.1.0.yaml | 43 +++++++
4.1.0/scala2.13-java17-python3-r-ubuntu/Dockerfile | 29 +++++
4.1.0/scala2.13-java17-python3-ubuntu/Dockerfile | 26 +++++
4.1.0/scala2.13-java17-r-ubuntu/Dockerfile | 28 +++++
4.1.0/scala2.13-java17-ubuntu/Dockerfile | 81 +++++++++++++
4.1.0/scala2.13-java17-ubuntu/entrypoint.sh | 130 +++++++++++++++++++++
4.1.0/scala2.13-java21-python3-r-ubuntu/Dockerfile | 29 +++++
4.1.0/scala2.13-java21-python3-ubuntu/Dockerfile | 26 +++++
4.1.0/scala2.13-java21-r-ubuntu/Dockerfile | 28 +++++
4.1.0/scala2.13-java21-ubuntu/Dockerfile | 81 +++++++++++++
4.1.0/scala2.13-java21-ubuntu/entrypoint.sh | 130 +++++++++++++++++++++
tools/template.py | 4 +-
versions.json | 72 ++++++++++--
13 files changed, 697 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/build_4.1.0.yaml
b/.github/workflows/build_4.1.0.yaml
new file mode 100644
index 0000000..7d165db
--- /dev/null
+++ b/.github/workflows/build_4.1.0.yaml
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build and Test (4.1.0)"
+
+on:
+ pull_request:
+ branches:
+ - 'master'
+ paths:
+ - '4.1.0/**'
+
+jobs:
+ run-build:
+ strategy:
+ matrix:
+ image-type: ["all", "python", "scala", "r"]
+ java: [17, 21]
+ name: Run
+ secrets: inherit
+ uses: ./.github/workflows/main.yml
+ with:
+ spark: 4.1.0
+ scala: 2.13
+ java: ${{ matrix.java }}
+ image-type: ${{ matrix.image-type }}
+
diff --git a/4.1.0/scala2.13-java17-python3-r-ubuntu/Dockerfile
b/4.1.0/scala2.13-java17-python3-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..cb3a6ee
--- /dev/null
+++ b/4.1.0/scala2.13-java17-python3-r-ubuntu/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y python3 python3-pip; \
+ apt-get install -y r-base r-base-dev; \
+ rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME=/usr/lib/R
+
+USER spark
diff --git a/4.1.0/scala2.13-java17-python3-ubuntu/Dockerfile
b/4.1.0/scala2.13-java17-python3-ubuntu/Dockerfile
new file mode 100644
index 0000000..ed219b9
--- /dev/null
+++ b/4.1.0/scala2.13-java17-python3-ubuntu/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y python3 python3-pip; \
+ rm -rf /var/lib/apt/lists/*
+
+USER spark
diff --git a/4.1.0/scala2.13-java17-r-ubuntu/Dockerfile
b/4.1.0/scala2.13-java17-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..12b5d8b
--- /dev/null
+++ b/4.1.0/scala2.13-java17-r-ubuntu/Dockerfile
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y r-base r-base-dev; \
+ rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME=/usr/lib/R
+
+USER spark
diff --git a/4.1.0/scala2.13-java17-ubuntu/Dockerfile
b/4.1.0/scala2.13-java17-ubuntu/Dockerfile
new file mode 100644
index 0000000..dc27565
--- /dev/null
+++ b/4.1.0/scala2.13-java17-ubuntu/Dockerfile
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM eclipse-temurin:17-jammy
+
+ARG spark_uid=185
+
+RUN groupadd --system --gid=${spark_uid} spark && \
+ useradd --system --uid=${spark_uid} --gid=spark -d /nonexistent spark
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user
libnss3 procps net-tools gosu libnss-wrapper; \
+ mkdir -p /opt/spark; \
+ mkdir /opt/spark/python; \
+ mkdir -p /opt/spark/examples; \
+ mkdir -p /opt/spark/work-dir; \
+ chmod g+w /opt/spark/work-dir; \
+ touch /opt/spark/RELEASE; \
+ chown -R spark:spark /opt/spark; \
+ echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
+ rm -rf /var/lib/apt/lists/*
+
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV
SPARK_TGZ_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.1.0/spark-4.1.0-bin-hadoop3.tgz?action=download
\
+
SPARK_TGZ_ASC_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.1.0/spark-4.1.0-bin-hadoop3.tgz.asc?action=download
\
+ GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C
+
+RUN set -ex; \
+ export SPARK_TMP="$(mktemp -d)"; \
+ cd $SPARK_TMP; \
+ wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+ wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
+ export GNUPGHOME="$(mktemp -d)"; \
+ gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
+ gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys
"$GPG_KEY"; \
+ gpg --batch --verify spark.tgz.asc spark.tgz; \
+ gpgconf --kill all; \
+ rm -rf "$GNUPGHOME" spark.tgz.asc; \
+ \
+ tar -xf spark.tgz --strip-components=1; \
+ chown -R spark:spark .; \
+ mv jars /opt/spark/; \
+ mv RELEASE /opt/spark/; \
+ mv bin /opt/spark/; \
+ mv sbin /opt/spark/; \
+ mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+ mv examples /opt/spark/; \
+ ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)"
/opt/spark/examples/jars/spark-examples.jar; \
+ mv kubernetes/tests /opt/spark/; \
+ mv data /opt/spark/; \
+ mv python/pyspark /opt/spark/python/pyspark/; \
+ mv python/lib /opt/spark/python/lib/; \
+ mv R /opt/spark/; \
+ chmod a+x /opt/decom.sh; \
+ cd ..; \
+ rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME=/opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+USER spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/4.1.0/scala2.13-java17-ubuntu/entrypoint.sh
b/4.1.0/scala2.13-java17-ubuntu/entrypoint.sh
new file mode 100755
index 0000000..c576d8f
--- /dev/null
+++ b/4.1.0/scala2.13-java17-ubuntu/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+ # Check whether there is a passwd entry for the container UID
+ local myuid; myuid="$(id -u)"
+ # If there is no passwd entry for the container UID, attempt to fake one
+ # You can also refer to the
https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
+ # It's to resolve OpenShift random UID case.
+ # See also: https://github.com/docker-library/postgres/pull/448
+ if ! getent passwd "$myuid" &> /dev/null; then
+ local wrapper
+ for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+ if [ -s "$wrapper" ]; then
+ NSS_WRAPPER_PASSWD="$(mktemp)"
+ NSS_WRAPPER_GROUP="$(mktemp)"
+ export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+ local mygid; mygid="$(id -g)"
+ printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous
uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+ printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+ break
+ fi
+ done
+ fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+ JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep
'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+ SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+ SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+ export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+ export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so
Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding
customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+ export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+ SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+ SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
+ SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
+fi
+
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
+# Switch to spark if no USER specified (root by default) otherwise use USER
directly
+switch_spark_if_root() {
+ if [ $(id -u) -eq 0 ]; then
+ echo gosu spark
+ fi
+}
+
+case "$1" in
+ driver)
+ shift 1
+ CMD=(
+ "$SPARK_HOME/bin/spark-submit"
+ --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+ --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
+ --deploy-mode client
+ "$@"
+ )
+ attempt_setup_fake_passwd_entry
+ # Execute the container CMD under tini for better hygiene
+ exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+ ;;
+ executor)
+ shift 1
+ CMD=(
+ ${JAVA_HOME}/bin/java
+ "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+ -Xms"$SPARK_EXECUTOR_MEMORY"
+ -Xmx"$SPARK_EXECUTOR_MEMORY"
+ -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
+ org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
+ --driver-url "$SPARK_DRIVER_URL"
+ --executor-id "$SPARK_EXECUTOR_ID"
+ --cores "$SPARK_EXECUTOR_CORES"
+ --app-id "$SPARK_APPLICATION_ID"
+ --hostname "$SPARK_EXECUTOR_POD_IP"
+ --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
+ --podName "$SPARK_EXECUTOR_POD_NAME"
+ )
+ attempt_setup_fake_passwd_entry
+ # Execute the container CMD under tini for better hygiene
+ exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+ ;;
+
+ *)
+ # Non-spark-on-k8s command provided, proceeding in pass-through mode...
+ exec "$@"
+ ;;
+esac
diff --git a/4.1.0/scala2.13-java21-python3-r-ubuntu/Dockerfile
b/4.1.0/scala2.13-java21-python3-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..bb223c7
--- /dev/null
+++ b/4.1.0/scala2.13-java21-python3-r-ubuntu/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java21-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y python3 python3-pip; \
+ apt-get install -y r-base r-base-dev; \
+ rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME=/usr/lib/R
+
+USER spark
diff --git a/4.1.0/scala2.13-java21-python3-ubuntu/Dockerfile
b/4.1.0/scala2.13-java21-python3-ubuntu/Dockerfile
new file mode 100644
index 0000000..d58e9ad
--- /dev/null
+++ b/4.1.0/scala2.13-java21-python3-ubuntu/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java21-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y python3 python3-pip; \
+ rm -rf /var/lib/apt/lists/*
+
+USER spark
diff --git a/4.1.0/scala2.13-java21-r-ubuntu/Dockerfile
b/4.1.0/scala2.13-java21-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..586a6bb
--- /dev/null
+++ b/4.1.0/scala2.13-java21-r-ubuntu/Dockerfile
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:4.1.0-scala2.13-java21-ubuntu
+
+USER root
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y r-base r-base-dev; \
+ rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME=/usr/lib/R
+
+USER spark
diff --git a/4.1.0/scala2.13-java21-ubuntu/Dockerfile
b/4.1.0/scala2.13-java21-ubuntu/Dockerfile
new file mode 100644
index 0000000..c0181d8
--- /dev/null
+++ b/4.1.0/scala2.13-java21-ubuntu/Dockerfile
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM eclipse-temurin:21-jammy
+
+ARG spark_uid=185
+
+RUN groupadd --system --gid=${spark_uid} spark && \
+ useradd --system --uid=${spark_uid} --gid=spark -d /nonexistent spark
+
+RUN set -ex; \
+ apt-get update; \
+ apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user
libnss3 procps net-tools gosu libnss-wrapper; \
+ mkdir -p /opt/spark; \
+ mkdir /opt/spark/python; \
+ mkdir -p /opt/spark/examples; \
+ mkdir -p /opt/spark/work-dir; \
+ chmod g+w /opt/spark/work-dir; \
+ touch /opt/spark/RELEASE; \
+ chown -R spark:spark /opt/spark; \
+ echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
+ rm -rf /var/lib/apt/lists/*
+
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV
SPARK_TGZ_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.1.0/spark-4.1.0-bin-hadoop3.tgz?action=download
\
+
SPARK_TGZ_ASC_URL=https://www.apache.org/dyn/closer.lua/spark/spark-4.1.0/spark-4.1.0-bin-hadoop3.tgz.asc?action=download
\
+ GPG_KEY=F28C9C925C188C35E345614DEDA00CE834F0FC5C
+
+RUN set -ex; \
+ export SPARK_TMP="$(mktemp -d)"; \
+ cd $SPARK_TMP; \
+ wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+ wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
+ export GNUPGHOME="$(mktemp -d)"; \
+ gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
+ gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys
"$GPG_KEY"; \
+ gpg --batch --verify spark.tgz.asc spark.tgz; \
+ gpgconf --kill all; \
+ rm -rf "$GNUPGHOME" spark.tgz.asc; \
+ \
+ tar -xf spark.tgz --strip-components=1; \
+ chown -R spark:spark .; \
+ mv jars /opt/spark/; \
+ mv RELEASE /opt/spark/; \
+ mv bin /opt/spark/; \
+ mv sbin /opt/spark/; \
+ mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+ mv examples /opt/spark/; \
+ ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)"
/opt/spark/examples/jars/spark-examples.jar; \
+ mv kubernetes/tests /opt/spark/; \
+ mv data /opt/spark/; \
+ mv python/pyspark /opt/spark/python/pyspark/; \
+ mv python/lib /opt/spark/python/lib/; \
+ mv R /opt/spark/; \
+ chmod a+x /opt/decom.sh; \
+ cd ..; \
+ rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME=/opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+USER spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/4.1.0/scala2.13-java21-ubuntu/entrypoint.sh
b/4.1.0/scala2.13-java21-ubuntu/entrypoint.sh
new file mode 100755
index 0000000..c576d8f
--- /dev/null
+++ b/4.1.0/scala2.13-java21-ubuntu/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+ # Check whether there is a passwd entry for the container UID
+ local myuid; myuid="$(id -u)"
+ # If there is no passwd entry for the container UID, attempt to fake one
+ # You can also refer to the
https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
+ # It's to resolve OpenShift random UID case.
+ # See also: https://github.com/docker-library/postgres/pull/448
+ if ! getent passwd "$myuid" &> /dev/null; then
+ local wrapper
+ for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+ if [ -s "$wrapper" ]; then
+ NSS_WRAPPER_PASSWD="$(mktemp)"
+ NSS_WRAPPER_GROUP="$(mktemp)"
+ export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+ local mygid; mygid="$(id -g)"
+ printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous
uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+ printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+ break
+ fi
+ done
+ fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+ JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep
'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+ SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+ SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+ export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+ export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so
Hadoop jars are available to the executor.
+# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding
customizations of this value from elsewhere e.g. Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+ export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+ SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+ SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
+ SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
+fi
+
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
+# Switch to spark if no USER specified (root by default) otherwise use USER
directly
+switch_spark_if_root() {
+ if [ $(id -u) -eq 0 ]; then
+ echo gosu spark
+ fi
+}
+
+case "$1" in
+ driver)
+ shift 1
+ CMD=(
+ "$SPARK_HOME/bin/spark-submit"
+ --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+ --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
+ --deploy-mode client
+ "$@"
+ )
+ attempt_setup_fake_passwd_entry
+ # Execute the container CMD under tini for better hygiene
+ exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+ ;;
+ executor)
+ shift 1
+ CMD=(
+ ${JAVA_HOME}/bin/java
+ "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+ -Xms"$SPARK_EXECUTOR_MEMORY"
+ -Xmx"$SPARK_EXECUTOR_MEMORY"
+ -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
+ org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
+ --driver-url "$SPARK_DRIVER_URL"
+ --executor-id "$SPARK_EXECUTOR_ID"
+ --cores "$SPARK_EXECUTOR_CORES"
+ --app-id "$SPARK_APPLICATION_ID"
+ --hostname "$SPARK_EXECUTOR_POD_IP"
+ --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
+ --podName "$SPARK_EXECUTOR_POD_NAME"
+ )
+ attempt_setup_fake_passwd_entry
+ # Execute the container CMD under tini for better hygiene
+ exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+ ;;
+
+ *)
+ # Non-spark-on-k8s command provided, proceeding in pass-through mode...
+ exec "$@"
+ ;;
+esac
diff --git a/tools/template.py b/tools/template.py
index 88dc1b4..2890429 100755
--- a/tools/template.py
+++ b/tools/template.py
@@ -71,7 +71,9 @@ GPG_KEY_DICT = {
# issuer "[email protected]"
"4.1.0-preview3": "0FE4571297AB84440673665669600C8338F65970",
# issuer "[email protected]"
- "4.1.0-preview4": "F28C9C925C188C35E345614DEDA00CE834F0FC5C"
+ "4.1.0-preview4": "F28C9C925C188C35E345614DEDA00CE834F0FC5C",
+ # issuer "[email protected]"
+ "4.1.0": "F28C9C925C188C35E345614DEDA00CE834F0FC5C"
}
diff --git a/versions.json b/versions.json
index ac8cfc0..4456f5f 100644
--- a/versions.json
+++ b/versions.json
@@ -1,5 +1,64 @@
{
"versions": [
+ {
+ "path": "4.1.0/scala2.13-java21-python3-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java21-python3-ubuntu",
+ "4.1.0-java21-python3",
+ "4.1.0-python3",
+ "4.1.0",
+ "python3",
+ "latest"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java21-r-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java21-r-ubuntu",
+ "4.1.0-java21-r",
+ "4.1.0-r",
+ "r"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java21-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java21-ubuntu",
+ "4.1.0-java21-scala",
+ "4.1.0-scala",
+ "scala"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java21-python3-r-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java21-python3-r-ubuntu"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java17-python3-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java17-python3-ubuntu",
+ "4.1.0-java17",
+ "python3-java17"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java17-r-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java17-r-ubuntu",
+ "4.1.0-java17-r",
+ "r-java17"
+ ]
+ },
+ {
+ "path": "4.1.0/scala2.13-java17-ubuntu",
+ "tags": [
+ "4.1.0-scala2.13-java17-ubuntu",
+ "4.1.0-scala-java17",
+ "scala-java17"
+ ]
+ },
{
"path": "4.1.0-preview4/scala2.13-java21-python3-ubuntu",
"tags": [
@@ -211,9 +270,7 @@
"tags": [
"4.0.1-scala2.13-java21-python3-ubuntu",
"4.0.1-java21-python3",
- "4.0.1-java21",
- "python3",
- "latest"
+ "4.0.1-java21"
]
},
{
@@ -241,24 +298,21 @@
"tags": [
"4.0.1-scala2.13-java17-python3-ubuntu",
"4.0.1-python3",
- "4.0.1",
- "python3-java17"
+ "4.0.1"
]
},
{
"path": "4.0.1/scala2.13-java17-r-ubuntu",
"tags": [
"4.0.1-scala2.13-java17-r-ubuntu",
- "4.0.1-r",
- "r"
+ "4.0.1-r"
]
},
{
"path": "4.0.1/scala2.13-java17-ubuntu",
"tags": [
"4.0.1-scala2.13-java17-ubuntu",
- "4.0.1-scala",
- "scala"
+ "4.0.1-scala"
]
},
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]