This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3b51e19e955f [SPARK-53834][INFRA] Add a separate docker file for
Python 3.14 daily build
3b51e19e955f is described below
commit 3b51e19e955f5dc2977b4c3d805a2784c81ec638
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Oct 8 08:07:42 2025 -0700
[SPARK-53834][INFRA] Add a separate docker file for Python 3.14 daily build
### What changes were proposed in this pull request?
This PR aims to add a separate docker file for `Python 3.14` daily build.
### Why are the changes needed?
To prepare Python 3.14 test coverage for Apache Spark 4.1.0. Note that
1. SPARK-53835 is filed to handle `pyarrow/mlflow/torch/torchvision`
package installation later when they are ready.
2. This PR will expose two kinds of Python UT failures in order to help us
be ready during Apache Spark 4.1.0 preparation.
- Python 3.14 related failures
- Python Package (PyArrow/MLFlow/Torch/TorchVision) related failures
- Both `Classic` and `Connect` mode-related failures
### Does this PR introduce _any_ user-facing change?
No, this is a new test infra.
### How was this patch tested?
Manual review.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52544 from dongjoon-hyun/SPARK-53834.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.github/workflows/build_infra_images_cache.yml | 14 +++++
.github/workflows/build_python_3.14.yml | 47 +++++++++++++++
dev/spark-test-image/python-314/Dockerfile | 79 ++++++++++++++++++++++++++
3 files changed, 140 insertions(+)
diff --git a/.github/workflows/build_infra_images_cache.yml
b/.github/workflows/build_infra_images_cache.yml
index 9ec93a4af52c..430903b570ea 100644
--- a/.github/workflows/build_infra_images_cache.yml
+++ b/.github/workflows/build_infra_images_cache.yml
@@ -39,6 +39,7 @@ on:
- 'dev/spark-test-image/python-312/Dockerfile'
- 'dev/spark-test-image/python-313/Dockerfile'
- 'dev/spark-test-image/python-313-nogil/Dockerfile'
+ - 'dev/spark-test-image/python-314/Dockerfile'
- 'dev/spark-test-image/numpy-213/Dockerfile'
- '.github/workflows/build_infra_images_cache.yml'
# Create infra image when cutting down branches/tags
@@ -230,6 +231,19 @@ jobs:
- name: Image digest (PySpark with Python 3.13 no GIL)
if: hashFiles('dev/spark-test-image/python-313-nogil/Dockerfile') != ''
run: echo ${{
steps.docker_build_pyspark_python_313_nogil.outputs.digest }}
+ - name: Build and push (PySpark with Python 3.14)
+ if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != ''
+ id: docker_build_pyspark_python_314
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/python-314/
+ push: true
+ tags:
ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{
github.ref_name }}-static
+ cache-from:
type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{
github.ref_name }}
+ cache-to:
type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{
github.ref_name }},mode=max
+ - name: Image digest (PySpark with Python 3.14)
+ if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != ''
+ run: echo ${{ steps.docker_build_pyspark_python_314.outputs.digest }}
- name: Build and push (PySpark with Numpy 2.1.3)
if: hashFiles('dev/spark-test-image/numpy-213/Dockerfile') != ''
id: docker_build_pyspark_numpy_213
diff --git a/.github/workflows/build_python_3.14.yml
b/.github/workflows/build_python_3.14.yml
new file mode 100644
index 000000000000..45ea43f1d491
--- /dev/null
+++ b/.github/workflows/build_python_3.14.yml
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Python-only (master, Python 3.14)"
+
+on:
+ schedule:
+ - cron: '0 21 * * *'
+ workflow_dispatch:
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 17
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "PYSPARK_IMAGE_TO_TEST": "python-314",
+ "PYTHON_TO_TEST": "python3.14"
+ }
+ jobs: >-
+ {
+ "pyspark": "true",
+ "pyspark-pandas": "true"
+ }
diff --git a/dev/spark-test-image/python-314/Dockerfile
b/dev/spark-test-image/python-314/Dockerfile
new file mode 100644
index 000000000000..842a228f05b7
--- /dev/null
+++ b/dev/spark-test-image/python-314/Dockerfile
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Image for building and testing Spark branches. Based on Ubuntu 22.04.
+# See also in https://hub.docker.com/_/ubuntu
+FROM ubuntu:jammy-20240911.1
+LABEL org.opencontainers.image.authors="Apache Spark project
<[email protected]>"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark
with Python 3.14"
+# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
+LABEL org.opencontainers.image.version=""
+
+ENV FULL_REFRESH_DATE=20251007
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+
+RUN apt-get update && apt-get install -y \
+ build-essential \
+ ca-certificates \
+ curl \
+ gfortran \
+ git \
+ gnupg \
+ libcurl4-openssl-dev \
+ libfontconfig1-dev \
+ libfreetype6-dev \
+ libfribidi-dev \
+ libgit2-dev \
+ libharfbuzz-dev \
+ libjpeg-dev \
+ liblapack-dev \
+ libopenblas-dev \
+ libpng-dev \
+ libpython3-dev \
+ libssl-dev \
+ libtiff5-dev \
+ libxml2-dev \
+ openjdk-17-jdk-headless \
+ pkg-config \
+ qpdf \
+ tzdata \
+ software-properties-common \
+ wget \
+ zlib1g-dev
+
+# Install Python 3.14
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y \
+ python3.14 \
+ && apt-get autoremove --purge -y \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+
+ARG BASIC_PIP_PKGS="numpy six==1.16.0 pandas==2.3.3 scipy plotly<6.0.0
coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+# Python deps for Spark Connect
+ARG CONNECT_PIP_PKGS="grpcio==1.75.1 grpcio-status==1.71.2 protobuf==5.29.5
googleapis-common-protos==1.65.0 graphviz==0.20.3"
+
+# Install Python 3.14 packages
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.14
+RUN python3.14 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs
this
+RUN python3.14 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting
$CONNECT_PIP_PKGS lxml && \
+ python3.14 -m pip install torcheval && \
+ python3.14 -m pip cache purge
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]