This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 105570b34ea8 [SPARK-52525][PYTHON][INFRA] Refresh testing images for pyarrow 20
105570b34ea8 is described below
commit 105570b34ea806b71d450db54315b2a37631983f
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jun 19 08:23:06 2025 +0800
[SPARK-52525][PYTHON][INFRA] Refresh testing images for pyarrow 20
### What changes were proposed in this pull request?
Refresh testing images for pyarrow 20
### Why are the changes needed?
to test against latest pyarrow
### Does this PR introduce _any_ user-facing change?
no, infra-only
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #51212 from zhengruifeng/pyarrow_20.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
dev/spark-test-image/lint/Dockerfile | 4 ++--
dev/spark-test-image/python-309/Dockerfile | 4 ++--
dev/spark-test-image/python-310/Dockerfile | 4 ++--
dev/spark-test-image/python-311-classic-only/Dockerfile | 4 ++--
dev/spark-test-image/python-311/Dockerfile | 4 ++--
dev/spark-test-image/python-312/Dockerfile | 4 ++--
dev/spark-test-image/python-313-nogil/Dockerfile | 4 ++--
dev/spark-test-image/python-313/Dockerfile | 4 ++--
8 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
index e43ca46d1409..d9172b199f28 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for Linter"
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250519
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -93,7 +93,7 @@ RUN python3.11 -m pip install \
'pandas' \
'pandas-stubs==1.2.0.53' \
'plotly>=4.8' \
- 'pyarrow>=19.0.0' \
+ 'pyarrow>=20.0.0' \
'pytest-mypy-plugins==1.9.3' \
'pytest==7.1.3' \
&& python3.11 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu \
diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile
index 83ba24a7b799..305e7ea1973a 100644
--- a/dev/spark-test-image/python-309/Dockerfile
+++ b/dev/spark-test-image/python-309/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250312
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index a3b68c574f15..d0c26aa8c7c6 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250312
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-311-classic-only/Dockerfile b/dev/spark-test-image/python-311-classic-only/Dockerfile
index cf4f177637f5..5c2f8a4f3d38 100644
--- a/dev/spark-test-image/python-311-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-311-classic-only/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark Cl
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250424
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 pandas==2.3.0 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 pandas==2.3.0 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
# Install Python 3.11 packages
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 9309b0a9733e..38cba230f4bd 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250312
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 375572a597c0..3f66635009dd 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250312
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index 895d9ebb7e24..f05d85691346 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250407
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index fdb4fe739c13..1ef08da0f40d 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark wi
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20250312
+ENV FULL_REFRESH_DATE=20250618
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
-ARG BASIC_PIP_PKGS="numpy pyarrow>=19.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.0 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
# Python deps for Spark Connect
ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org