This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c15e36b70b7c [SPARK-55705][PYTHON][INFRA] Upgrade PyArrow to 23
c15e36b70b7c is described below

commit c15e36b70b7c72909a46ad32006a3cf5a6ee0c1c
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Feb 26 09:05:40 2026 -0800

    [SPARK-55705][PYTHON][INFRA] Upgrade PyArrow to 23
    
    ### What changes were proposed in this pull request?
    Upgrade PyArrow to 23
    
    ### Why are the changes needed?
    Refresh the test images to test against the latest PyArrow release.
    
    ### Does this PR introduce _any_ user-facing change?
    no, infra-only
    
    ### How was this patch tested?
    1. For the changes in lint/doc/python-312, the PR builder should cover them;
    2. For the other places, we will monitor the scheduled jobs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #54504 from zhengruifeng/upgrade_pa_23.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/python_hosted_runner_test.yml         | 2 +-
 dev/spark-test-image/docs/Dockerfile                    | 2 +-
 dev/spark-test-image/lint/Dockerfile                    | 2 +-
 dev/spark-test-image/python-310/Dockerfile              | 2 +-
 dev/spark-test-image/python-311/Dockerfile              | 2 +-
 dev/spark-test-image/python-312-classic-only/Dockerfile | 2 +-
 dev/spark-test-image/python-312-pandas-3/Dockerfile     | 2 +-
 dev/spark-test-image/python-312/Dockerfile              | 2 +-
 dev/spark-test-image/python-313/Dockerfile              | 2 +-
 dev/spark-test-image/python-314-nogil/Dockerfile        | 2 +-
 dev/spark-test-image/python-314/Dockerfile              | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/python_hosted_runner_test.yml 
b/.github/workflows/python_hosted_runner_test.yml
index 235d8e0064ca..a5aff7ac5ce0 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -154,7 +154,7 @@ jobs:
         run: |
           python${{matrix.python}} -m pip install --ignore-installed 
'blinker>=1.6.2'
           python${{matrix.python}} -m pip install --ignore-installed 
'six==1.16.0'
-          python${{matrix.python}} -m pip install numpy 'pyarrow>=22.0.0' 
'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage 
matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 
unittest-xml-reporting && \
+          python${{matrix.python}} -m pip install numpy 'pyarrow>=23.0.0' 
'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage 
matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 
unittest-xml-reporting && \
           python${{matrix.python}} -m pip install 'grpcio==1.76.0' 
'grpcio-status==1.76.0' 'protobuf==6.33.5' 'googleapis-common-protos==1.71.0' 
'zstandard==0.25.0' 'graphviz==0.20.3' && \
           python${{matrix.python}} -m pip cache purge
       - name: List Python packages
diff --git a/dev/spark-test-image/docs/Dockerfile 
b/dev/spark-test-image/docs/Dockerfile
index 347192b3c334..1cdfde2d046c 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -88,7 +88,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # See 'ipython_genutils' in SPARK-38517
 # See 'docutils<0.18.0' in SPARK-39421
 RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe \
-  ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 
'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
+  ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 
'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
   'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 
'black==23.12.1' \
   'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
   'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5' \
diff --git a/dev/spark-test-image/lint/Dockerfile 
b/dev/spark-test-image/lint/Dockerfile
index 6fa318ab5d03..ac95c002f491 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -95,7 +95,7 @@ RUN python3.12 -m pip install \
     'pandas' \
     'pandas-stubs' \
     'plotly>=4.8' \
-    'pyarrow>=22.0.0' \
+    'pyarrow>=23.0.0' \
     'pytest-mypy-plugins==1.9.3' \
     'pytest==7.1.3' \
     'scipy>=1.8.0' \
diff --git a/dev/spark-test-image/python-310/Dockerfile 
b/dev/spark-test-image/python-310/Dockerfile
index cd1aa79dac5f..c8752c8bb340 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -63,7 +63,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.10 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS && \
diff --git a/dev/spark-test-image/python-311/Dockerfile 
b/dev/spark-test-image/python-311/Dockerfile
index 47354e2866f0..9c65caff2efa 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -60,7 +60,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.11 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS && \
diff --git a/dev/spark-test-image/python-312-classic-only/Dockerfile 
b/dev/spark-test-image/python-312-classic-only/Dockerfile
index 0a2b7b291a44..ba7d34625cc8 100644
--- a/dev/spark-test-image/python-312-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-312-classic-only/Dockerfile
@@ -56,7 +56,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.12 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 pandas==2.3.3 plotly<6.0.0 
matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 pandas==2.3.3 plotly<6.0.0 
matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
 
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
diff --git a/dev/spark-test-image/python-312-pandas-3/Dockerfile 
b/dev/spark-test-image/python-312-pandas-3/Dockerfile
index 101a5884a5a2..6b9844372b46 100644
--- a/dev/spark-test-image/python-312-pandas-3/Dockerfile
+++ b/dev/spark-test-image/python-312-pandas-3/Dockerfile
@@ -59,7 +59,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.12 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas>=3 scipy 
plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas>=3 scipy 
plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \
diff --git a/dev/spark-test-image/python-312/Dockerfile 
b/dev/spark-test-image/python-312/Dockerfile
index 14cdf08621fb..ad030f4cd5fb 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -56,7 +56,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.12 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \
diff --git a/dev/spark-test-image/python-313/Dockerfile 
b/dev/spark-test-image/python-313/Dockerfile
index dc051d953a1b..5d4dc161bc1b 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -60,7 +60,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.13 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.13 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \
diff --git a/dev/spark-test-image/python-314-nogil/Dockerfile 
b/dev/spark-test-image/python-314-nogil/Dockerfile
index c2687ae1c7ef..3cc92f9e63f0 100644
--- a/dev/spark-test-image/python-314-nogil/Dockerfile
+++ b/dev/spark-test-image/python-314-nogil/Dockerfile
@@ -62,5 +62,5 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # TODO: Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS when it supports Python 3.14 
free threaded
 # TODO: Add lxml, grpcio, grpcio-status back when they support Python 3.14 
free threaded
-RUN python3.14t -m pip install 'numpy>=2.1' 'pyarrow>=19.0.0' 'six==1.16.0' 
'pandas==2.3.3' 'pystack>=1.6.0' scipy coverage matplotlib openpyxl jinja2 
psutil && \
+RUN python3.14t -m pip install 'numpy>=2.1' 'pyarrow>=23.0.0' 'six==1.16.0' 
'pandas==2.3.3' 'pystack>=1.6.0' scipy coverage matplotlib openpyxl jinja2 
psutil && \
     python3.14t -m pip cache purge
diff --git a/dev/spark-test-image/python-314/Dockerfile 
b/dev/spark-test-image/python-314/Dockerfile
index eb245ec42438..57154301fbac 100644
--- a/dev/spark-test-image/python-314/Dockerfile
+++ b/dev/spark-test-image/python-314/Dockerfile
@@ -60,7 +60,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.14 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=23.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
 RUN python3.14 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to