This is an automated email from the ASF dual-hosted git repository.

dongjoon-hyun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 51ff9a146293 [SPARK-56928][PYTHON] Increase minimum numpy version to 
1.23.2
51ff9a146293 is described below

commit 51ff9a14629360469e6b62a8d08f6b782bf54ec0
Author: Tian Gao <[email protected]>
AuthorDate: Mon May 18 17:24:56 2026 -0700

    [SPARK-56928][PYTHON] Increase minimum numpy version to 1.23.2
    
    ### What changes were proposed in this pull request?
    
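    Increase the minimum version of `numpy` to 1.23.2 across the CI workflows, Docker images, documentation, packaging metadata, and the runtime version check.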
    
    ### Why are the changes needed?
    
    1.23.2 is the first numpy version that supports Python 3.11. Now that 
support for Python 3.10 has been removed, it doesn't make sense to keep the 
minimum numpy version that low. Keeping it that low is actually breaking our CI - 
https://github.com/apache/spark/actions/runs/26047920278/job/76576848283
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    CI.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #55959 from gaogaotiantian/upgrade-min-numpy-ver.
    
    Authored-by: Tian Gao <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/build_and_test.yml           | 2 +-
 .github/workflows/maven_test.yml               | 2 +-
 .github/workflows/pages.yml                    | 2 +-
 dev/create-release/spark-rm/Dockerfile         | 2 +-
 dev/requirements.txt                           | 2 +-
 dev/spark-test-image/docs/Dockerfile           | 2 +-
 dev/spark-test-image/python-minimum/Dockerfile | 2 +-
 python/docs/source/getting_started/install.rst | 4 ++--
 python/packaging/classic/setup.py              | 2 +-
 python/packaging/client/setup.py               | 2 +-
 python/packaging/connect/setup.py              | 2 +-
 python/pyspark/sql/pandas/utils.py             | 2 +-
 12 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index d722444379ac..d8e5df4f9a88 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -397,7 +397,7 @@ jobs:
     - name: Install Python packages (Python 3.12)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 
'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 
'yarn')
       run: |
-        python3.12 -m pip install 'numpy>=1.22' pyarrow 'pandas==2.3.3' pyyaml 
scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.76.0' 
'grpcio-status==1.76.0' 'protobuf==6.33.5' 'zstandard==0.25.0'
+        python3.12 -m pip install 'numpy>=1.23.2' pyarrow 'pandas==2.3.3' 
pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.76.0' 
'grpcio-status==1.76.0' 'protobuf==6.33.5' 'zstandard==0.25.0'
         python3.12 -m pip list
     # Run the tests.
     - name: Run tests
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index 2aaa0e2f3031..357d869d1b88 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -182,7 +182,7 @@ jobs:
       - name: Install Python packages (Python 3.12)
         if: contains(matrix.modules, 'resource-managers#yarn') || 
(contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
         run: |
-          python3.12 -m pip install 'numpy>=1.22' pyarrow 'pandas==2.3.3' 
pyyaml scipy unittest-xml-reporting 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'zstandard==0.25.0'
+          python3.12 -m pip install 'numpy>=1.23.2' pyarrow 'pandas==2.3.3' 
pyyaml scipy unittest-xml-reporting 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'zstandard==0.25.0'
           python3.12 -m pip list
       # Run the tests using script command.
       # BSD's script command doesn't support -c option, and the usage is 
different from Linux's one.
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 10ac00860a20..6130d0271770 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -61,7 +61,7 @@ jobs:
       - name: Install Python dependencies
         run: |
          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' 
sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 
pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
+            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' 
pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
             'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 
'pytest-mypy-plugins==1.9.3' 'ruff==0.14.8' \
             'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5'
diff --git a/dev/create-release/spark-rm/Dockerfile 
b/dev/create-release/spark-rm/Dockerfile
index a4c4c9619333..fffe7e387a31 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -59,7 +59,7 @@ RUN python3.10 -m pip install --ignore-installed 
'blinker>=1.6.2' && \
 # See 'ipython_genutils' in SPARK-38517, 'docutils<0.18.0' in SPARK-39421
 RUN python3.10 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' \
     sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-    ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 
pandas \
+    ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' pyarrow 
pandas \
     'plotly>=4.8' 'docutils<0.18.0' 'flake8==3.9.0' 'mypy==1.19.1' 
'pytest==7.1.3' \
     'pytest-mypy-plugins==1.9.3' 'ruff==0.14.8' 'pandas-stubs==1.2.0.53' \
     'grpcio==1.76.0' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 525353a2b5f8..37c6a3b89d0d 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -2,7 +2,7 @@
 py4j>=0.10.9.9
 
 # PySpark dependencies (optional)
-numpy>=1.22
+numpy>=1.23.2
 pyarrow>=18.0.0
 pandas>=2.2.0
 scipy>=1.8.0
diff --git a/dev/spark-test-image/docs/Dockerfile 
b/dev/spark-test-image/docs/Dockerfile
index 3b02e2ae1cff..7e3f63b05acd 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -92,7 +92,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 # See 'ipython_genutils' in SPARK-38517
 # See 'docutils<0.18.0' in SPARK-39421
 RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe \
-  ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 
'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
+  ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' 
'pyarrow>=23.0.0' 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
   'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 
'ruff==0.14.8' \
   'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
   'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5' \
diff --git a/dev/spark-test-image/python-minimum/Dockerfile 
b/dev/spark-test-image/python-minimum/Dockerfile
index 3d791bcb881e..4d110600d826 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -63,7 +63,7 @@ ENV VIRTUAL_ENV=/opt/spark-venv
 RUN python3.11 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 
scipy scikit-learn coverage unittest-xml-reporting psutil"
+ARG BASIC_PIP_PKGS="numpy==1.23.2 pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 
scipy scikit-learn coverage unittest-xml-reporting psutil"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 
protobuf==6.33.5"
 
 RUN python3.11 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \
diff --git a/python/docs/source/getting_started/install.rst 
b/python/docs/source/getting_started/install.rst
index 11c610b69277..ba0da2d61428 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -290,7 +290,7 @@ Installable with ``pip install "pyspark[ml]"``.
 ======= ================= ======================================
 Package Supported version Note
 ======= ================= ======================================
-`numpy` >=1.22            Required for MLlib DataFrame-based API
+`numpy` >=1.23.2          Required for MLlib DataFrame-based API
 ======= ================= ======================================
 
 Additional libraries that enhance functionality but are not included in the 
installation packages:
@@ -310,7 +310,7 @@ Installable with ``pip install "pyspark[mllib]"``.
 ======= ================= ==================
 Package Supported version Note
 ======= ================= ==================
-`numpy` >=1.22            Required for MLlib
+`numpy` >=1.23.2          Required for MLlib
 ======= ================= ==================
 
 Declarative Pipelines
diff --git a/python/packaging/classic/setup.py 
b/python/packaging/classic/setup.py
index 95eea42f5cd9..bee34e06d5c3 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -152,7 +152,7 @@ if in_spark:
 # python/docs/source/tutorial/sql/arrow_pandas.rst,
 # python/packaging/client/setup.py, and python/packaging/connect/setup.py
 _minimum_pandas_version = "2.2.0"
-_minimum_numpy_version = "1.21"
+_minimum_numpy_version = "1.23.2"
 _minimum_pyarrow_version = "18.0.0"
 _minimum_grpc_version = "1.76.0"
 _minimum_googleapis_common_protos_version = "1.71.0"
diff --git a/python/packaging/client/setup.py b/python/packaging/client/setup.py
index b903765ecb63..564478af8cd8 100755
--- a/python/packaging/client/setup.py
+++ b/python/packaging/client/setup.py
@@ -135,7 +135,7 @@ try:
     # python/docs/source/tutorial/sql/arrow_pandas.rst,
     # python/packaging/classic/setup.py, and python/packaging/connect/setup.py
     _minimum_pandas_version = "2.2.0"
-    _minimum_numpy_version = "1.21"
+    _minimum_numpy_version = "1.23.2"
     _minimum_pyarrow_version = "18.0.0"
     _minimum_grpc_version = "1.76.0"
     _minimum_googleapis_common_protos_version = "1.71.0"
diff --git a/python/packaging/connect/setup.py 
b/python/packaging/connect/setup.py
index 0cc7fed6d5f3..3df383f23a04 100755
--- a/python/packaging/connect/setup.py
+++ b/python/packaging/connect/setup.py
@@ -88,7 +88,7 @@ try:
     # python/docs/source/tutorial/sql/arrow_pandas.rst,
     # python/packaging/classic/setup.py, and python/packaging/client/setup.py
     _minimum_pandas_version = "2.2.0"
-    _minimum_numpy_version = "1.21"
+    _minimum_numpy_version = "1.23.2"
     _minimum_pyarrow_version = "18.0.0"
     _minimum_grpc_version = "1.76.0"
     _minimum_googleapis_common_protos_version = "1.71.0"
diff --git a/python/pyspark/sql/pandas/utils.py 
b/python/pyspark/sql/pandas/utils.py
index 0f5a73ce3558..875c77dedada 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -108,7 +108,7 @@ def require_minimum_pyarrow_version() -> None:
 
 def require_minimum_numpy_version() -> None:
     """Raise ImportError if minimum version of NumPy is not installed"""
-    minimum_numpy_version = "1.22"
+    minimum_numpy_version = "1.23.2"
 
     try:
         import numpy


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to