This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 51c6649e2868 [SPARK-54839][PYTHON] Upgrade the minimum version of 
`numpy` to 2.0.0
51c6649e2868 is described below

commit 51c6649e28682e2d659f4967c9c926896ea8dfb8
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Dec 25 18:48:01 2025 +0800

    [SPARK-54839][PYTHON] Upgrade the minimum version of `numpy` to 2.0.0
    
    ### What changes were proposed in this pull request?
    Upgrade the minimum version of `numpy` to 2.0.0
    
    ### Why are the changes needed?
    Numpy 1.22 was released on 1 Jan, 2022; 2.0.0 was released on 16 Jun, 2024, 
and the latest version is 2.4.0
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #53603 from zhengruifeng/bump_numpy_mini.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 .github/workflows/build_and_test.yml              | 2 +-
 .github/workflows/maven_test.yml                  | 2 +-
 .github/workflows/pages.yml                       | 2 +-
 dev/requirements.txt                              | 2 +-
 dev/spark-test-image/python-minimum/Dockerfile    | 4 ++--
 dev/spark-test-image/python-ps-minimum/Dockerfile | 4 ++--
 python/docs/source/getting_started/install.rst    | 4 ++--
 python/pyspark/sql/pandas/utils.py                | 2 +-
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index c990337dc939..999d6f4cd86a 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -368,7 +368,7 @@ jobs:
     - name: Install Python packages (Python 3.11)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 
'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 
'yarn')
       run: |
-        python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy 
unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.0' 'zstandard==0.25.0'
+        python3.11 -m pip install 'numpy>=2.0.0' pyarrow pandas pyyaml scipy 
unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.0' 'zstandard==0.25.0'
         python3.11 -m pip list
     # Run the tests.
     - name: Run tests
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index a3f20bdbd910..e136910e9aa3 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -181,7 +181,7 @@ jobs:
       - name: Install Python packages (Python 3.11)
         if: contains(matrix.modules, 'resource-managers#yarn') || 
(contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
         run: |
-          python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy 
unittest-xml-reporting 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.0' 'zstandard==0.25.0'
+          python3.11 -m pip install 'numpy>=2.0.0' pyarrow pandas pyyaml scipy 
unittest-xml-reporting 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.0' 'zstandard==0.25.0'
           python3.11 -m pip list
       # Run the tests using script command.
       # BSD's script command doesn't support -c option, and the usage is 
different from Linux's one.
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 2bba3dcaf176..dbb378f1d297 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -61,7 +61,7 @@ jobs:
       - name: Install Python dependencies
         run: |
          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' 
sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
-            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' 
pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
+            ipython ipython_genutils sphinx_plotly_directive 'numpy>=2.0.0' 
pyarrow 'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
             'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 
'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
             'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.0' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
             'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5'
diff --git a/dev/requirements.txt b/dev/requirements.txt
index 2508b79d5e16..46bbdb61d230 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -2,7 +2,7 @@
 py4j>=0.10.9.9
 
 # PySpark dependencies (optional)
-numpy>=1.22
+numpy>=2.0.0
 pyarrow>=15.0.0
 six==1.16.0
 pandas>=2.2.0
diff --git a/dev/spark-test-image/python-minimum/Dockerfile 
b/dev/spark-test-image/python-minimum/Dockerfile
index 575b4afdd02c..501dc15ca912 100644
--- a/dev/spark-test-image/python-minimum/Dockerfile
+++ b/dev/spark-test-image/python-minimum/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra 
Image For PySpark wi
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20250703
+ENV FULL_REFRESH_DATE=20251224
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -62,7 +62,7 @@ RUN apt-get update && apt-get install -y \
     wget \
     zlib1g-dev
 
-ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 
scipy scikit-learn coverage unittest-xml-reporting"
+ARG BASIC_PIP_PKGS="numpy==2.0.0 pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 
scipy scikit-learn coverage unittest-xml-reporting"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf"
 
diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile 
b/dev/spark-test-image/python-ps-minimum/Dockerfile
index 5142d46cc3eb..668ec97aa833 100644
--- a/dev/spark-test-image/python-ps-minimum/Dockerfile
+++ b/dev/spark-test-image/python-ps-minimum/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra 
Image For Pandas API
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20250708
+ENV FULL_REFRESH_DATE=20251224
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \
     zlib1g-dev
 
 
-ARG BASIC_PIP_PKGS="pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 numpy scipy 
coverage unittest-xml-reporting"
+ARG BASIC_PIP_PKGS="numpy==2.0.0 pyarrow==15.0.0 pandas==2.2.0 six==1.16.0 
scipy coverage unittest-xml-reporting"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf"
 
diff --git a/python/docs/source/getting_started/install.rst 
b/python/docs/source/getting_started/install.rst
index 6b5a09205e4a..d42123f3f248 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -278,7 +278,7 @@ Installable with ``pip install "pyspark[ml]"``.
 ======= ================= ======================================
 Package Supported version Note
 ======= ================= ======================================
-`numpy` >=1.22            Required for MLlib DataFrame-based API
+`numpy` >=2.0.0           Required for MLlib DataFrame-based API
 ======= ================= ======================================
 
 Additional libraries that enhance functionality but are not included in the 
installation packages:
@@ -298,7 +298,7 @@ Installable with ``pip install "pyspark[mllib]"``.
 ======= ================= ==================
 Package Supported version Note
 ======= ================= ==================
-`numpy` >=1.22            Required for MLlib
+`numpy` >=2.0.0           Required for MLlib
 ======= ================= ==================
 
 Declarative Pipelines
diff --git a/python/pyspark/sql/pandas/utils.py 
b/python/pyspark/sql/pandas/utils.py
index c37665c719c8..5db6cfa1e6b1 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -98,7 +98,7 @@ def require_minimum_pyarrow_version() -> None:
 
 def require_minimum_numpy_version() -> None:
     """Raise ImportError if minimum version of NumPy is not installed"""
-    minimum_numpy_version = "1.22"
+    minimum_numpy_version = "2.0.0"
 
     try:
         import numpy


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to