This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 243af2fe684e [SPARK-52483][INFRA] Upgrade to Python 3.11 in doc image
243af2fe684e is described below

commit 243af2fe684e868ede422ec662fef5f2853bfbc6
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jun 17 10:37:40 2025 +0800

    [SPARK-52483][INFRA] Upgrade to Python 3.11 in doc image
    
    ### What changes were proposed in this pull request?
    Upgrade to Python 3.11 in doc image
    
    ### Why are the changes needed?
    Python 3.9 is reaching its EOL soon
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    CI
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #51150 from zhengruifeng/infra_doc_311.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 .github/workflows/build_and_test.yml       | 33 ++++++++++++++++++++++++++++--
 dev/run-tests.py                           |  2 +-
 dev/spark-test-image/docs/Dockerfile       | 16 +++++++++------
 python/pyspark/pandas/supported_api_gen.py |  2 +-
 python/run-tests.py                        |  4 ++--
 5 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index ff005103a246..948140b93663 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1000,8 +1000,12 @@ jobs:
         python3.9 -m pip install ipython_genutils # See SPARK-38517
         python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 
pyarrow pandas 'plotly<6.0.0'
         python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
-    - name: List Python packages
+    - name: List Python packages for branch-3.5 and branch-4.0
+      if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
       run: python3.9 -m pip list
+    - name: List Python packages
+      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+      run: python3.11 -m pip list
     - name: Install dependencies for documentation generation
       run: |
         # Keep the version of Bundler here in sync with the following 
locations:
@@ -1010,7 +1014,8 @@ jobs:
         gem install bundler -v 2.4.22
         cd docs
         bundle install --retry=100
-    - name: Run documentation build
+    - name: Run documentation build for branch-3.5 and branch-4.0
+      if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
       run: |
         # We need this link to make sure `python3` points to `python3.9` which 
contains the prerequisite packages.
         ln -s "$(which python3.9)" "/usr/local/bin/python3"
@@ -1031,6 +1036,30 @@ jobs:
         echo "SKIP_SQLDOC: $SKIP_SQLDOC"
         cd docs
         bundle exec jekyll build
+    - name: Run documentation build
+      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+      run: |
+        # We need this link to make sure `python3` points to `python3.11` 
which contains the prerequisite packages.
+        ln -s "$(which python3.11)" "/usr/local/bin/python3"
+        # Build docs first with SKIP_API to ensure they are buildable without 
requiring any
+        # language docs to be built beforehand.
+        cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
+        if [ -f "./dev/is-changed.py" ]; then
+          # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+          pyspark_modules=`cd dev && python3.11 -c "import 
sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if 
m.name.startswith('pyspark')))"`
+          if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then 
export SKIP_PYTHONDOC=1; fi
+          if [ `./dev/is-changed.py -m sparkr` = false ]; then export 
SKIP_RDOC=1; fi
+        fi
+        export PYSPARK_DRIVER_PYTHON=python3.11
+        export PYSPARK_PYTHON=python3.11
+        # Print the values of environment variables `SKIP_ERRORDOC`, 
`SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+        echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+        echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+        echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+        echo "SKIP_RDOC: $SKIP_RDOC"
+        echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+        cd docs
+        bundle exec jekyll build
     - name: Tar documentation
       if: github.repository != 'apache/spark'
       run: tar cjf site.tar.bz2 docs/_site
diff --git a/dev/run-tests.py b/dev/run-tests.py
index eb760139f9b6..32c46e0d2b94 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -427,7 +427,7 @@ def parse_opts():
     parser.add_argument(
         "--python-executables",
         type=str,
-        default="python3.9",
+        default="python3.11",
         help="A comma-separated list of Python executables to test against 
(default: %(default)s)",
     )
     parser.add_argument(
diff --git a/dev/spark-test-image/docs/Dockerfile 
b/dev/spark-test-image/docs/Dockerfile
index f1e33763df46..3ab1430cedd3 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra 
Image for Documentat
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20241029
+ENV FULL_REFRESH_DATE=20250616
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -56,6 +56,7 @@ RUN apt-get update && apt-get install -y \
     pandoc \
     pkg-config \
     qpdf \
+    tzdata \
     r-base \
     ruby \
     ruby-dev \
@@ -74,18 +75,21 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 
'markdown', 'rmarkdown',
 # See more in SPARK-39735
 ENV 
R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
 
-# Install Python 3.9
+# Install Python 3.11
 RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y python3.9 python3.9-distutils \
+RUN apt-get update && apt-get install -y \
+    python3.11 \
+    && apt-get autoremove --purge -y \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 
 # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
 # See 'ipython_genutils' in SPARK-38517
 # See 'docutils<0.18.0' in SPARK-39421
-RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe 'pyzmq<24.0.0' \
+RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe 'pyzmq<24.0.0' \
   ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 
pandas 'plotly>=4.8' 'docutils<0.18.0' \
   'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 
'black==23.12.1' \
   'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 
'protobuf==5.29.1' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
   'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5' \
-  && python3.9 -m pip cache purge
+  && python3.11 -m pip cache purge
diff --git a/python/pyspark/pandas/supported_api_gen.py 
b/python/pyspark/pandas/supported_api_gen.py
index f2a73cb1c1ad..0272311d7f35 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import 
PandasNotImplementedError
 MAX_MISSING_PARAMS_SIZE = 5
 COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
 MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
-PANDAS_LATEST_VERSION = "2.2.3"
+PANDAS_LATEST_VERSION = "2.3.0"
 
 RST_HEADER = """
 =====================
diff --git a/python/run-tests.py b/python/run-tests.py
index 091fcfe73ac1..773f68654985 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -212,9 +212,9 @@ def run_individual_python_test(target_dir, test_name, 
pyspark_python, keep_test_
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python3.9", "pypy3"] if which(x)]
+    python_execs = [x for x in ["python3.11", "pypy3"] if which(x)]
 
-    if "python3.9" not in python_execs:
+    if "python3.11" not in python_execs:
         p = which("python3")
         if not p:
             LOGGER.error("No python3 executable found.  Exiting!")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to