This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 243af2fe684e [SPARK-52483][INFRA] Upgrade to Python 3.11 in doc image
243af2fe684e is described below
commit 243af2fe684e868ede422ec662fef5f2853bfbc6
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jun 17 10:37:40 2025 +0800
[SPARK-52483][INFRA] Upgrade to Python 3.11 in doc image
### What changes were proposed in this pull request?
Upgrade to Python 3.11 in the doc image
### Why are the changes needed?
Python 3.9 is reaching its end of life (EOL) soon
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #51150 from zhengruifeng/infra_doc_311.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.github/workflows/build_and_test.yml | 33 ++++++++++++++++++++++++++++--
dev/run-tests.py | 2 +-
dev/spark-test-image/docs/Dockerfile | 16 +++++++++------
python/pyspark/pandas/supported_api_gen.py | 2 +-
python/run-tests.py | 4 ++--
5 files changed, 45 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index ff005103a246..948140b93663 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1000,8 +1000,12 @@ jobs:
python3.9 -m pip install ipython_genutils # See SPARK-38517
python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly<6.0.0'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
- - name: List Python packages
+ - name: List Python packages for branch-3.5 and branch-4.0
+ if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
run: python3.9 -m pip list
+ - name: List Python packages
+ if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+ run: python3.11 -m pip list
- name: Install dependencies for documentation generation
run: |
# Keep the version of Bundler here in sync with the following locations:
@@ -1010,7 +1014,8 @@ jobs:
gem install bundler -v 2.4.22
cd docs
bundle install --retry=100
- - name: Run documentation build
+ - name: Run documentation build for branch-3.5 and branch-4.0
+ if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
run: |
# We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages.
ln -s "$(which python3.9)" "/usr/local/bin/python3"
@@ -1031,6 +1036,30 @@ jobs:
echo "SKIP_SQLDOC: $SKIP_SQLDOC"
cd docs
bundle exec jekyll build
+ - name: Run documentation build
+ if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+ run: |
+      # We need this link to make sure `python3` points to `python3.11` which contains the prerequisite packages.
+ ln -s "$(which python3.11)" "/usr/local/bin/python3"
+      # Build docs first with SKIP_API to ensure they are buildable without requiring any
+ # language docs to be built beforehand.
+ cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
+ if [ -f "./dev/is-changed.py" ]; then
+ # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+        pyspark_modules=`cd dev && python3.11 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+        if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
+        if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
+ fi
+ export PYSPARK_DRIVER_PYTHON=python3.11
+ export PYSPARK_PYTHON=python3.11
+      # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+ echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+ echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+ echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+ echo "SKIP_RDOC: $SKIP_RDOC"
+ echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+ cd docs
+ bundle exec jekyll build
- name: Tar documentation
if: github.repository != 'apache/spark'
run: tar cjf site.tar.bz2 docs/_site
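For reference, the new doc-build step computes the PySpark module list with a Python one-liner and consults dev/is-changed.py to decide whether the PySpark and SparkR docs can be skipped. A minimal standalone sketch of that decision, assuming it runs from the Spark repo root and that dev/is-changed.py prints "true" or "false" (both assumptions, not guaranteed by this diff):

    # Sketch of the SKIP_PYTHONDOC / SKIP_RDOC decision from the step above.
    import os
    import subprocess
    import sys

    sys.path.insert(0, "dev")  # assumption: sparktestsupport lives under dev/
    import sparktestsupport.modules as m

    # Same expression as the shell one-liner in the workflow step.
    pyspark_modules = ",".join(
        mod.name for mod in m.all_modules if mod.name.startswith("pyspark")
    )

    def changed(modules: str) -> bool:
        # assumption: is-changed.py prints "true"/"false" on stdout
        out = subprocess.run(
            ["./dev/is-changed.py", "-m", modules],
            capture_output=True, text=True, check=True,
        )
        return out.stdout.strip() == "true"

    if not changed(pyspark_modules):
        os.environ["SKIP_PYTHONDOC"] = "1"  # mirrors `export SKIP_PYTHONDOC=1`
    if not changed("sparkr"):
        os.environ["SKIP_RDOC"] = "1"  # mirrors `export SKIP_RDOC=1`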
diff --git a/dev/run-tests.py b/dev/run-tests.py
index eb760139f9b6..32c46e0d2b94 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -427,7 +427,7 @@ def parse_opts():
parser.add_argument(
"--python-executables",
type=str,
- default="python3.9",
+ default="python3.11",
help="A comma-separated list of Python executables to test against (default: %(default)s)",
)
parser.add_argument(
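The only change in dev/run-tests.py is the default interpreter. A self-contained sketch of how that argparse option now resolves (mirroring the hunk above; the standalone parser here is illustrative):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--python-executables",
        type=str,
        default="python3.11",
        help="A comma-separated list of Python executables to test against (default: %(default)s)",
    )

    # With no flag, the new default is picked up.
    opts = parser.parse_args([])
    print(opts.python_executables)  # python3.11

    # An explicit comma-separated list still overrides it.
    opts = parser.parse_args(["--python-executables", "python3.11,pypy3"])
    print(opts.python_executables.split(","))  # ['python3.11', 'pypy3']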
diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile
index f1e33763df46..3ab1430cedd3 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for Documentat
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
LABEL org.opencontainers.image.version=""
-ENV FULL_REFRESH_DATE=20241029
+ENV FULL_REFRESH_DATE=20250616
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -56,6 +56,7 @@ RUN apt-get update && apt-get install -y \
pandoc \
pkg-config \
qpdf \
+ tzdata \
r-base \
ruby \
ruby-dev \
@@ -74,18 +75,21 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', 'rmarkdown',
# See more in SPARK-39735
ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
-# Install Python 3.9
+# Install Python 3.11
RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y python3.9 python3.9-distutils \
+RUN apt-get update && apt-get install -y \
+ python3.11 \
+ && apt-get autoremove --purge -y \
+ && apt-get clean \
&& rm -rf /var/lib/apt/lists/*
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
-RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
+RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \
- && python3.9 -m pip cache purge
+ && python3.11 -m pip cache purge
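A quick way to sanity-check a rebuilt image is to confirm that python3.11 is on PATH and that the pinned doc toolchain resolved against it. A minimal check, assuming it is executed inside the image (not part of this diff):

    # Verify the doc image ships Python 3.11 with the pinned Sphinx.
    import shutil
    import subprocess

    exe = shutil.which("python3.11")
    assert exe is not None, "python3.11 not found on PATH"

    out = subprocess.run(
        [exe, "-c", "import sys, sphinx; print(sys.version_info[:2], sphinx.__version__)"],
        capture_output=True, text=True, check=True,
    )
    print(out.stdout.strip())  # expected: (3, 11) 4.5.0, per the pin above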
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index f2a73cb1c1ad..0272311d7f35 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -38,7 +38,7 @@ from pyspark.pandas.exceptions import PandasNotImplementedError
MAX_MISSING_PARAMS_SIZE = 5
COMMON_PARAMETER_SET = {"kwargs", "args", "cls"}
MODULE_GROUP_MATCH = [(pd, ps), (pdw, psw), (pdg, psg)]
-PANDAS_LATEST_VERSION = "2.2.3"
+PANDAS_LATEST_VERSION = "2.3.0"
RST_HEADER = """
=====================
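PANDAS_LATEST_VERSION pins the pandas release that the supported-API listing is generated against, so it moves in lockstep with the pandas installed in the doc image. A hypothetical guard sketch showing how such a constant is typically used (the warning text and structure are assumptions, not this module's actual code):

    # Hypothetical sketch: warn when the installed pandas differs from the
    # version the supported-API page is generated against.
    import warnings

    import pandas as pd

    PANDAS_LATEST_VERSION = "2.3.0"

    if pd.__version__ != PANDAS_LATEST_VERSION:
        warnings.warn(
            f"supported-API generation expects pandas {PANDAS_LATEST_VERSION}, "
            f"but {pd.__version__} is installed; results may be inaccurate."
        )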
diff --git a/python/run-tests.py b/python/run-tests.py
index 091fcfe73ac1..773f68654985 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -212,9 +212,9 @@ def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_
def get_default_python_executables():
- python_execs = [x for x in ["python3.9", "pypy3"] if which(x)]
+ python_execs = [x for x in ["python3.11", "pypy3"] if which(x)]
- if "python3.9" not in python_execs:
+ if "python3.11" not in python_execs:
p = which("python3")
if not p:
LOGGER.error("No python3 executable found. Exiting!")
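The hunk above is cut off mid-function; for context, the updated discovery logic can be sketched end to end as follows. Everything after the error line is an assumption based on the visible lines, not a verbatim copy:

    # Sketch of get_default_python_executables() after the upgrade.
    import logging
    from shutil import which

    LOGGER = logging.getLogger(__name__)

    def get_default_python_executables():
        python_execs = [x for x in ["python3.11", "pypy3"] if which(x)]
        if "python3.11" not in python_execs:
            p = which("python3")
            if not p:
                LOGGER.error("No python3 executable found. Exiting!")
                raise SystemExit(-1)  # assumption: the original exits here
            python_execs.insert(0, p)  # assumption: fall back to plain python3
        return python_execs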