This is an automated email from the ASF dual-hosted git repository.

pdallig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new ed958b538d [ZEPPELIN-5945] Build Zeppelin-Distribution with JDK11 and use Spark 3.3 as default (#4639)
ed958b538d is described below

commit ed958b538d67539cc8bc232668cce1163f51c468
Author: Philipp Dallig <philipp.dal...@gmail.com>
AuthorDate: Tue Aug 22 08:45:44 2023 +0200

    [ZEPPELIN-5945] Build Zeppelin-Distribution with JDK11 and use Spark 3.3 as default (#4639)
---
 Dockerfile                                         |  6 +--
 scripts/docker/zeppelin-interpreter/Dockerfile     | 52 +++++++++-------------
 .../docker/zeppelin-interpreter/conda_packages.txt | 22 ---------
 scripts/docker/zeppelin-interpreter/condarc        |  5 +++
 .../zeppelin-interpreter/env_python_3_with_R.yml   | 38 ++++++++++++++++
 .../docker/zeppelin-interpreter/pip_packages.txt   |  1 -
 scripts/docker/zeppelin-server/Dockerfile          |  6 +--
 7 files changed, 70 insertions(+), 60 deletions(-)
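
As a quick orientation (not part of the commit itself): with the updated root
Dockerfile, the distribution image can be built roughly as below. The tag only
needs to match whatever is later passed as ZEPPELIN_DISTRIBUTION_IMAGE to the
interpreter and server Dockerfiles; the name used here is an assumption for
illustration.

    # Build the JDK 11 based distribution image from the repository root
    docker build -t zeppelin-distribution:latest .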

diff --git a/Dockerfile b/Dockerfile
index 5d78aff9a5..5d97d7dcc0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,14 +14,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-FROM openjdk:8 as builder
+FROM openjdk:11 as builder
 ADD . /workspace/zeppelin
 WORKDIR /workspace/zeppelin
 ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
 # Allow npm and bower to run with root privileges
 RUN echo "unsafe-perm=true" > ~/.npmrc && \
     echo '{ "allow_root": true }' > ~/.bowerrc && \
-    ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
+    ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
     # Example which doesn't compile all interpreters
     # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist -pl '!groovy,!submarine,!livy,!hbase,!file,!flink' && \
     mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \
@@ -29,5 +29,5 @@ RUN echo "unsafe-perm=true" > ~/.npmrc && \
     rm -rf ~/.m2 && \
     rm -rf /workspace/zeppelin/*
 
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 COPY --from=builder /opt/zeppelin /opt/zeppelin
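
The builder invocation above can also be run directly on a workstation; this
is just a convenience sketch that repeats the RUN line from the Dockerfile and
assumes a local JDK 11 plus the bundled Maven wrapper.

    ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist
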
diff --git a/scripts/docker/zeppelin-interpreter/Dockerfile b/scripts/docker/zeppelin-interpreter/Dockerfile
index 8779982acb..453411bb45 100644
--- a/scripts/docker/zeppelin-interpreter/Dockerfile
+++ b/scripts/docker/zeppelin-interpreter/Dockerfile
@@ -16,7 +16,7 @@
 ARG ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest
 FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution
 
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 
 LABEL maintainer="Apache Software Foundation <d...@zeppelin.apache.org>"
 
@@ -25,13 +25,16 @@ ARG version="0.10.0"
 ENV VERSION="${version}" \
     ZEPPELIN_HOME="/opt/zeppelin"
 
+# Install Java for zeppelin interpreter
+# Install micromamba to install a python environment via conda
 RUN set -ex && \
-    apt-get -y update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jre-headless wget tini && \
+    /usr/bin/apt-get update && \
+    DEBIAN_FRONTEND=noninteractive /usr/bin/apt-get install -y openjdk-11-jre-headless wget tini bzip2 && \
+    /usr/bin/wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
     # Cleanup
-    rm -rf /var/lib/apt/lists/* && \
-    apt-get autoclean && \
-    apt-get clean
+    /usr/bin/apt-get clean && \
+    /bin/rm -rf /var/lib/apt/lists/*
+
 
 COPY --from=zeppelin-distribution /opt/zeppelin/bin ${ZEPPELIN_HOME}/bin
 COPY log4j.properties ${ZEPPELIN_HOME}/conf/
@@ -46,33 +49,20 @@ COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${ZEPPELIN_HOME}/int
 ### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name}  ${ZEPPELIN_HOME}/interpreter/${interpreter_name}
 
 
-# Decide: Install conda to manage python and R packages. Maybe adjust the packages in pip_packages.txt or conda_packages.txt
-ARG miniconda_version="py38_4.8.3"
-ARG miniconda_sha256="879457af6a0bf5b34b48c12de31d4df0ee2f06a8e68768e5758c3293b2daf688"
+# Decide: Install conda to manage python and R packages. Maybe adjust the packages in env_python_3_with_R.yml
 # Install python and R packages via conda
-COPY conda_packages.txt /conda_packages.txt
-# Some python packages are not available via conda, so we are using pip
-COPY pip_packages.txt /pip_packages.txt
+COPY env_python_3_with_R.yml /env_python_3_with_R.yml
+# To improve the build time, the Zeppelin team recommends a conda proxy
+# COPY condarc /etc/conda/condarc
 RUN set -ex && \
-    wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
-    echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
-    sha256sum --strict -c anaconda.sha256 && \
-    bash miniconda.sh -b -p /opt/conda && \
-    export PATH=/opt/conda/bin:$PATH && \
-    conda config --set always_yes yes --set changeps1 no && \
-    conda info -a && \
-    conda config --add channels conda-forge && \
-    conda install -y --quiet --file /conda_packages.txt && \
-    pip install -q -r /pip_packages.txt  && \
-    # Cleanup
-    rm -v miniconda.sh anaconda.sha256  && \
-    # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
-    find /opt/conda/ -follow -type f -name '*.a' -delete && \
-    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
-    conda clean -ay
-    # Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
-    # chmod -R ug+rwX /opt/conda
-ENV PATH /opt/conda/bin:$PATH
+    micromamba create -y -p /opt/conda -f env_python_3_with_R.yml && \
+    micromamba clean -ay
+
+ENV PATH=/opt/conda/bin:$PATH \
+    SPARK_HOME=/opt/conda/lib/python3.9/site-packages/pyspark
+
+# Allow modification of conda packages. This would allow malicious code to be injected into other interpreter sessions, therefore it is disabled by default
+# chmod -R ug+rwX /opt/conda
 
 RUN mkdir -p "${ZEPPELIN_HOME}/logs" "${ZEPPELIN_HOME}/run" "${ZEPPELIN_HOME}/local-repo" && \
      # Allow process to edit /etc/passwd, to create a user entry for zeppelin
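
A hedged sketch for building the interpreter image on top of the distribution
image (the tags are illustrative; ZEPPELIN_DISTRIBUTION_IMAGE is the build
argument declared at the top of this Dockerfile):

    docker build \
      --build-arg ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest \
      -t zeppelin-interpreter:latest \
      scripts/docker/zeppelin-interpreter
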
diff --git a/scripts/docker/zeppelin-interpreter/conda_packages.txt b/scripts/docker/zeppelin-interpreter/conda_packages.txt
deleted file mode 100644
index 3be8519342..0000000000
--- a/scripts/docker/zeppelin-interpreter/conda_packages.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-# python packages
-pycodestyle
-numpy
-pandas
-scipy
-grpcio
-hvplot
-protobuf
-pandasql
-ipython
-matplotlib
-ipykernel
-jupyter_client
-bokeh
-
-# R packages
-r-evaluate
-r-base64enc
-r-knitr
-r-ggplot2
-r-shiny
-r-googlevis
diff --git a/scripts/docker/zeppelin-interpreter/condarc b/scripts/docker/zeppelin-interpreter/condarc
new file mode 100644
index 0000000000..6992fdf59e
--- /dev/null
+++ b/scripts/docker/zeppelin-interpreter/condarc
@@ -0,0 +1,5 @@
+# Example of using a proxy for conda
+channel_alias: https://leandi.avm.de/repository/anaconda-proxy
+default_channels:
+  - https://proxy.mycompany.com/repository/anaconda-proxy/main
+  - https://proxy.mycompany.com/repository/anaconda-proxy/r
\ No newline at end of file
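
To actually route conda traffic through a mirror, the URLs above would be
pointed at your own proxy and the commented COPY line in the interpreter
Dockerfile enabled before building; a rough sketch (the sed edit is an
assumption, not part of this commit):

    # Enable "COPY condarc /etc/conda/condarc" in the interpreter Dockerfile
    sed -i 's|^# COPY condarc|COPY condarc|' scripts/docker/zeppelin-interpreter/Dockerfile
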
diff --git a/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml b/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
new file mode 100644
index 0000000000..09ed9a3901
--- /dev/null
+++ b/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
@@ -0,0 +1,38 @@
+name: python_3_with_R
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - python >=3.9,<3.10
+  - pyspark=3.3.2
+  - pycodestyle
+  - scipy
+  - numpy
+  - grpcio
+  - protobuf
+  - pandasql
+  - ipython
+  - ipykernel
+  - jupyter_client
+  - hvplot
+  - plotnine
+  - seaborn
+  - intake
+  - intake-parquet
+  - intake-xarray
+  - altair
+  - vega_datasets
+  - plotly
+  - pip
+  - pip:
+    # works for regular pip packages
+    - bkzep==0.6.1
+  - r-base=3
+  - r-data.table
+  - r-evaluate
+  - r-base64enc
+  - r-knitr
+  - r-ggplot2
+  - r-irkernel
+  - r-shiny
+  - r-googlevis
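
The environment file can also be tried out locally with micromamba, mirroring
the RUN step in the interpreter Dockerfile; the /tmp prefix below is an
arbitrary choice for testing.

    micromamba create -y -p /tmp/zeppelin-conda -f scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
    # The environment pins pyspark 3.3.2 on Python 3.9
    /tmp/zeppelin-conda/bin/python -c "import pyspark; print(pyspark.__version__)"
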
diff --git a/scripts/docker/zeppelin-interpreter/pip_packages.txt b/scripts/docker/zeppelin-interpreter/pip_packages.txt
deleted file mode 100644
index 9123189b05..0000000000
--- a/scripts/docker/zeppelin-interpreter/pip_packages.txt
+++ /dev/null
@@ -1 +0,0 @@
-bkzep==0.6.1
\ No newline at end of file
diff --git a/scripts/docker/zeppelin-server/Dockerfile b/scripts/docker/zeppelin-server/Dockerfile
index 1e1c9c374e..6a56062067 100644
--- a/scripts/docker/zeppelin-server/Dockerfile
+++ b/scripts/docker/zeppelin-server/Dockerfile
@@ -18,19 +18,19 @@ FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution
 
 # Prepare all interpreter settings for Zeppelin server
 # These steps are not needed if you only add specific interpreter settings to your image
-FROM alpine:3.11 AS interpreter-settings
+FROM alpine:3.13 AS interpreter-settings
 COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter
 RUN mkdir -p /opt/zeppelin/interpreter && \
     cd /tmp/interpreter && \
     find . -name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \;
 
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 LABEL maintainer="Apache Software Foundation <d...@zeppelin.apache.org>"
 
 RUN set -ex && \
     apt-get -y update && \
     # Install language and other base packages
-    DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-8-jre-headless tini wget && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-11-jre-headless tini wget && \
     # Cleanup
     rm -rf /var/lib/apt/lists/* && \
     apt-get autoclean && \
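
Analogous to the interpreter image, the server image can be built against the
distribution image (again a sketch; image tags are illustrative):

    docker build \
      --build-arg ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest \
      -t zeppelin-server:latest \
      scripts/docker/zeppelin-server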
