This is an automated email from the ASF dual-hosted git repository. jongyoul pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new 18ffb3441f [ZEPPELIN-6040] Run mode "docker" not working properly (#4780) 18ffb3441f is described below commit 18ffb3441fffda2c97c9ca1ab7178e9756e7c632 Author: ChanHo Lee <chanho0...@gmail.com> AuthorDate: Sun Aug 25 14:07:54 2024 +0900 [ZEPPELIN-6040] Run mode "docker" not working properly (#4780) * Allow Different ZEPPELIN_HOME Variables for Host and Container * Avoid Injecting Host JAVA_HOME into Container Environments * Avoid injecting host PATH env variable into container envs * Fix interpreter dockerfile for docker run mode * Increased initial sleep time to ensure Docker interpreter process starts successfully * Fix interpreter Dockerfile base image tag Co-authored-by: Jongyoul Lee <jongy...@gmail.com> * Fix checkstyle * Bump Spark version in dockerfile to 3.5.1 * Update scripts/docker/interpreter/Dockerfile MAINTAINER Co-authored-by: Cheng Pan <pan3...@gmail.com> * Change dockerfile MAINTAINER instruction to LABEL instruction - MAINTAINER instruction is deprecated * Change default zeppelin.docker.container.spark.home configuration to '/opt/spark' - To align it with Spark on K8S mode * Change Spark version and bin name variables in interpreter dockerfile * Fix interpreter dockerfile --------- Co-authored-by: Jongyoul Lee <jongy...@gmail.com> Co-authored-by: Cheng Pan <pan3...@gmail.com> --- scripts/docker/interpreter/Dockerfile | 20 +++++----- .../zeppelin/conf/ZeppelinConfiguration.java | 3 +- .../launcher/DockerInterpreterProcess.java | 46 +++++++++++++++------- .../launcher/DockerInterpreterProcessTest.java | 2 +- 4 files changed, 45 insertions(+), 26 deletions(-) diff --git a/scripts/docker/interpreter/Dockerfile b/scripts/docker/interpreter/Dockerfile index e88686520f..ab7f9668e1 100644 --- a/scripts/docker/interpreter/Dockerfile +++ b/scripts/docker/interpreter/Dockerfile @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under 
the License. -FROM apache/zeppelin:0.8.0 -MAINTAINER Apache Software Foundation <d...@zeppelin.apache.org> +FROM apache/zeppelin:0.11.2-SNAPSHOT +LABEL maintainer="Apache Zeppelin Community <d...@zeppelin.apache.org>" -ENV SPARK_VERSION=2.3.3 -ENV HADOOP_VERSION=2.7 +ARG SPARK_VERSION=3.5.1 +ARG SPARK_BIN_NAME=hadoop3 + +USER root # support Kerberos certification RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -yq krb5-user libpam-krb5 && apt-get clean @@ -25,10 +27,10 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install - RUN apt-get update && apt-get install -y curl unzip wget grep sed vim tzdata && apt-get clean # auto upload zeppelin interpreter lib -RUN rm -rf /zeppelin +RUN rm -rf /opt/zeppelin RUN rm -rf /spark -RUN wget https://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -RUN tar zxvf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -RUN mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark -RUN rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz +RUN wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz +RUN tar zxvf spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz +RUN mv spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME} /opt/spark +RUN rm spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java index 5fa0a9d00b..566242145f 100644 --- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java +++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java @@ -1106,8 +1106,9 @@ public class ZeppelinConfiguration { // Used by K8s and Docker plugin ZEPPELIN_DOCKER_CONTAINER_IMAGE("zeppelin.docker.container.image", 
"apache/zeppelin:" + Util.getVersion()), + ZEPPELIN_DOCKER_CONTAINER_HOME("zeppelin.docker.container.home", "/opt/zeppelin"), - ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME("zeppelin.docker.container.spark.home", "/spark"), + ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME("zeppelin.docker.container.spark.home", "/opt/spark"), ZEPPELIN_DOCKER_UPLOAD_LOCAL_LIB_TO_CONTAINTER("zeppelin.docker.upload.local.lib.to.container", true), ZEPPELIN_DOCKER_HOST("zeppelin.docker.host", "http://0.0.0.0:2375"), ZEPPELIN_DOCKER_TIME_ZONE("zeppelin.docker.time.zone", TimeZone.getDefault().getID()), diff --git a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java index 8009c865d1..643afb2061 100644 --- a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java +++ b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java @@ -96,6 +96,7 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { private ZeppelinConfiguration zConf; private String zeppelinHome; + private final String containerZeppelinHome; @VisibleForTesting final String containerSparkHome; @@ -130,6 +131,7 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { this.zConf = zConf; this.containerName = interpreterGroupId.toLowerCase(); + containerZeppelinHome = zConf.getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_HOME); containerSparkHome = zConf.getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME); uploadLocalLibToContainter = zConf.getBoolean( ConfVars.ZEPPELIN_DOCKER_UPLOAD_LOCAL_LIB_TO_CONTAINTER); @@ -190,7 +192,7 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { // check if the interpreter process exit script // if interpreter process exit, then container need exit 
StringBuilder sbStartCmd = new StringBuilder(); - sbStartCmd.append("sleep 10; "); + sbStartCmd.append("sleep 20; "); sbStartCmd.append("process=RemoteInterpreterServer; "); sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep | awk '{print $1}'); "); sbStartCmd.append("while [ ! -z \"$RUNNING_PIDS\" ]; "); @@ -241,6 +243,7 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { long timeoutTime = startTime + getConnectTimeout(); // wait until interpreter send dockerStarted message through thrift rpc synchronized (dockerStarted) { + LOGGER.info("Waiting for interpreter container to be ready"); while (!dockerStarted.get() && !Thread.currentThread().isInterrupted()) { long timeToTimeout = timeoutTime - System.currentTimeMillis(); if (timeToTimeout <= 0) { @@ -293,7 +296,7 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { Properties dockerProperties = new Properties(); // docker template properties - dockerProperties.put("CONTAINER_ZEPPELIN_HOME", zeppelinHome); + dockerProperties.put("CONTAINER_ZEPPELIN_HOME", containerZeppelinHome); dockerProperties.put("zeppelin.interpreter.container.image", containerImage); dockerProperties.put("zeppelin.interpreter.group.id", interpreterGroupId); dockerProperties.put("zeppelin.interpreter.group.name", interpreterGroupName); @@ -313,11 +316,17 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { @VisibleForTesting List<String> getListEnvs() { // environment variables - envs.put("ZEPPELIN_HOME", zeppelinHome); - envs.put("ZEPPELIN_CONF_DIR", zeppelinHome + "/conf"); + envs.put("ZEPPELIN_HOME", containerZeppelinHome); + envs.put("ZEPPELIN_CONF_DIR", containerZeppelinHome + "/conf"); envs.put("ZEPPELIN_FORCE_STOP", "true"); envs.put("SPARK_HOME", this.containerSparkHome); + // remove JAVA_HOME from envs to avoid misconfiguration in container + envs.remove("JAVA_HOME"); + + // remove PATH from envs to avoid misconfiguration in container + 
envs.remove("PATH"); + // set container time zone envs.put("TZ", zConf.getString(ConfVars.ZEPPELIN_DOCKER_TIME_ZONE)); @@ -441,18 +450,20 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { HashMap<String, String> copyFiles = new HashMap<>(); // Rebuild directory - rmInContainer(containerId, zeppelinHome); - mkdirInContainer(containerId, zeppelinHome); + rmInContainer(containerId, containerZeppelinHome); + mkdirInContainer(containerId, containerZeppelinHome); // 1) zeppelin-site.xml is uploaded to `${CONTAINER_ZEPPELIN_HOME}` directory in the container String confPath = "/conf"; String zeplConfPath = getPathByHome(zeppelinHome, confPath); - mkdirInContainer(containerId, zeplConfPath); - copyFiles.put(zeplConfPath + "/zeppelin-site.xml", zeplConfPath + "/zeppelin-site.xml"); - copyFiles.put(zeplConfPath + "/log4j.properties", zeplConfPath + "/log4j.properties"); + mkdirInContainer(containerId, containerZeppelinHome); + String containerZeplConfPath = containerZeppelinHome + confPath; + copyFiles.put( + zeplConfPath + "/zeppelin-site.xml", containerZeplConfPath + "/zeppelin-site.xml"); + copyFiles.put(zeplConfPath + "/log4j.properties", containerZeplConfPath + "/log4j.properties"); copyFiles.put(zeplConfPath + "/log4j_yarn_cluster.properties", - zeplConfPath + "/log4j_yarn_cluster.properties"); + containerZeplConfPath + "/log4j_yarn_cluster.properties"); // 2) upload krb5.conf to container String krb5conf = "/etc/krb5.conf"; @@ -512,26 +523,31 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess { // directory in the container String binPath = "/bin"; String zeplBinPath = getPathByHome(zeppelinHome, binPath); - mkdirInContainer(containerId, zeplBinPath); - docker.copyToContainer(new File(zeplBinPath).toPath(), containerId, zeplBinPath); + String containerZeplBinPath = containerZeppelinHome + binPath; + mkdirInContainer(containerId, containerZeplBinPath); + docker.copyToContainer(new File(zeplBinPath).toPath(), containerId, 
containerZeplBinPath); // 7) ${ZEPPELIN_HOME}/interpreter/spark is uploaded to `${CONTAINER_ZEPPELIN_HOME}` // directory in the container String intpGrpPath = "/interpreter/" + interpreterGroupName; String intpGrpAllPath = getPathByHome(zeppelinHome, intpGrpPath); - mkdirInContainer(containerId, intpGrpAllPath); - docker.copyToContainer(new File(intpGrpAllPath).toPath(), containerId, intpGrpAllPath); + String containerIntpGrpPath = containerZeppelinHome + intpGrpPath; + mkdirInContainer(containerId, containerIntpGrpPath); + docker.copyToContainer(new File(intpGrpAllPath).toPath(), containerId, containerIntpGrpPath); // 8) ${ZEPPELIN_HOME}/lib/interpreter/zeppelin-interpreter-shaded-<version>.jar // is uploaded to `${CONTAINER_ZEPPELIN_HOME}` directory in the container String intpPath = "/interpreter"; String intpAllPath = getPathByHome(zeppelinHome, intpPath); + String containerIntpAllPath = containerZeppelinHome + intpPath; Collection<File> listFiles = FileUtils.listFiles(new File(intpAllPath), FileFilterUtils.suffixFileFilter("jar"), null); for (File jarfile : listFiles) { String jarfilePath = jarfile.getAbsolutePath(); + String jarfileName = jarfile.getName(); + String containerJarfilePath = containerIntpAllPath + "/" + jarfileName; if (!StringUtils.isBlank(jarfilePath)) { - copyFiles.putIfAbsent(jarfilePath, jarfilePath); + copyFiles.putIfAbsent(jarfilePath, containerJarfilePath); } } } diff --git a/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java b/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java index 6f0c7c3375..1e9ad7d0bd 100644 --- a/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java +++ b/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java @@ -55,7 +55,7 @@ class 
DockerInterpreterProcessTest { DockerInterpreterProcess interpreterProcess = (DockerInterpreterProcess) client; assertEquals("name", interpreterProcess.getInterpreterSettingName()); - assertEquals("/spark", interpreterProcess.containerSparkHome); + assertEquals("/opt/spark", interpreterProcess.containerSparkHome); assertTrue(interpreterProcess.uploadLocalLibToContainter); assertNotEquals("http://my-docker-host:2375", interpreterProcess.dockerHost); }