This is an automated email from the ASF dual-hosted git repository.
jongyoul pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new 18ffb3441f [ZEPPELIN-6040] Run mode "docker" not working properly
(#4780)
18ffb3441f is described below
commit 18ffb3441fffda2c97c9ca1ab7178e9756e7c632
Author: ChanHo Lee <[email protected]>
AuthorDate: Sun Aug 25 14:07:54 2024 +0900
[ZEPPELIN-6040] Run mode "docker" not working properly (#4780)
* Allow Different ZEPPELIN_HOME Variables for Host and Container
* Avoid Injecting Host JAVA_HOME into Container Environments
* Avoid injecting host PATH env variable into container envs
* Fix interpreter dockerfile for docker run mode
* Increase initial sleep time to ensure the Docker interpreter process starts
successfully
* Fix interpreter Dockerfile base image tag
Co-authored-by: Jongyoul Lee <[email protected]>
* Fix checkstyle
* Bump Spark version in dockerfile to 3.5.1
* Update scripts/docker/interpreter/Dockerfile MAINTAINER
Co-authored-by: Cheng Pan <[email protected]>
* Change dockerfile MAINTAINER instruction to LABEL instruction
- MAINTAINER instruction is deprecated
* Change default zeppelin.docker.container.spark.home configuration to
'/opt/spark'
- To align it with Spark on K8S mode
* Change Spark version and bin name variables in interpreter dockerfile
* Fix interpreter dockerfile
---------
Co-authored-by: Jongyoul Lee <[email protected]>
Co-authored-by: Cheng Pan <[email protected]>
---
scripts/docker/interpreter/Dockerfile | 20 +++++-----
.../zeppelin/conf/ZeppelinConfiguration.java | 3 +-
.../launcher/DockerInterpreterProcess.java | 46 +++++++++++++++-------
.../launcher/DockerInterpreterProcessTest.java | 2 +-
4 files changed, 45 insertions(+), 26 deletions(-)
diff --git a/scripts/docker/interpreter/Dockerfile
b/scripts/docker/interpreter/Dockerfile
index e88686520f..ab7f9668e1 100644
--- a/scripts/docker/interpreter/Dockerfile
+++ b/scripts/docker/interpreter/Dockerfile
@@ -13,11 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM apache/zeppelin:0.8.0
-MAINTAINER Apache Software Foundation <[email protected]>
+FROM apache/zeppelin:0.11.2-SNAPSHOT
+LABEL maintainer="Apache Zeppelin Community <[email protected]>"
-ENV SPARK_VERSION=2.3.3
-ENV HADOOP_VERSION=2.7
+ARG SPARK_VERSION=3.5.1
+ARG SPARK_BIN_NAME=hadoop3
+
+USER root
# support Kerberos certification
RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install
-yq krb5-user libpam-krb5 && apt-get clean
@@ -25,10 +27,10 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update
&& apt-get install -
RUN apt-get update && apt-get install -y curl unzip wget grep sed vim tzdata
&& apt-get clean
# auto upload zeppelin interpreter lib
-RUN rm -rf /zeppelin
+RUN rm -rf /opt/zeppelin
RUN rm -rf /spark
-RUN wget
https://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
-RUN tar zxvf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
-RUN mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
-RUN rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
+RUN wget
https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz
+RUN tar zxvf spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz
+RUN mv spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME} /opt/spark
+RUN rm spark-${SPARK_VERSION}-bin-${SPARK_BIN_NAME}.tgz
diff --git
a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index 5fa0a9d00b..566242145f 100644
---
a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++
b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -1106,8 +1106,9 @@ public class ZeppelinConfiguration {
// Used by K8s and Docker plugin
ZEPPELIN_DOCKER_CONTAINER_IMAGE("zeppelin.docker.container.image",
"apache/zeppelin:" + Util.getVersion()),
+ ZEPPELIN_DOCKER_CONTAINER_HOME("zeppelin.docker.container.home",
"/opt/zeppelin"),
-
ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME("zeppelin.docker.container.spark.home",
"/spark"),
+
ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME("zeppelin.docker.container.spark.home",
"/opt/spark"),
ZEPPELIN_DOCKER_UPLOAD_LOCAL_LIB_TO_CONTAINTER("zeppelin.docker.upload.local.lib.to.container",
true),
ZEPPELIN_DOCKER_HOST("zeppelin.docker.host", "http://0.0.0.0:2375"),
ZEPPELIN_DOCKER_TIME_ZONE("zeppelin.docker.time.zone",
TimeZone.getDefault().getID()),
diff --git
a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
index 8009c865d1..643afb2061 100644
---
a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
+++
b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
@@ -96,6 +96,7 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
private ZeppelinConfiguration zConf;
private String zeppelinHome;
+ private final String containerZeppelinHome;
@VisibleForTesting
final String containerSparkHome;
@@ -130,6 +131,7 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
this.zConf = zConf;
this.containerName = interpreterGroupId.toLowerCase();
+ containerZeppelinHome =
zConf.getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_HOME);
containerSparkHome =
zConf.getString(ConfVars.ZEPPELIN_DOCKER_CONTAINER_SPARK_HOME);
uploadLocalLibToContainter = zConf.getBoolean(
ConfVars.ZEPPELIN_DOCKER_UPLOAD_LOCAL_LIB_TO_CONTAINTER);
@@ -190,7 +192,7 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
// check if the interpreter process exit script
// if interpreter process exit, then container need exit
StringBuilder sbStartCmd = new StringBuilder();
- sbStartCmd.append("sleep 10; ");
+ sbStartCmd.append("sleep 20; ");
sbStartCmd.append("process=RemoteInterpreterServer; ");
sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep |
awk '{print $1}'); ");
sbStartCmd.append("while [ ! -z \"$RUNNING_PIDS\" ]; ");
@@ -241,6 +243,7 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
long timeoutTime = startTime + getConnectTimeout();
// wait until interpreter send dockerStarted message through thrift rpc
synchronized (dockerStarted) {
+ LOGGER.info("Waiting for interpreter container to be ready");
while (!dockerStarted.get() && !Thread.currentThread().isInterrupted()) {
long timeToTimeout = timeoutTime - System.currentTimeMillis();
if (timeToTimeout <= 0) {
@@ -293,7 +296,7 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
Properties dockerProperties = new Properties();
// docker template properties
- dockerProperties.put("CONTAINER_ZEPPELIN_HOME", zeppelinHome);
+ dockerProperties.put("CONTAINER_ZEPPELIN_HOME", containerZeppelinHome);
dockerProperties.put("zeppelin.interpreter.container.image",
containerImage);
dockerProperties.put("zeppelin.interpreter.group.id", interpreterGroupId);
dockerProperties.put("zeppelin.interpreter.group.name",
interpreterGroupName);
@@ -313,11 +316,17 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
@VisibleForTesting
List<String> getListEnvs() {
// environment variables
- envs.put("ZEPPELIN_HOME", zeppelinHome);
- envs.put("ZEPPELIN_CONF_DIR", zeppelinHome + "/conf");
+ envs.put("ZEPPELIN_HOME", containerZeppelinHome);
+ envs.put("ZEPPELIN_CONF_DIR", containerZeppelinHome + "/conf");
envs.put("ZEPPELIN_FORCE_STOP", "true");
envs.put("SPARK_HOME", this.containerSparkHome);
+ // remove JAVA_HOME from envs to avoid misconfiguration in container
+ envs.remove("JAVA_HOME");
+
+ // remove PATH from envs to avoid misconfiguration in container
+ envs.remove("PATH");
+
// set container time zone
envs.put("TZ", zConf.getString(ConfVars.ZEPPELIN_DOCKER_TIME_ZONE));
@@ -441,18 +450,20 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
HashMap<String, String> copyFiles = new HashMap<>();
// Rebuild directory
- rmInContainer(containerId, zeppelinHome);
- mkdirInContainer(containerId, zeppelinHome);
+ rmInContainer(containerId, containerZeppelinHome);
+ mkdirInContainer(containerId, containerZeppelinHome);
// 1) zeppelin-site.xml is uploaded to `${CONTAINER_ZEPPELIN_HOME}`
directory in the container
String confPath = "/conf";
String zeplConfPath = getPathByHome(zeppelinHome, confPath);
- mkdirInContainer(containerId, zeplConfPath);
- copyFiles.put(zeplConfPath + "/zeppelin-site.xml", zeplConfPath +
"/zeppelin-site.xml");
- copyFiles.put(zeplConfPath + "/log4j.properties", zeplConfPath +
"/log4j.properties");
+ mkdirInContainer(containerId, containerZeppelinHome);
+ String containerZeplConfPath = containerZeppelinHome + confPath;
+ copyFiles.put(
+ zeplConfPath + "/zeppelin-site.xml", containerZeplConfPath +
"/zeppelin-site.xml");
+ copyFiles.put(zeplConfPath + "/log4j.properties", containerZeplConfPath +
"/log4j.properties");
copyFiles.put(zeplConfPath + "/log4j_yarn_cluster.properties",
- zeplConfPath + "/log4j_yarn_cluster.properties");
+ containerZeplConfPath + "/log4j_yarn_cluster.properties");
// 2) upload krb5.conf to container
String krb5conf = "/etc/krb5.conf";
@@ -512,26 +523,31 @@ public class DockerInterpreterProcess extends
RemoteInterpreterProcess {
// directory in the container
String binPath = "/bin";
String zeplBinPath = getPathByHome(zeppelinHome, binPath);
- mkdirInContainer(containerId, zeplBinPath);
- docker.copyToContainer(new File(zeplBinPath).toPath(), containerId,
zeplBinPath);
+ String containerZeplBinPath = containerZeppelinHome + binPath;
+ mkdirInContainer(containerId, containerZeplBinPath);
+ docker.copyToContainer(new File(zeplBinPath).toPath(), containerId,
containerZeplBinPath);
// 7) ${ZEPPELIN_HOME}/interpreter/spark is uploaded to
`${CONTAINER_ZEPPELIN_HOME}`
// directory in the container
String intpGrpPath = "/interpreter/" + interpreterGroupName;
String intpGrpAllPath = getPathByHome(zeppelinHome, intpGrpPath);
- mkdirInContainer(containerId, intpGrpAllPath);
- docker.copyToContainer(new File(intpGrpAllPath).toPath(), containerId,
intpGrpAllPath);
+ String containerIntpGrpPath = containerZeppelinHome + intpGrpPath;
+ mkdirInContainer(containerId, containerIntpGrpPath);
+ docker.copyToContainer(new File(intpGrpAllPath).toPath(), containerId,
containerIntpGrpPath);
// 8)
${ZEPPELIN_HOME}/lib/interpreter/zeppelin-interpreter-shaded-<version>.jar
// is uploaded to `${CONTAINER_ZEPPELIN_HOME}` directory in the
container
String intpPath = "/interpreter";
String intpAllPath = getPathByHome(zeppelinHome, intpPath);
+ String containerIntpAllPath = containerZeppelinHome + intpPath;
Collection<File> listFiles = FileUtils.listFiles(new File(intpAllPath),
FileFilterUtils.suffixFileFilter("jar"), null);
for (File jarfile : listFiles) {
String jarfilePath = jarfile.getAbsolutePath();
+ String jarfileName = jarfile.getName();
+ String containerJarfilePath = containerIntpAllPath + "/" + jarfileName;
if (!StringUtils.isBlank(jarfilePath)) {
- copyFiles.putIfAbsent(jarfilePath, jarfilePath);
+ copyFiles.putIfAbsent(jarfilePath, containerJarfilePath);
}
}
}
diff --git
a/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java
b/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java
index 6f0c7c3375..1e9ad7d0bd 100644
---
a/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java
+++
b/zeppelin-plugins/launcher/docker/src/test/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcessTest.java
@@ -55,7 +55,7 @@ class DockerInterpreterProcessTest {
DockerInterpreterProcess interpreterProcess = (DockerInterpreterProcess)
client;
assertEquals("name", interpreterProcess.getInterpreterSettingName());
- assertEquals("/spark", interpreterProcess.containerSparkHome);
+ assertEquals("/opt/spark", interpreterProcess.containerSparkHome);
assertTrue(interpreterProcess.uploadLocalLibToContainter);
assertNotEquals("http://my-docker-host:2375",
interpreterProcess.dockerHost);
}