This is an automated email from the ASF dual-hosted git repository. zjffdu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new 768bbf9 [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter 768bbf9 is described below commit 768bbf9ff4f735c2f3e65085cb8fdc6eabff779f Author: Jeff Zhang <zjf...@apache.org> AuthorDate: Tue Mar 10 23:39:11 2020 +0800 [ZEPPELIN-4673]. Add option to only allow yarn-cluster mode for spark interpreter ### What is this PR for? Yarn client mode and local mode run the Spark driver on the same machine as the Zeppelin server, which is dangerous in production because that machine may run out of memory when many Spark interpreters are running at the same time. This PR introduces a configuration option, `zeppelin.spark.only_yarn_cluster`, which is turned off by default. When it is turned on, only yarn-cluster mode can be used. ### What type of PR is it? [ Improvement ] ### Todos * [ ] - Task ### What is the Jira issue? * https://issues.apache.org/jira/browse/ZEPPELIN-4673 ### How should this be tested? * Manually tested ### Screenshots (if appropriate) ### Questions: * Do the license files need an update? No * Are there breaking changes for older versions? No * Does this need documentation? No Author: Jeff Zhang <zjf...@apache.org> Closes #3697 from zjffdu/ZEPPELIN-4673 and squashes the following commits: d51e423a8 [Jeff Zhang] [ZEPPELIN-4673]. 
Add option to only allow yarn-cluster mode for spark interpreter --- conf/zeppelin-site.xml.template | 6 ++++++ docs/interpreter/spark.md | 2 ++ .../main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java | 7 ++++++- .../zeppelin/interpreter/launcher/SparkInterpreterLauncher.java | 4 ++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index 8ef4edc..6d33433 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -718,4 +718,10 @@ Disable it can save lots of memory</description> </property> +<property> + <name>zeppelin.spark.only_yarn_cluster</name> + <value>false</value> + <description>Whether only allow yarn cluster mode</description> +</property> + </configuration> diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index 277efb3..3c07e01 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -259,6 +259,8 @@ For the further information about Spark & Zeppelin version compatibility, please > Note that without exporting `SPARK_HOME`, it's running in local mode with > included version of Spark. The included version may vary depending on the > build profile. +> Yarn client mode and local mode will run driver in the same machine with zeppelin server, this would be dangerous for production. Because it may run out of memory when there's many spark interpreters running at the same time. So we suggest you only allow yarn-cluster mode via setting `zeppelin.spark.only_yarn_cluster` in `zeppelin-site.xml`. + ## SparkContext, SQLContext, SparkSession, ZeppelinContext SparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext`, `spark` and `z`, respectively, in Scala, Kotlin, Python and R environments. 
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java index dfcf840..cbd894e 100644 --- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java +++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java @@ -739,6 +739,10 @@ public class ZeppelinConfiguration extends XMLConfiguration { return getRelativeDir(ConfVars.ZEPPELIN_SEARCH_INDEX_PATH); } + public Boolean isOnlyYarnCluster() { + return getBoolean(ConfVars.ZEPPELIN_SPARK_ONLY_YARN_CLUSTER); + } + public String getClusterAddress() { return getString(ConfVars.ZEPPELIN_CLUSTER_ADDR); } @@ -998,7 +1002,8 @@ public class ZeppelinConfiguration extends XMLConfiguration { ZEPPELIN_SEARCH_INDEX_REBUILD("zeppelin.search.index.rebuild", false), ZEPPELIN_SEARCH_USE_DISK("zeppelin.search.use.disk", true), ZEPPELIN_SEARCH_INDEX_PATH("zeppelin.search.index.path", "/tmp/zeppelin-index"), - ZEPPELIN_JOBMANAGER_ENABLE("zeppelin.jobmanager.enable", true); + ZEPPELIN_JOBMANAGER_ENABLE("zeppelin.jobmanager.enable", true), + ZEPPELIN_SPARK_ONLY_YARN_CLUSTER("zeppelin.spark.only_yarn_cluster", false); private String varName; @SuppressWarnings("rawtypes") diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java index 5252eeb..9255c98 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java @@ -76,6 +76,10 @@ public class SparkInterpreterLauncher extends StandardInterpreterLauncher { if (isYarnMode() && getDeployMode().equals("cluster")) { env.put("ZEPPELIN_SPARK_YARN_CLUSTER", "true"); 
sparkProperties.setProperty("spark.yarn.submit.waitAppCompletion", "false"); + } else if (zConf.isOnlyYarnCluster()){ + throw new IOException("Only yarn-cluster mode is allowed, please set " + + ZeppelinConfiguration.ConfVars.ZEPPELIN_SPARK_ONLY_YARN_CLUSTER.getVarName() + + " to false if you want to use other modes."); } StringBuilder sparkConfBuilder = new StringBuilder();