This is an automated email from the ASF dual-hosted git repository. zjffdu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new aff90f4 [ZEPPELIN-5126]. Allow user to specify spark.yarn.keytab and spark.yarn.principal for user impersonation aff90f4 is described below commit aff90f46c86bc5a4a986d984018a9d27f6319a40 Author: Jeff Zhang <zjf...@apache.org> AuthorDate: Tue Nov 10 10:26:07 2020 +0800 [ZEPPELIN-5126]. Allow user to specify spark.yarn.keytab and spark.yarn.principal for user impersonation ### What is this PR for? This is to improve the Spark Kerberos support in both the non-user-impersonation and user-impersonation cases. Users just need to either specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` in zeppelin-site.xml or specify the standard Spark settings `spark.yarn.keytab` and `spark.yarn.principal` in the Spark interpreter settings. ### What type of PR is it? [Improvement] ### Todos * [ ] - Task ### What is the Jira issue? * https://issues.apache.org/jira/browse/ZEPPELIN-5126 ### How should this be tested? * CI pass ### Screenshots (if appropriate) ### Questions: * Does the licenses files need update? No * Is there breaking changes for older versions? No * Does this needs documentation? No Author: Jeff Zhang <zjf...@apache.org> Closes #3967 from zjffdu/ZEPPELIN-5126 and squashes the following commits: 3fc83ee7b [Jeff Zhang] [ZEPPELIN-5126]. Allow user to specify spark.yarn.keytab and spark.yarn.principal for user impersonation --- docs/interpreter/spark.md | 18 ++++++++++++++---- .../interpreter/launcher/SparkInterpreterLauncher.java | 17 ++++++++++------- .../launcher/SparkInterpreterLauncherTest.java | 10 ++++------ 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index 537ae60..105cc74 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -456,6 +456,20 @@ e.g. Zeppelin automatically injects `ZeppelinContext` as variable `z` in your Scala/Python environment. 
`ZeppelinContext` provides some additional functions and utilities. See [Zeppelin-Context](../usage/other_features/zeppelin_context.html) for more details. +## Setting up Zeppelin with Kerberos +Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN: + +<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/kdc_zeppelin.png"> + +There're several ways to make spark work with kerberos enabled hadoop cluster in Zeppelin. + +1. Share one single hadoop cluster. +In this case you just need to specify `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal` in zeppelin-site.xml, Spark interpreter will use these setting by default. + +2. Work with multiple hadoop clusters. +In this case you can specify `spark.yarn.keytab` and `spark.yarn.principal` to override `zeppelin.server.kerberos.keytab` and `zeppelin.server.kerberos.principal`. + + ## User Impersonation In yarn mode, the user who launch the zeppelin server will be used to launch the spark yarn application. This is not a good practise. @@ -482,10 +496,6 @@ you need to enable user impersonation for more security control. In order the en impersonate in `zeppelin-site.xml`. -## Setting up Zeppelin with Kerberos -Logical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN: - -<img src="{{BASE_PATH}}/assets/themes/zeppelin/img/docs-img/kdc_zeppelin.png"> ## Deprecate Spark 2.2 and earlier versions Starting from 0.9, Zeppelin deprecate Spark 2.2 and earlier versions. So you will see a warning message when you use Spark 2.2 and earlier. 
diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java index dba9b03..e25e7da 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncher.java @@ -163,12 +163,14 @@ public class SparkInterpreterLauncher extends StandardInterpreterLauncher { } } - for (String name : sparkProperties.stringPropertyNames()) { - sparkConfBuilder.append(" --conf " + name + "=" + sparkProperties.getProperty(name)); - } - if (context.getOption().isUserImpersonate() && zConf.getZeppelinImpersonateSparkProxyUser()) { sparkConfBuilder.append(" --proxy-user " + context.getUserName()); + sparkProperties.remove("spark.yarn.keytab"); + sparkProperties.remove("spark.yarn.principal"); + } + + for (String name : sparkProperties.stringPropertyNames()) { + sparkConfBuilder.append(" --conf " + name + "=" + sparkProperties.getProperty(name)); } env.put("ZEPPELIN_SPARK_CONF", sparkConfBuilder.toString()); @@ -185,9 +187,10 @@ public class SparkInterpreterLauncher extends StandardInterpreterLauncher { } } - String keytab = zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_KEYTAB); - String principal = - zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_PRINCIPAL); + String keytab = properties.getProperty("spark.yarn.keytab", + zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_KEYTAB)); + String principal = properties.getProperty("spark.yarn.principal", + zConf.getString(ZeppelinConfiguration.ConfVars.ZEPPELIN_SERVER_KERBEROS_PRINCIPAL)); if (!StringUtils.isBlank(keytab) && !StringUtils.isBlank(principal)) { env.put("ZEPPELIN_SERVER_KERBEROS_KEYTAB", keytab); diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java index 5195710..6aff86a 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/launcher/SparkInterpreterLauncherTest.java @@ -253,14 +253,13 @@ public class SparkInterpreterLauncherTest { zeppelinHome + "/interpreter/zeppelin-interpreter-shaded-" + Util.getVersion() + ".jar"; String sparkrZip = sparkHome + "/R/lib/sparkr.zip#sparkr"; String sparkFiles = "file_1," + zeppelinHome + "/conf/log4j_yarn_cluster.properties"; - assertEquals(" --conf spark.yarn.dist.archives=" + sparkrZip + + assertEquals(" --proxy-user user1 --conf spark.yarn.dist.archives=" + sparkrZip + " --conf spark.yarn.isPython=true --conf spark.app.name=intpGroupId" + " --conf spark.yarn.maxAppAttempts=1" + " --conf spark.master=yarn" + " --conf spark.files=" + sparkFiles + " --conf spark.jars=" + sparkJars + " --conf spark.submit.deployMode=cluster" + - " --conf spark.yarn.submit.waitAppCompletion=false" + - " --proxy-user user1", + " --conf spark.yarn.submit.waitAppCompletion=false", interpreterProcess.getEnv().get("ZEPPELIN_SPARK_CONF")); Files.deleteIfExists(Paths.get(localRepoPath.toAbsolutePath().toString(), "test.jar")); FileUtils.deleteDirectory(localRepoPath.toFile()); @@ -302,15 +301,14 @@ public class SparkInterpreterLauncherTest { String sparkrZip = sparkHome + "/R/lib/sparkr.zip#sparkr"; // escape special characters String sparkFiles = "{}," + zeppelinHome + "/conf/log4j_yarn_cluster.properties"; - assertEquals(" --conf spark.yarn.dist.archives=" + sparkrZip + + assertEquals(" --proxy-user user1 --conf spark.yarn.dist.archives=" + sparkrZip + " --conf spark.yarn.isPython=true" + " --conf spark.app.name=intpGroupId" + " --conf 
spark.yarn.maxAppAttempts=1" + " --conf spark.master=yarn" + " --conf spark.files=" + sparkFiles + " --conf spark.jars=" + sparkJars + " --conf spark.submit.deployMode=cluster" + - " --conf spark.yarn.submit.waitAppCompletion=false" + - " --proxy-user user1", + " --conf spark.yarn.submit.waitAppCompletion=false", interpreterProcess.getEnv().get("ZEPPELIN_SPARK_CONF")); FileUtils.deleteDirectory(localRepoPath.toFile()); }