Repository: spark Updated Branches: refs/heads/master e464f0ac2 -> 915f8eeb3
[SPARK-4584] [yarn] Remove security manager from Yarn AM. The security manager adds a lot of overhead to the runtime of the app, and causes a severe performance regression. Even stubbing out all unneeded methods (all except checkExit()) does not help. So, instead, penalize users who do an explicit System.exit() by leaving them in "undefined behavior" territory: if they do that, the Yarn backend won't be able to report the final app status to the RM. The result is that the final status of the application might not match the user's expectations. One side-effect of the change is that users who do an explicit System.exit() will lose the AM retry functionality. Since there is no way to know if the exit was because of success or failure, the AM right now errs on the side of it being a successful exit. Author: Marcelo Vanzin <[email protected]> Closes #3484 from vanzin/SPARK-4584 and squashes the following commits: 21f2502 [Marcelo Vanzin] Do not retry apps that use System.exit(). 4198b3b [Marcelo Vanzin] [SPARK-4584] [yarn] Remove security manager from Yarn AM. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/915f8eeb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/915f8eeb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/915f8eeb Branch: refs/heads/master Commit: 915f8eeb3a493a0bb4b8d05d795ddd21f373d2ff Parents: e464f0a Author: Marcelo Vanzin <[email protected]> Authored: Fri Nov 28 15:15:30 2014 -0500 Committer: Patrick Wendell <[email protected]> Committed: Fri Nov 28 15:16:05 2014 -0500 ---------------------------------------------------------------------- .../spark/deploy/yarn/ApplicationMaster.scala | 60 +++++--------------- 1 file changed, 14 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/915f8eeb/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala ---------------------------------------------------------------------- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e90672c..987b337 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, @volatile private var exitCode = 0 @volatile private var unregistered = false @volatile private var finished = false - @volatile private var finalStatus = FinalApplicationStatus.UNDEFINED + @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED @volatile private var finalMsg: String = "" @volatile private var userClassThread: Thread = _ @@ -106,10 +106,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, val isLastAttempt = client.getAttemptId().getAttemptId() >= maxAppAttempts if (!finished) { - // this shouldn't ever happen, but if it does assume weird failure - finish(FinalApplicationStatus.FAILED, - ApplicationMaster.EXIT_UNCAUGHT_EXCEPTION, - "shutdown hook called without cleanly finishing") + // This happens when the user application calls System.exit(). We have the choice + // of either failing or succeeding at this point. We report success to avoid + // retrying applications that have succeeded (System.exit(0)), which means that + // applications that explicitly exit with a non-zero status will also show up as + // succeeded in the RM UI. + finish(finalStatus, + ApplicationMaster.EXIT_SUCCESS, + "Shutdown hook called before final status was reported.") } if (!unregistered) { @@ -164,17 +168,18 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, final def finish(status: FinalApplicationStatus, code: Int, msg: String = null) = synchronized { if (!finished) { + val inShutdown = Utils.inShutdown() logInfo(s"Final app status: ${status}, exitCode: ${code}" + Option(msg).map(msg => s", (reason: $msg)").getOrElse("")) exitCode = code finalStatus = status finalMsg = msg finished = true - if (Thread.currentThread() != reporterThread && reporterThread != null) { + if (!inShutdown && Thread.currentThread() != reporterThread && reporterThread != null) { logDebug("shutting down reporter thread") reporterThread.interrupt() } - if (Thread.currentThread() != userClassThread && userClassThread != null) { + if (!inShutdown && Thread.currentThread() != userClassThread && userClassThread != null) { logDebug("shutting down user thread") userClassThread.interrupt() } @@ -214,7 +219,6 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, private def runDriver(securityMgr: SecurityManager): Unit = { addAmIpFilter() - setupSystemSecurityManager() userClassThread = startUserClass() // This a bit hacky, but we need to wait until the spark.driver.port property has @@ -403,45 +407,9 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } /** - * This system security manager applies to the entire process. - * It's main purpose is to handle the case if the user code does a System.exit. - * This allows us to catch that and properly set the YARN application status and - * cleanup if needed. - */ - private def setupSystemSecurityManager(): Unit = { - try { - var stopped = false - System.setSecurityManager(new java.lang.SecurityManager() { - override def checkExit(paramInt: Int) { - if (!stopped) { - logInfo("In securityManager checkExit, exit code: " + paramInt) - if (paramInt == 0) { - finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS) - } else { - finish(FinalApplicationStatus.FAILED, - paramInt, - "User class exited with non-zero exit code") - } - stopped = true - } - } - // required for the checkExit to work properly - override def checkPermission(perm: java.security.Permission): Unit = {} - }) - } - catch { - case e: SecurityException => - finish(FinalApplicationStatus.FAILED, - ApplicationMaster.EXIT_SECURITY, - "Error in setSecurityManager") - logError("Error in setSecurityManager:", e) - } - } - - /** * Start the user class, which contains the spark driver, in a separate Thread. - * If the main routine exits cleanly or exits with System.exit(0) we - * assume it was successful, for all other cases we assume failure. + * If the main routine exits cleanly or exits with System.exit(N) for any N + * we assume it was successful, for all other cases we assume failure. * * Returns the user thread that was started. */ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
