Repository: spark Updated Branches: refs/heads/master bc7041a42 -> 3bbeca648
[SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error The spark.local.dir is configured as a list of multiple paths as follows: /data1/sparkenv/local,/data2/sparkenv/local. If the disk data2 of the driver node has an error, the application will exit since DiskBlockManager exits directly at createLocalDirs. If the disk data2 of the worker node has an error, the executor will exit as well. DiskBlockManager should not exit directly at createLocalDirs if one of the spark.local.dir paths has an error. Since spark.local.dir has multiple paths, a problem with one of them should not affect the overall situation. I think DiskBlockManager could ignore the bad directory at createLocalDirs. Author: yantangzhai <[email protected]> Closes #1274 from YanTangZhai/SPARK-2324 and squashes the following commits: 609bf48 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error df08673 [yantangzhai] [SPARK-2324] SparkContext should not exit directly when spark.local.dir is a list of multiple paths and one of them has error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3bbeca64 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3bbeca64 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3bbeca64 Branch: refs/heads/master Commit: 3bbeca648985b32bdf1eedef779cb2817eb6dfa4 Parents: bc7041a Author: yantangzhai <[email protected]> Authored: Thu Jul 3 10:14:35 2014 -0700 Committer: Aaron Davidson <[email protected]> Committed: Thu Jul 3 10:14:35 2014 -0700 ---------------------------------------------------------------------- .../org/apache/spark/storage/DiskBlockManager.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/spark/blob/3bbeca64/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
index 2ec46d4..673fc19 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
@@ -44,6 +44,10 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
    * directory, create multiple subdirectories that we will hash files into, in order to avoid
    * having really large inodes at the top level. */
   private val localDirs: Array[File] = createLocalDirs()
+  if (localDirs.isEmpty) {
+    logError("Failed to create any local dir.")
+    System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+  }
   private val subDirs = Array.fill(localDirs.length)(new Array[File](subDirsPerLocalDir))
 
   private var shuffleSender : ShuffleSender = null
@@ -116,7 +120,7 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
   private def createLocalDirs(): Array[File] = {
     logDebug(s"Creating local directories at root dirs '$rootDirs'")
     val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
-    rootDirs.split(",").map { rootDir =>
+    rootDirs.split(",").flatMap { rootDir =>
       var foundLocalDir = false
       var localDir: File = null
       var localDirId: String = null
@@ -136,11 +140,13 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD
         }
       }
       if (!foundLocalDir) {
-        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir")
-        System.exit(ExecutorExitCode.DISK_STORE_FAILED_TO_CREATE_DIR)
+        logError(s"Failed $MAX_DIR_CREATION_ATTEMPTS attempts to create local dir in $rootDir."
+          + " Ignoring this directory.")
+        None
+      } else {
+        logInfo(s"Created local directory at $localDir")
+        Some(localDir)
       }
-      logInfo(s"Created local directory at $localDir")
-      localDir
     }
   }
 
