This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3946b24 [SPARK-31011][CORE] Log better message if SIGPWR is not
supported while setting up decommission
3946b24 is described below
commit 3946b243284fbd3bd98b456115ae194ad49fe8fe
Author: Jungtaek Lim (HeartSaVioR) <[email protected]>
AuthorDate: Wed Mar 11 20:27:00 2020 -0700
[SPARK-31011][CORE] Log better message if SIGPWR is not supported while
setting up decommission
### What changes were proposed in this pull request?
This patch changes the code to log a better message (at least one relevant to
decommission) when registering a signal handler for SIGPWR fails. SIGPWR is
non-POSIX and not all Unix-like OSes support it; macOS is a readily available
example of one that does not.
### Why are the changes needed?
Spark already logs a message on failing to register a handler for SIGPWR, but
the error message is too general and doesn't convey the impact. End users should
be notified that failing to register a handler for SIGPWR
effectively "disables" the decommission feature.
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
Manually tested by running a standalone master/worker on macOS 10.14.6, with
`spark.worker.decommission.enabled=true`, and submitting an example application to
run executors.
(NOTE: the actual message may differ slightly, as the message can be updated
during the review phase.)
For worker log:
```
20/03/06 17:19:13 INFO Worker: Registering SIGPWR handler to trigger
decommissioning.
20/03/06 17:19:13 INFO SignalUtils: Registering signal handler for PWR
20/03/06 17:19:13 WARN SignalUtils: Failed to register SIGPWR - disabling
worker decommission.
java.lang.IllegalArgumentException: Unknown signal: PWR
at java.base/jdk.internal.misc.Signal.<init>(Signal.java:148)
at jdk.unsupported/sun.misc.Signal.<init>(Signal.java:139)
at
org.apache.spark.util.SignalUtils$.$anonfun$registerSignal$1(SignalUtils.scala:95)
at
scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)
at
org.apache.spark.util.SignalUtils$.registerSignal(SignalUtils.scala:93)
at org.apache.spark.util.SignalUtils$.register(SignalUtils.scala:81)
at org.apache.spark.deploy.worker.Worker.<init>(Worker.scala:73)
at
org.apache.spark.deploy.worker.Worker$.startRpcEnvAndEndpoint(Worker.scala:887)
at org.apache.spark.deploy.worker.Worker$.main(Worker.scala:855)
at org.apache.spark.deploy.worker.Worker.main(Worker.scala)
```
For executor:
```
20/03/06 17:21:52 INFO CoarseGrainedExecutorBackend: Registering PWR
handler.
20/03/06 17:21:52 INFO SignalUtils: Registering signal handler for PWR
20/03/06 17:21:52 WARN SignalUtils: Failed to register SIGPWR - disabling
decommission feature.
java.lang.IllegalArgumentException: Unknown signal: PWR
at java.base/jdk.internal.misc.Signal.<init>(Signal.java:148)
at jdk.unsupported/sun.misc.Signal.<init>(Signal.java:139)
at
org.apache.spark.util.SignalUtils$.$anonfun$registerSignal$1(SignalUtils.scala:95)
at
scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)
at
org.apache.spark.util.SignalUtils$.registerSignal(SignalUtils.scala:93)
at org.apache.spark.util.SignalUtils$.register(SignalUtils.scala:81)
at
org.apache.spark.executor.CoarseGrainedExecutorBackend.onStart(CoarseGrainedExecutorBackend.scala:86)
at
org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:120)
at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:203)
at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100)
at
org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75)
at
org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41)
at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
```
Closes #27832 from HeartSaVioR/SPARK-31011.
Authored-by: Jungtaek Lim (HeartSaVioR) <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/deploy/worker/Worker.scala | 3 +-
.../executor/CoarseGrainedExecutorBackend.scala | 3 +-
.../scala/org/apache/spark/util/SignalUtils.scala | 43 +++++++++++++++++-----
3 files changed, 37 insertions(+), 12 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index 738caf9..aa8c46f 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -69,7 +69,8 @@ private[deploy] class Worker(
// If worker decommissioning is enabled register a handler on PWR to
shutdown.
if (conf.get(WORKER_DECOMMISSION_ENABLED)) {
logInfo("Registering SIGPWR handler to trigger decommissioning.")
- SignalUtils.register("PWR")(decommissionSelf)
+ SignalUtils.register("PWR", "Failed to register SIGPWR handler - " +
+ "disabling worker decommission feature.")(decommissionSelf)
} else {
logInfo("Worker decommissioning not enabled, SIGPWR will result in
exiting.")
}
diff --git
a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index faf03a6..6625457 100644
---
a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++
b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -82,7 +82,8 @@ private[spark] class CoarseGrainedExecutorBackend(
override def onStart(): Unit = {
logInfo("Registering PWR handler.")
- SignalUtils.register("PWR")(decommissionSelf)
+ SignalUtils.register("PWR", "Failed to register SIGPWR handler - " +
+ "disabling decommission feature.")(decommissionSelf)
logInfo("Connecting to driver: " + driverUrl)
try {
diff --git a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala
b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala
index 230195d..36ecb42 100644
--- a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala
@@ -56,17 +56,40 @@ private[spark] object SignalUtils extends Logging {
*
* All actions for a given signal are run in a separate thread.
*/
- def register(signal: String)(action: => Boolean): Unit = synchronized {
+ def register(signal: String)(action: => Boolean): Unit = {
if (SystemUtils.IS_OS_UNIX) {
- try {
- val handler = handlers.getOrElseUpdate(signal, {
- logInfo("Registering signal handler for " + signal)
- new ActionHandler(new Signal(signal))
- })
- handler.register(action)
- } catch {
- case ex: Exception => logWarning(s"Failed to register signal handler
for " + signal, ex)
- }
+ register(signal, s"Failed to register signal handler for $signal",
+ logStackTrace = true)(action)
+ }
+ }
+
+ /**
+ * Adds an action to be run when a given signal is received by this process.
+ *
+ * This method receives failMessage as additional parameter, which would be
logged when it fails
+ * to register the signal. Here the failures include the cases 1) OS doesn't
support signal at
+ * all 2) OS doesn't support given signal (Could be possible with non-POSIX
signals)
+ *
+ * All actions for a given signal are run in a separate thread.
+ */
+ def register(
+ signal: String,
+ failMessage: String,
+ logStackTrace: Boolean = true)(
+ action: => Boolean): Unit = synchronized {
+ try {
+ val handler = handlers.getOrElseUpdate(signal, {
+ logInfo(s"Registering signal handler for $signal")
+ new ActionHandler(new Signal(signal))
+ })
+ handler.register(action)
+ } catch {
+ case ex: Exception =>
+ if (logStackTrace) {
+ logWarning(failMessage, ex)
+ } else {
+ logWarning(failMessage)
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]