This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 847b65eac370 [SPARK-46456][CORE] Add `spark.ui.jettyStopTimeout` to
set Jetty server stop timeout to unblock SparkContext shutdown
847b65eac370 is described below
commit 847b65eac370fc8ef98c617a2934b2fa0fcee250
Author: Kent Yao <[email protected]>
AuthorDate: Wed Dec 20 15:04:45 2023 -0800
[SPARK-46456][CORE] Add `spark.ui.jettyStopTimeout` to set Jetty server
stop timeout to unblock SparkContext shutdown
### What changes were proposed in this pull request?
Jetty's `_stopTimeout` sets the graceful stop time for each ContainerLifeCycle.
This pull request aims to address the issue of shutdown hooks being interrupted
during the shutdown process. By setting `_stopTimeout` to 5 seconds, we can
reduce the risk that modules in the SparkContext, such as MapOutputTracker and
BlockManager, are not stopped properly and leave resources uncleaned.
-
https://github.com/jetty/jetty.project/blob/1f34ece62b918a006231258474f5fa370c49df29/jetty-util/src/main/java/org/eclipse/jetty/util/component/AbstractLifeCycle.java#L53
```
private long _stopTimeout = 30000;
```
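This pull request makes the timeout configurable through
`spark.ui.jettyStopTimeout` (the default stays at `30s`, as in the diff below)
so that it can be reduced, e.g. to 5 seconds, taking into account the
value from the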
[QueuedThreadPool](https://git.eclipse.org/c/jetty/org.eclipse.jetty.project.git/tree/jetty-util/src/main/java/org/eclipse/jetty/util/thread/QueuedThreadPool.java#n96)
### Why are the changes needed?
In Jetty, the ContainerLifeCycle implementation manages a collection of
contained beans. For managed beans, it stops them one by one and waits up to a
specified time (30s) for each to stop. A single slow bean can therefore cause
the shutdown hook to time out.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
The issue can be reproduced easily in local-cluster mode with a proxied SparkUI.
#### Before
```
23/12/19 17:07:40 DEBUG QueuedThreadPool: Waiting for
Thread[MasterUI-81,5,main] for 14999
23/12/19 17:07:55 DEBUG QueuedThreadPool: Waiting for
Thread[MasterUI-81,5,main] for 14999
```
```
23/12/19 17:08:09 WARN ShutdownHookManager: ShutdownHook '' timeout,
java.util.concurrent.TimeoutException
java.util.concurrent.TimeoutException
at
java.base/java.util.concurrent.FutureTask.get(FutureTask.java:204)
at
org.apache.hadoop.util.ShutdownHookManager.executeShutdown(ShutdownHookManager.java:124)
at
org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:95)
23/12/19 17:08:09 ERROR Utils: Uncaught exception in thread shutdown-hook-0
java.lang.InterruptedException
at
java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:1679)
at
java.base/java.util.concurrent.ThreadPoolExecutor.awaitTermination(ThreadPoolExecutor.java:1464)
at org.apache.spark.rpc.netty.MessageLoop.stop(MessageLoop.scala:60)
at org.apache.spark.rpc.netty.Dispatcher.stop(Dispatcher.scala:205)
at
org.apache.spark.rpc.netty.NettyRpcEnv.cleanup(NettyRpcEnv.scala:333)
at
org.apache.spark.rpc.netty.NettyRpcEnv.shutdown(NettyRpcEnv.scala:311)
at
org.apache.spark.deploy.LocalSparkCluster.$anonfun$stop$4(LocalSparkCluster.scala:97)
at
org.apache.spark.deploy.LocalSparkCluster.$anonfun$stop$4$adapted(LocalSparkCluster.scala:97)
at scala.collection.IterableOnceOps.foreach(IterableOnce.scala:576)
at scala.collection.IterableOnceOps.foreach$(IterableOnce.scala:574)
at scala.collection.AbstractIterable.foreach(Iterable.scala:933)
at
org.apache.spark.deploy.LocalSparkCluster.stop(LocalSparkCluster.scala:97)
at
org.apache.spark.SparkContext$.$anonfun$createTaskScheduler$2(SparkContext.scala:3233)
at
org.apache.spark.SparkContext$.$anonfun$createTaskScheduler$2$adapted(SparkContext.scala:3232)
at
org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend.org$apache$spark$scheduler$cluster$StandaloneSchedulerBackend$$stop(StandaloneSchedulerBackend.scala:280)
at
org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend.stop(StandaloneSchedulerBackend.scala:143)
at
org.apache.spark.scheduler.SchedulerBackend.stop(SchedulerBackend.scala:34)
at
org.apache.spark.scheduler.SchedulerBackend.stop$(SchedulerBackend.scala:34)
at
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.stop(CoarseGrainedSchedulerBackend.scala:55)
at
org.apache.spark.scheduler.TaskSchedulerImpl.$anonfun$stop$2(TaskSchedulerImpl.scala:992)
at
org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1288)
at
org.apache.spark.scheduler.TaskSchedulerImpl.stop(TaskSchedulerImpl.scala:992)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$stop$4(DAGScheduler.scala:3005)
at
org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1288)
at
org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:3005)
at
org.apache.spark.SparkContext.$anonfun$stop$12(SparkContext.scala:2293)
at
org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1288)
at org.apache.spark.SparkContext.stop(SparkContext.scala:2293)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.stop(SparkSQLEnv.scala:88)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.$anonfun$main$2(SparkSQLCLIDriver.scala:151)
at
org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)
at
org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at
org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1842)
at
org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at scala.util.Try$.apply(Try.scala:210)
at
org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at
org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
at java.base/java.lang.Thread.run(Thread.java:840)
```
#### After
```
23/12/19 17:38:04 DEBUG QueuedThreadPool: Waiting for
Thread[MasterUI-78,5,main] for 2499
23/12/19 17:38:06 DEBUG QueuedThreadPool: Waiting for
Thread[MasterUI-78,5,main] for 2499
```
```
23/12/19 17:38:09 DEBUG QueuedThreadPool: Waiting for
Thread[MasterUI-81,5,main] for -3
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-78,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-79,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-77,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-82,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-80,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 WARN QueuedThreadPool: Couldn't stop
Thread[MasterUI-81,5,main]
at java.base17.0.9/sun.nio.ch.KQueue.poll(Native Method)
at
java.base17.0.9/sun.nio.ch.KQueueSelectorImpl.doSelect(KQueueSelectorImpl.java:122)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:129)
at
java.base17.0.9/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:146)
at
app//org.sparkproject.jetty.io.ManagedSelector.nioSelect(ManagedSelector.java:183)
at
app//org.sparkproject.jetty.io.ManagedSelector.select(ManagedSelector.java:190)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.select(ManagedSelector.java:606)
at
app//org.sparkproject.jetty.io.ManagedSelector$SelectorProducer.produce(ManagedSelector.java:543)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produceTask(EatWhatYouKill.java:362)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:186)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at
app//org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
at
app//org.sparkproject.jetty.io.ManagedSelector$$Lambda$775/0x000000c801527460.run(Unknown
Source)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at
app//org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
at java.base17.0.9/java.lang.Thread.run(Thread.java:840)
23/12/19 17:38:09 INFO MapOutputTrackerMasterEndpoint:
MapOutputTrackerMasterEndpoint stopped!
23/12/19 17:38:09 INFO MemoryStore: MemoryStore cleared
23/12/19 17:38:09 INFO BlockManager: BlockManager stopped
23/12/19 17:38:09 INFO BlockManagerMaster: BlockManagerMaster stopped
23/12/19 17:38:09 INFO
OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:
OutputCommitCoordinator stopped!
23/12/19 17:38:09 INFO SparkContext: Successfully stopped SparkContext
23/12/19 17:38:09 INFO ShutdownHookManager: Shutdown hook called
23/12/19 17:38:09 INFO ShutdownHookManager: Deleting directory
/private/var/folders/84/dgr9ykwn6yndcmq1kjxqvk200000gn/T/spark-8eabc592-87f7-4a3c-8884-594076b25df1
23/12/19 17:38:09 INFO ShutdownHookManager: Deleting directory
/private/var/folders/84/dgr9ykwn6yndcmq1kjxqvk200000gn/T/spark-04ca9e0a-819f-41bb-b67a-80356c4dcdd7
```
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #44413 from yaooqinn/SPARK-46456.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
core/src/main/scala/org/apache/spark/internal/config/UI.scala | 7 +++++++
core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 3 +++
2 files changed, 10 insertions(+)
diff --git a/core/src/main/scala/org/apache/spark/internal/config/UI.scala
b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
index f983308667e3..320808d5018c 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/UI.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/UI.scala
@@ -247,4 +247,11 @@ private[spark] object UI {
.version("3.4.0")
.booleanConf
.createWithDefault(true)
+
+ val UI_JETTY_STOP_TIMEOUT = ConfigBuilder("spark.ui.jettyStopTimeout")
+ .internal()
+ .doc("Timeout for Jetty servers started in UIs, such as SparkUI,
HistoryUI, etc, to stop.")
+ .version("4.0.0")
+ .timeConf(TimeUnit.MILLISECONDS)
+ .createWithDefaultString("30s")
}
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 50251975d733..849ee14c0afb 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -246,6 +246,7 @@ private[spark] object JettyUtils extends Logging {
serverName: String = "",
poolSize: Int = 200): ServerInfo = {
+ val stopTimeout = conf.get(UI_JETTY_STOP_TIMEOUT)
logInfo(s"Start Jetty $hostName:$port for $serverName")
// Start the server first, with no connectors.
val pool = new QueuedThreadPool(poolSize)
@@ -276,6 +277,7 @@ private[spark] object JettyUtils extends Logging {
val serverExecutor = new
ScheduledExecutorScheduler(s"$serverName-JettyScheduler", true)
try {
+ server.setStopTimeout(stopTimeout)
server.start()
// As each acceptor and each selector will use one thread, the number of
threads should at
@@ -298,6 +300,7 @@ private[spark] object JettyUtils extends Logging {
connector.setReuseAddress(!Utils.isWindows)
// spark-45248: set the idle timeout to prevent slow DoS
connector.setIdleTimeout(8000)
+ connector.setStopTimeout(stopTimeout)
// Currently we only use "SelectChannelConnector"
// Limit the max acceptor number to 8 so that we don't waste a lot of
threads
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]