This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new f1a93d19ca63 [SPARK-54184][K8S] Support
`spark.kubernetes.executor.deletedExecutorsCacheTimeout`
f1a93d19ca63 is described below
commit f1a93d19ca63f2df316f1834a3ccd08934018ef3
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Tue Nov 4 21:56:51 2025 -0800
[SPARK-54184][K8S] Support
`spark.kubernetes.executor.deletedExecutorsCacheTimeout`
### What changes were proposed in this pull request?
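This PR adds a new configuration,
`spark.kubernetes.executor.deletedExecutorsCacheTimeout`, which replaces the
hard-coded expiration time of the deleted executors cache in
`ExecutorPodsLifecycleManager`.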
### Why are the changes needed?
To allow users to control the TTL for the deleted executors cache.
Previously, it had a hard-coded value of 3 minutes. In some very slow clusters,
we need to remember deleted executors for longer than that.
https://github.com/apache/spark/blob/a8e35c407bc5340f83b35e5a2f0b0767c6baadb0/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala#L54-L57
### Does this PR introduce _any_ user-facing change?
No behavior change, because the default value (180 seconds) is the same as the previous hard-coded value (3 minutes).
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52884 from dongjoon-hyun/SPARK-54184.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit ada19082cfe7f091fe9d74b6aee6144673a444ef)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala | 9 +++++++++
.../scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala | 2 +-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
index f4d708f30b43..0ae9b4a302fb 100644
---
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
+++
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
@@ -716,6 +716,15 @@ private[spark] object Config extends Logging {
.booleanConf
.createWithDefault(true)
+ val KUBERNETES_DELETED_EXECUTORS_CACHE_TIMEOUT =
+ ConfigBuilder("spark.kubernetes.executor.deletedExecutorsCacheTimeout")
+ .internal()
+ .doc("Time-to-live (TTL) value for the cache for deleted executors")
+ .version("4.1.0")
+ .timeConf(TimeUnit.SECONDS)
+ .checkValue(_ >= 0, "deletedExecutorsCacheTimeout must be non-negative")
+ .createWithDefault(180)
+
val KUBERNETES_EXECUTOR_TERMINATION_GRACE_PERIOD_SECONDS =
ConfigBuilder("spark.kubernetes.executor.terminationGracePeriodSeconds")
.doc("Time to wait for graceful termination of executor pods.")
diff --git
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala
index 35386aff4a80..9aa99a6c5984 100644
---
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala
+++
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala
@@ -53,7 +53,7 @@ private[spark] class ExecutorPodsLifecycleManager(
// bounds.
private lazy val removedExecutorsCache =
CacheBuilder.newBuilder()
- .expireAfterWrite(3, TimeUnit.MINUTES)
+ .expireAfterWrite(conf.get(KUBERNETES_DELETED_EXECUTORS_CACHE_TIMEOUT),
TimeUnit.SECONDS)
.build[java.lang.Long, java.lang.Long]()
private var lastFullSnapshotTs: Long = 0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]