This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 653b31e18b3f [SPARK-45771][CORE] Enable `spark.eventLog.rolling.enabled` by default
653b31e18b3f is described below

commit 653b31e18b3fc2546bd6b13b384459f8afddabdc
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Nov 2 13:14:04 2023 -0700

    [SPARK-45771][CORE] Enable `spark.eventLog.rolling.enabled` by default
    
    ### What changes were proposed in this pull request?
    
    This PR aims to enable `spark.eventLog.rolling.enabled` by default for Apache Spark 4.0.0.
    
    ### Why are the changes needed?
    
    Since Apache Spark 3.0.0, we have been using event log rolling not only for **long-running jobs**, but also for **some failed jobs**, to archive the partial event logs incrementally.
    - https://github.com/apache/spark/pull/25670
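
    As a hedged illustration (not part of this patch): rolling is controlled by
    `spark.eventLog.rolling.enabled` (flipped to `true` here) together with
    `spark.eventLog.rolling.maxFileSize`, the companion knob visible in the diff
    below, whose default is `128m`. A minimal sketch:

    ```scala
    import org.apache.spark.SparkConf

    // Sketch: turn on event logging with rolling, capping each rolled
    // file at 128m before the writer starts a new one. Event logging
    // itself (`spark.eventLog.enabled`) remains off by default.
    val conf = new SparkConf()
      .set("spark.eventLog.enabled", "true")
      .set("spark.eventLog.rolling.enabled", "true") // the new default
      .set("spark.eventLog.rolling.maxFileSize", "128m")
    ```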
    
    ### Does this PR introduce _any_ user-facing change?
    
    - No, because `spark.eventLog.enabled` is disabled by default.
    - For users with `spark.eventLog.enabled=true`, yes: the `spark-events` directory will have a different layout. However, all 3.3+ `Spark History Server` releases can read both old and new event logs, and I believe event log users are already using this configuration to avoid losing event logs for long-running jobs and some failed jobs.
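
    For upgrading users, a hedged sketch of the layout difference. The directory
    and file names below follow the `RollingEventLogFilesWriter` naming
    convention (an assumption based on the writer class exercised in the tests
    below; the application id is hypothetical):

    ```
    # Before (single-file writer):
    spark-events/
      app-20231102131404-0000

    # After (rolling writer, now the default):
    spark-events/
      eventlog_v2_app-20231102131404-0000/
        appstatus_app-20231102131404-0000.inprogress
        events_1_app-20231102131404-0000
        events_2_app-20231102131404-0000
    ```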
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43638 from dongjoon-hyun/SPARK-45771.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 core/src/main/scala/org/apache/spark/internal/config/package.scala     | 2 +-
 core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala     | 1 +
 .../org/apache/spark/deploy/history/EventLogFileWritersSuite.scala     | 2 +-
 .../scala/org/apache/spark/deploy/history/EventLogTestHelper.scala     | 1 +
 .../scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala   | 3 ++-
 docs/core-migration-guide.md                                           | 2 ++
 6 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 7b0fcf3433cf..143dd0c44ce8 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -238,7 +238,7 @@ package object config {
         "each event log file to the configured size.")
       .version("3.0.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   private[spark] val EVENT_LOG_ROLLING_MAX_FILE_SIZE =
     ConfigBuilder("spark.eventLog.rolling.maxFileSize")
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 88f015f864de..7ebb0165e620 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -731,6 +731,7 @@ class SparkSubmitSuite
         "--conf", "spark.master.rest.enabled=false",
         "--conf", 
"spark.executorEnv.HADOOP_CREDSTORE_PASSWORD=secret_password",
         "--conf", "spark.eventLog.enabled=true",
+        "--conf", "spark.eventLog.rolling.enabled=false",
         "--conf", "spark.eventLog.testing=true",
         "--conf", s"spark.eventLog.dir=${testDirPath.toUri.toString}",
         "--conf", "spark.hadoop.fs.defaultFS=unsupported://example.com",
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala
index 455e2e18b11e..b575cbc080c0 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala
@@ -66,7 +66,7 @@ abstract class EventLogFileWritersSuite extends SparkFunSuite with LocalSparkCon
     conf.set(EVENT_LOG_DIR, testDir.toString)
 
     // default config
-    buildWriterAndVerify(conf, classOf[SingleEventLogFileWriter])
+    buildWriterAndVerify(conf, classOf[RollingEventLogFilesWriter])
 
     conf.set(EVENT_LOG_ENABLE_ROLLING, true)
     buildWriterAndVerify(conf, classOf[RollingEventLogFilesWriter])
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala
index ea8da0108592..ac89f60955ee 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala
@@ -38,6 +38,7 @@ object EventLogTestHelper {
   def getLoggingConf(logDir: Path, compressionCodec: Option[String] = None): SparkConf = {
     val conf = new SparkConf
     conf.set(EVENT_LOG_ENABLED, true)
+    conf.set(EVENT_LOG_ENABLE_ROLLING, false)
     conf.set(EVENT_LOG_BLOCK_UPDATES, true)
     conf.set(EVENT_LOG_TESTING, true)
     conf.set(EVENT_LOG_DIR, logDir.toString)
diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
index cd8fac2c6570..939923e12b8e 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.deploy.history.{EventLogFileReader, SingleEventLogFileWriter}
 import org.apache.spark.deploy.history.EventLogTestHelper._
 import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics}
-import org.apache.spark.internal.config.{EVENT_LOG_COMPRESS, EVENT_LOG_DIR, EVENT_LOG_ENABLED}
+import org.apache.spark.internal.config.{EVENT_LOG_COMPRESS, EVENT_LOG_DIR, EVENT_LOG_ENABLE_ROLLING, EVENT_LOG_ENABLED}
 import org.apache.spark.io._
 import org.apache.spark.metrics.{ExecutorMetricType, MetricsSystem}
 import org.apache.spark.resource.ResourceProfile
@@ -163,6 +163,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
   test("SPARK-31764: isBarrier should be logged in event log") {
     val conf = new SparkConf()
     conf.set(EVENT_LOG_ENABLED, true)
+    conf.set(EVENT_LOG_ENABLE_ROLLING, false)
     conf.set(EVENT_LOG_COMPRESS, false)
     conf.set(EVENT_LOG_DIR, testDirPath.toString)
     val sc = new SparkContext("local", "test-SPARK-31764", conf)
diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index fb9471d0c1ae..09ba4b474e97 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -24,6 +24,8 @@ license: |
 
 ## Upgrading from Core 3.5 to 4.0
 
+- Since Spark 4.0, Spark will roll event logs to archive them incrementally. To restore the behavior before Spark 4.0, you can set `spark.eventLog.rolling.enabled` to `false`.
+
 - Since Spark 4.0, Spark will compress event logs. To restore the behavior before Spark 4.0, you can set `spark.eventLog.compress` to `false`.
 
 - Since Spark 4.0, `spark.shuffle.service.db.backend` is set to `ROCKSDB` by default which means Spark will use RocksDB store for shuffle service. To restore the behavior before Spark 4.0, you can set `spark.shuffle.service.db.backend` to `LEVELDB`.
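
For reference, a minimal sketch (not part of the patch) of restoring the pre-4.0 single-file behavior described in the new migration guide entry above:

```scala
import org.apache.spark.SparkConf

// Opt back into the single-file event log layout used before Spark 4.0.
val conf = new SparkConf()
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.rolling.enabled", "false")
```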


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
