This is an automated email from the ASF dual-hosted git repository.
mridulm80 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 70c9b94218b9 [SPARK-49268][CORE] Log IO exceptions in SHS history
provider
70c9b94218b9 is described below
commit 70c9b94218b9e2c9dded31f1b08c1a441043e9a0
Author: Rob Reeves <[email protected]>
AuthorDate: Fri Aug 23 02:33:55 2024 -0500
[SPARK-49268][CORE] Log IO exceptions in SHS history provider
### What changes were proposed in this pull request?
This PR logs the IOException details in FsHistoryProvider, which is used by the Spark History Server (SHS).
### Why are the changes needed?
Before this change, when an IOException is caught, only generic messages are
logged, without the exception details. This makes troubleshooting hard
because the root cause is lost. This enhancement makes it easier to
troubleshoot IO issues.
### Does this PR introduce _any_ user-facing change?
Yes, the retry log messages now include the exception information and are logged at INFO level instead of WARN.
### How was this patch tested?
Manual testing
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47794 from robreeves/rob/fhp_logging.
Authored-by: Rob Reeves <[email protected]>
Signed-off-by: Mridul Muralidharan <mridul<at>gmail.com>
---
.../spark/deploy/history/FsHistoryProvider.scala | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 95b23c0f894f..ec227d40f21a 100644
---
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -1286,12 +1286,12 @@ private[history] class FsHistoryProvider(conf:
SparkConf, clock: Clock)
rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime())
hybridStore = store
} catch {
- case _: IOException if !retried =>
+ case ioe: IOException if !retried =>
// compaction may touch the file(s) which app rebuild wants to read
// compaction wouldn't run in short interval, so try again...
- logWarning(log"Exception occurred while rebuilding log path " +
+ logInfo(log"Exception occurred while rebuilding log path " +
log"${MDC(PATH, attempt.logPath)} - " +
- log"trying again...")
+ log"trying again...", ioe)
store.close()
memoryManager.release(appId, attempt.info.attemptId)
retried = true
@@ -1359,11 +1359,11 @@ private[history] class FsHistoryProvider(conf:
SparkConf, clock: Clock)
}
newStorePath = lease.commit(appId, attempt.info.attemptId)
} catch {
- case _: IOException if !retried =>
+ case ioe: IOException if !retried =>
// compaction may touch the file(s) which app rebuild wants to read
// compaction wouldn't run in short interval, so try again...
- logWarning(log"Exception occurred while rebuilding app ${MDC(APP_ID,
appId)} - " +
- log"trying again...")
+ logInfo(log"Exception occurred while rebuilding app ${MDC(APP_ID,
appId)} - " +
+ log"trying again...", ioe)
lease.rollback()
retried = true
@@ -1387,11 +1387,11 @@ private[history] class FsHistoryProvider(conf:
SparkConf, clock: Clock)
rebuildAppStore(s, reader, attempt.info.lastUpdated.getTime())
store = s
} catch {
- case _: IOException if !retried =>
+ case ioe: IOException if !retried =>
// compaction may touch the file(s) which app rebuild wants to read
// compaction wouldn't run in short interval, so try again...
- logWarning(log"Exception occurred while rebuilding log path " +
- log"${MDC(LogKeys.PATH, attempt.logPath)} - trying again...")
+ logInfo(log"Exception occurred while rebuilding log path " +
+ log"${MDC(LogKeys.PATH, attempt.logPath)} - trying again...", ioe)
retried = true
case e: Exception =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]