This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new 42e7d66b663 [SPARK-42403][CORE] JsonProtocol should handle null JSON strings
42e7d66b663 is described below
commit 42e7d66b66337539317bea399540792def45292c
Author: Josh Rosen <[email protected]>
AuthorDate: Fri Feb 10 21:54:28 2023 -0800
[SPARK-42403][CORE] JsonProtocol should handle null JSON strings
### What changes were proposed in this pull request?
This PR fixes a regression introduced by #36885 which broke JsonProtocol's
ability to parse `null` string values: the old Json4S-based parser would
correctly parse null literals, whereas the new code rejects them via an
overly-strict type check.
This PR solves this problem by relaxing the type checking in
`extractString` so that `null` literals in JSON can be parsed as `null` strings.
### Why are the changes needed?
Fix a regression which prevents the history server from parsing certain
types of event logs which contain null strings, including stacktraces
containing generated code frames and ExceptionFailure messages where the
exception message is `null`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added new unit test in JsonProtocolSuite.
Closes #39973 from JoshRosen/SPARK-42403-handle-null-strings-in-json-protocol-read-path.
Authored-by: Josh Rosen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 84ddd409c11e4da769c5b1f496f2b61c3d928c07)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../scala/org/apache/spark/util/JsonProtocol.scala | 2 +-
.../org/apache/spark/util/JsonProtocolSuite.scala | 32 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
index 75dab8dc535..6b75971fc25 100644
--- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -1611,7 +1611,7 @@ private[spark] object JsonProtocol {
}
def extractString: String = {
- require(json.isTextual, s"Expected string, got ${json.getNodeType}")
+ require(json.isTextual || json.isNull, s"Expected string or NULL, got ${json.getNodeType}")
json.textValue
}
}
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index be8a165d2d2..ea71a4b3f1b 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -778,6 +778,38 @@ class JsonProtocolSuite extends SparkFunSuite {
|}""".stripMargin
assert(JsonProtocol.sparkEventFromJson(unknownFieldsJson) === expected)
}
+
+ test("SPARK-42403: properly handle null string values") {
+ // Null string values can appear in a few different event types,
+ // so we test multiple known cases here:
+ val stackTraceJson =
+ """
+ |[
+ | {
+ | "Declaring Class": "someClass",
+ | "Method Name": "someMethod",
+ | "File Name": null,
+ | "Line Number": -1
+ | }
+ |]
+ |""".stripMargin
+ val stackTrace = JsonProtocol.stackTraceFromJson(stackTraceJson)
+ assert(stackTrace === Array(new StackTraceElement("someClass", "someMethod", null, -1)))
+
+ val exceptionFailureJson =
+ """
+ |{
+ | "Reason": "ExceptionFailure",
+ | "Class Name": "java.lang.Exception",
+ | "Description": null,
+ | "Stack Trace": [],
+ | "Accumulator Updates": []
+ |}
+ |""".stripMargin
+ val exceptionFailure =
+ JsonProtocol.taskEndReasonFromJson(exceptionFailureJson).asInstanceOf[ExceptionFailure]
+ assert(exceptionFailure.description == null)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]