This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7d46fdb5cea2 [SPARK-48148][FOLLOWUP] Fix JSON parser feature flag
7d46fdb5cea2 is described below
commit 7d46fdb5cea2ac74a1c51a6e247a65949e947482
Author: Chenhao Li <[email protected]>
AuthorDate: Mon Dec 2 08:37:46 2024 +0900
[SPARK-48148][FOLLOWUP] Fix JSON parser feature flag
### What changes were proposed in this pull request?
https://github.com/apache/spark/pull/46408 attempts to set the feature flag
`INCLUDE_SOURCE_IN_LOCATION` in the JSON parser and reverts the flag to the
original value. The reverting code is incorrect and accidentally sets the
`AUTO_CLOSE_SOURCE` feature to false. The reason is that
`overrideStdFeatures(value, mask)` sets the feature flags selected by `mask` to
`value`. `originalMask` is a value of 0/1. When it is 1, it selects
`AUTO_CLOSE_SOURCE`, whose ordinal is 0 ([reference](https [...]
### Why are the changes needed?
Performs the originally intended behavior and avoids a memory leak.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New unit test. It would fail without the change in the PR.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #49018 from chenhao-db/fix_json_parser_flag.
Authored-by: Chenhao Li <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../org/apache/spark/sql/catalyst/json/JacksonParser.scala | 11 +++--------
.../apache/spark/sql/catalyst/json/JacksonParserSuite.scala | 13 +++++++++++++
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 13129d44fe0c..19e2c4228236 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -292,13 +292,8 @@ class JacksonParser(
case _: StringType => (parser: JsonParser) => {
// This must be enabled if we will retrieve the bytes directly from the
raw content:
- val includeSourceInLocation =
JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION
- val originalMask = if
(includeSourceInLocation.enabledIn(parser.getFeatureMask)) {
- 1
- } else {
- 0
- }
- parser.overrideStdFeatures(includeSourceInLocation.getMask,
includeSourceInLocation.getMask)
+ val oldFeature = parser.getFeatureMask
+ parser.setFeatureMask(oldFeature |
JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION.getMask)
val result = parseJsonToken[UTF8String](parser, dataType) {
case VALUE_STRING =>
UTF8String.fromString(parser.getText)
@@ -344,7 +339,7 @@ class JacksonParser(
}
}
// Reset back to the original configuration:
- parser.overrideStdFeatures(includeSourceInLocation.getMask, originalMask)
+ parser.setFeatureMask(oldFeature)
result
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
index 587e22e787b8..89cdd38a3e7b 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
@@ -24,6 +24,19 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.unsafe.types.UTF8String
class JacksonParserSuite extends SparkFunSuite {
+ test("feature mask should remain unchanged") {
+ val options = new JSONOptions(Map.empty[String, String], "GMT", "")
+ val parser = new JacksonParser(StructType.fromDDL("a string"), options,
false, Nil)
+ val input = """{"a": {"b": 1}}""".getBytes
+ // The creating function is usually called inside `parser.parse`, but we
need the JSON parser
+ // here for testing purpose.
+ val jsonParser = options.buildJsonFactory().createParser(input)
+ val oldFeature = jsonParser.getFeatureMask
+ val result = parser.parse[Array[Byte]](input, (_, _) => jsonParser,
UTF8String.fromBytes)
+ assert(result === Seq(InternalRow(UTF8String.fromString("""{"b": 1}"""))))
+ assert(jsonParser.getFeatureMask == oldFeature)
+ }
+
test("skipping rows using pushdown filters") {
def check(
input: String = """{"i":1, "s": "a"}""",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]