This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d46fdb5cea2 [SPARK-48148][FOLLOWUP] Fix JSON parser feature flag
7d46fdb5cea2 is described below

commit 7d46fdb5cea2ac74a1c51a6e247a65949e947482
Author: Chenhao Li <[email protected]>
AuthorDate: Mon Dec 2 08:37:46 2024 +0900

    [SPARK-48148][FOLLOWUP] Fix JSON parser feature flag
    
    ### What changes were proposed in this pull request?
    
    https://github.com/apache/spark/pull/46408 attempts to set the feature flag 
`INCLUDE_SOURCE_IN_LOCATION` in the JSON parser and reverts the flag to the 
original value. The reverting code is incorrect and accidentally sets the 
`AUTO_CLOSE_SOURCE` feature to false. The reason is that 
`overrideStdFeatures(value, mask)` sets the feature flags selected by `mask` to 
`value`. `originalMask` is either 0 or 1. When it is 1, it selects 
`AUTO_CLOSE_SOURCE`, whose ordinal is 0 ([reference](https [...]
    
    ### Why are the changes needed?
    
    Performs the originally intended feature and avoids a memory leak.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    New unit test. It would fail without the change in the PR.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #49018 from chenhao-db/fix_json_parser_flag.
    
    Authored-by: Chenhao Li <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../org/apache/spark/sql/catalyst/json/JacksonParser.scala  | 11 +++--------
 .../apache/spark/sql/catalyst/json/JacksonParserSuite.scala | 13 +++++++++++++
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 13129d44fe0c..19e2c4228236 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -292,13 +292,8 @@ class JacksonParser(
 
     case _: StringType => (parser: JsonParser) => {
       // This must be enabled if we will retrieve the bytes directly from the 
raw content:
-      val includeSourceInLocation = 
JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION
-      val originalMask = if 
(includeSourceInLocation.enabledIn(parser.getFeatureMask)) {
-        1
-      } else {
-        0
-      }
-      parser.overrideStdFeatures(includeSourceInLocation.getMask, 
includeSourceInLocation.getMask)
+      val oldFeature = parser.getFeatureMask
+      parser.setFeatureMask(oldFeature | 
JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION.getMask)
       val result = parseJsonToken[UTF8String](parser, dataType) {
         case VALUE_STRING =>
           UTF8String.fromString(parser.getText)
@@ -344,7 +339,7 @@ class JacksonParser(
           }
         }
       // Reset back to the original configuration:
-      parser.overrideStdFeatures(includeSourceInLocation.getMask, originalMask)
+      parser.setFeatureMask(oldFeature)
       result
     }
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
index 587e22e787b8..89cdd38a3e7b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala
@@ -24,6 +24,19 @@ import org.apache.spark.sql.types.StructType
 import org.apache.spark.unsafe.types.UTF8String
 
 class JacksonParserSuite extends SparkFunSuite {
+  test("feature mask should remain unchanged") {
+    val options = new JSONOptions(Map.empty[String, String], "GMT", "")
+    val parser = new JacksonParser(StructType.fromDDL("a string"), options, 
false, Nil)
+    val input = """{"a": {"b": 1}}""".getBytes
+    // The creating function is usually called inside `parser.parse`, but we 
need the JSON parser
+    // here for testing purpose.
+    val jsonParser = options.buildJsonFactory().createParser(input)
+    val oldFeature = jsonParser.getFeatureMask
+    val result = parser.parse[Array[Byte]](input, (_, _) => jsonParser, 
UTF8String.fromBytes)
+    assert(result === Seq(InternalRow(UTF8String.fromString("""{"b": 1}"""))))
+    assert(jsonParser.getFeatureMask == oldFeature)
+  }
+
   test("skipping rows using pushdown filters") {
     def check(
       input: String = """{"i":1, "s": "a"}""",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to