This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 88eef06c221d [SPARK-54099][SQL] XML variant parser should fall back to string on decimal parsing errors
88eef06c221d is described below

commit 88eef06c221d9e1ec9e64e17296c20733572e3aa
Author: Xiaonan Yang <[email protected]>
AuthorDate: Mon Nov 3 08:03:08 2025 +0900

    [SPARK-54099][SQL] XML variant parser should fall back to string on decimal parsing errors
    
    ### What changes were proposed in this pull request?
    When parsing XML data with `parse_xml` that contains decimal numbers with very large
    exponents (e.g., "1E+2147483647"), the conversion to Variant type fails with:
    ```
    java.lang.ArithmeticException: BigInteger would overflow supported range
        at java.base/java.math.BigDecimal.setScale(BigDecimal.java:3000)
        at org.apache.spark.sql.catalyst.xml.StaxXmlParser$.org$apache$spark$sql$catalyst$xml$StaxXmlParser$$appendXMLCharacterToVariant(StaxXmlParser.scala:1335)
    ```
    
    This happens because the parser calls `setScale(0)` to normalize the decimal. When the
    scale is extremely negative (e.g., -2147483647), `setScale(0)` attempts to multiply the
    unscaled value by 10^2147483647, causing BigInteger overflow.
    
    This PR catches all non-fatal errors raised while parsing a string as a decimal in the
    XML variant parser and falls back to parsing the value as a string.
    
    ### Why are the changes needed?
    Bug fix.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    New UT.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52801 from xiaonanyang-db/SPARK-54099.
    
    Authored-by: Xiaonan Yang <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../spark/sql/catalyst/xml/StaxXmlParser.scala     | 25 +++++++++++-----------
 .../datasources/xml/XmlVariantSuite.scala          | 20 +++++++++++++++++
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
index 1a47fa7bd43f..3d6c57c9f746 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala
@@ -1278,18 +1278,19 @@ object StaxXmlParser {
 
     // Try parsing the value as decimal
     val decimalParser = ExprUtils.getDecimalParser(options.locale)
-    allCatch opt decimalParser(value) match {
-      case Some(decimalValue) =>
-        var d = decimalValue
-        if (d.scale() < 0) {
-          d = d.setScale(0)
-        }
-        if (d.scale <= VariantUtil.MAX_DECIMAL16_PRECISION &&
-            d.precision <= VariantUtil.MAX_DECIMAL16_PRECISION) {
-          builder.appendDecimal(d)
-          return
-        }
-      case _ =>
+    try {
+      var d = decimalParser(value)
+      if (d.scale() < 0) {
+        d = d.setScale(0)
+      }
+      if (d.scale <= VariantUtil.MAX_DECIMAL16_PRECISION &&
+        d.precision <= VariantUtil.MAX_DECIMAL16_PRECISION) {
+        builder.appendDecimal(d)
+        return
+      }
+    } catch {
+      case NonFatal(_) =>
+        // Ignore the exception and parse it as a string below
     }
 
     // If the character is of other primitive types, parse it as a string
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlVariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlVariantSuite.scala
index 5f5e017038b7..15713e759b81 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlVariantSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlVariantSuite.scala
@@ -942,6 +942,26 @@ class XmlVariantSuite extends QueryTest with SharedSparkSession with TestXmlData
       .map(_.getString(0).replaceAll("\\s+", ""))
     assert(xmlResult.head === xmlStr)
   }
+
+  test(
+    "[SPARK-54099] XML variant parser should fall back to string " +
+    "when failing to parse decimal values"
+  ) {
+    // Decimals with extreme exponents. Parsing these values as Decimal raises an
+    // ArithmeticException internally, so the variant parser should fall back to string:
+    val decimalString = Seq(
+      "1E+2147483647",    // Maximum int exponent - scale would be -2147483647
+      "5E+1000000000",    // 1 billion exponent
+      "1.23E+999999999",  // Very large exponent
+      "0.001E+2147483640" // Still results in huge effective exponent
+    )
+    decimalString.foreach { str =>
+      testParser(
+        xml = s"<ROW><decimal>$str</decimal></ROW>",
+        expectedJsonStr = s"""{"decimal":"$str"}"""
+      )
+    }
+  }
 }
 
 class XmlVariantSuiteWithLegacyParser extends XmlVariantSuite {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to