This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new df720c179584 [SPARK-54843][SQL] Try_to_number expression not working for empty string input
df720c179584 is described below

commit df720c17958482796740089595366d7201bc2557
Author: Stefan Kandic <[email protected]>
AuthorDate: Fri Dec 26 22:06:13 2025 +0800

    [SPARK-54843][SQL] Try_to_number expression not working for empty string input
    
    ### What changes were proposed in this pull request?
    Catch the case in `ToNumberParser` where the input string is empty or consists only of whitespace, preventing a failure with an internal error later on when trying to create a `BigDecimal`.
    
    ### Why are the changes needed?
    Without this change, passing an empty string (`select try_to_number('', '99')`) would fail with the following exception:
    ```
    JVM stacktrace:
    java.lang.NumberFormatException
            at java.base/java.math.BigDecimal.<init>(BigDecimal.java:692)
            at java.base/java.math.BigDecimal.<init>(BigDecimal.java:471)
            at java.base/java.math.BigDecimal.<init>(BigDecimal.java:900)
            at org.apache.spark.sql.catalyst.util.ToNumberParser.parseResultToDecimalValue(ToNumberParser.scala:627)
            at org.apache.spark.sql.catalyst.util.ToNumberParser.parse(ToNumberParser.scala:499)
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    New unit tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #53609 from stefankandic/numFormatFix.
    
    Authored-by: Stefan Kandic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/util/ToNumberParser.scala     |  3 +++
 .../org/apache/spark/sql/StringFunctionsSuite.scala  | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
index ea2f48fafc0d..ffcf8ba2cb93 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
@@ -495,6 +495,9 @@ class ToNumberParser(numberFormat: String, errorOnFail: 
Boolean) extends Seriali
       // If we have consumed all the tokens in the format string, but 
characters remain unconsumed
       // in the input string, then the input string does not match the format 
string.
       formatMatchFailure(input, numberFormat)
+    } else if (parsedBeforeDecimalPoint.isEmpty && 
parsedAfterDecimalPoint.isEmpty) {
+      // If no digits were collected (e.g. input was all whitespace), treat as 
format match failure.
+      formatMatchFailure(input, numberFormat)
     } else {
       parseResultToDecimalValue(negateResult)
     }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index a1de322ac298..ff0ee19ae971 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -1342,6 +1342,26 @@ class StringFunctionsSuite extends QueryTest with 
SharedSparkSession {
     checkAnswer(df.select(try_to_number($"value", lit("$99.99"))), 
Seq(Row(null)))
   }
 
+  test("try_to_number with whitespace-only input should return NULL") {
+    // Empty string
+    checkAnswer(sql("select try_to_number('', '99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number('', '999')"), Seq(Row(null)))
+
+    // Spaces only
+    checkAnswer(sql("select try_to_number('   ', '99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number(' ', '9')"), Seq(Row(null)))
+
+    // Different whitespace characters (tabs, newlines)
+    checkAnswer(sql("select try_to_number('\t\t', '99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number('\n\n', '99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number(' \t\n ', '99')"), Seq(Row(null)))
+
+    // With format strings containing decimal points, dollar signs, etc.
+    checkAnswer(sql("select try_to_number('   ', '$99.99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number('', '999.99')"), Seq(Row(null)))
+    checkAnswer(sql("select try_to_number('\t', '9,999')"), Seq(Row(null)))
+  }
+
   test("SPARK-44905: stateful lastRegex causes NullPointerException on eval 
for regexp_replace") {
     val df = sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')")
     intercept[SparkRuntimeException](df.queryExecution.optimizedPlan)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to