This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 6d7ff7670cba [SPARK-54843][SQL] Try_to_number expression not working
for empty string input
6d7ff7670cba is described below
commit 6d7ff7670cbabca8b57984584b751f479a882bd4
Author: Stefan Kandic <[email protected]>
AuthorDate: Fri Dec 26 22:06:13 2025 +0800
[SPARK-54843][SQL] Try_to_number expression not working for empty string
input
### What changes were proposed in this pull request?
Handle the case in `ToNumberParser` where the input string is empty or consists
only of whitespace, so that no digits are collected. This prevents a failure
with an internal error later on when trying to create a `BigDecimal`.
### Why are the changes needed?
Without this change, passing an empty string
(`select try_to_number('', '99')`) would fail with the following exception:
```
JVM stacktrace:
java.lang.NumberFormatException
at java.base/java.math.BigDecimal.<init>(BigDecimal.java:692)
at java.base/java.math.BigDecimal.<init>(BigDecimal.java:471)
at java.base/java.math.BigDecimal.<init>(BigDecimal.java:900)
at org.apache.spark.sql.catalyst.util.ToNumberParser.parseResultToDecimalValue(ToNumberParser.scala:627)
at org.apache.spark.sql.catalyst.util.ToNumberParser.parse(ToNumberParser.scala:499)
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New unit tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53609 from stefankandic/numFormatFix.
Authored-by: Stefan Kandic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit df720c17958482796740089595366d7201bc2557)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/util/ToNumberParser.scala | 3 +++
.../org/apache/spark/sql/StringFunctionsSuite.scala | 20 ++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
index ea2f48fafc0d..ffcf8ba2cb93 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala
@@ -495,6 +495,9 @@ class ToNumberParser(numberFormat: String, errorOnFail:
Boolean) extends Seriali
// If we have consumed all the tokens in the format string, but
characters remain unconsumed
// in the input string, then the input string does not match the format
string.
formatMatchFailure(input, numberFormat)
+ } else if (parsedBeforeDecimalPoint.isEmpty &&
parsedAfterDecimalPoint.isEmpty) {
+ // If no digits were collected (e.g. input was all whitespace), treat as
format match failure.
+ formatMatchFailure(input, numberFormat)
} else {
parseResultToDecimalValue(negateResult)
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index a1de322ac298..ff0ee19ae971 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -1342,6 +1342,26 @@ class StringFunctionsSuite extends QueryTest with
SharedSparkSession {
checkAnswer(df.select(try_to_number($"value", lit("$99.99"))),
Seq(Row(null)))
}
+ test("try_to_number with whitespace-only input should return NULL") {
+ // Empty string
+ checkAnswer(sql("select try_to_number('', '99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number('', '999')"), Seq(Row(null)))
+
+ // Spaces only
+ checkAnswer(sql("select try_to_number(' ', '99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number(' ', '9')"), Seq(Row(null)))
+
+ // Different whitespace characters (tabs, newlines)
+ checkAnswer(sql("select try_to_number('\t\t', '99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number('\n\n', '99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number(' \t\n ', '99')"), Seq(Row(null)))
+
+ // With format strings containing decimal points, dollar signs, etc.
+ checkAnswer(sql("select try_to_number(' ', '$99.99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number('', '999.99')"), Seq(Row(null)))
+ checkAnswer(sql("select try_to_number('\t', '9,999')"), Seq(Row(null)))
+ }
+
test("SPARK-44905: stateful lastRegex causes NullPointerException on eval
for regexp_replace") {
val df = sql("select regexp_replace('', '[a\\\\d]{0, 2}', 'x')")
intercept[SparkRuntimeException](df.queryExecution.optimizedPlan)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]