This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new 212d7b80219a [SPARK-57168][SQL] Simplify GetTimestamp codegen under
ANSI mode
212d7b80219a is described below
commit 212d7b80219a28a21b1e78b0ea310101d337b0c2
Author: Gengliang Wang <[email protected]>
AuthorDate: Sun May 31 19:08:47 2026 -0700
[SPARK-57168][SQL] Simplify GetTimestamp codegen under ANSI mode
### What changes were proposed in this pull request?
Add `DateTimeExpressionUtils.parseToTimestampExact(TimestampFormatter
formatter, String input, long downScaleFactor, boolean forTimestampNTZ, String
suggestedFuncOnFail)` and route the ANSI (`failOnError = true`) eval and
codegen paths of `ToTimestamp` (the base of `GetTimestamp` / `to_timestamp`,
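`unix_timestamp`, etc.) through it. The helper parses via
`parseWithoutTimeZone` (TIMESTAMP_NTZ, no down-scaling) or `parse` followed by
`/ downScaleFactor`, and translates a `DateTimeException` (which also covers
`DateTimeParseException`) or a `ParseException` into `ansiDateTimeParseError`
carrying the suggested fall-back function.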
`ToTimestamp.doGenCode` previously emitted the same 5-line `try { parse }
catch (DateTimeException) catch (ParseException)` block at both string call
sites (the cached-formatter path and the per-row-formatter path). A local
`parseTimestampCode` now dispatches on `failOnError`: the ANSI branch emits a
single `parseToTimestampExact(...)` call, while the non-ANSI branch keeps the
inline `try/catch -> isNull` form (the same shape used by the already-merged
`MakeDate` / `MakeInterval` cleanups).
### Why are the changes needed?
Part of SPARK-56908 (umbrella). Collapsing the duplicated inline try/catch
to a single helper call shrinks the generated Java for the common
`to_timestamp` / `unix_timestamp` family, helping with the JVM 64KB method /
constant-pool limits, Janino compile time, and JIT work.
### Does this PR introduce _any_ user-facing change?
No. The compiled behavior is identical; only the emitted Java source text
changes.
### How was this patch tested?
```
build/sbt "catalyst/testOnly *DateExpressionsSuite"
```
75/75 pass (exercised both with and without whole-stage codegen).
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
Closes #56218 from gengliangwang/spark-get-timestamp-codegen.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
(cherry picked from commit bab316b7b600a1a14fa8d9350c5f7a2a096bbf72)
Signed-off-by: Gengliang Wang <[email protected]>
---
.../expressions/DateTimeExpressionUtils.java | 30 ++++++++++
.../catalyst/expressions/datetimeExpressions.scala | 70 ++++++++++++----------
2 files changed, 68 insertions(+), 32 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
index 97fc2abb3255..16312cdb648a 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.expressions;
+import java.text.ParseException;
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.LocalDateTime;
@@ -26,6 +27,7 @@ import org.apache.spark.SparkDateTimeException;
import org.apache.spark.sql.catalyst.util.DateTimeConstants;
import org.apache.spark.sql.catalyst.util.DateTimeUtils;
import org.apache.spark.sql.catalyst.util.IntervalUtils;
+import org.apache.spark.sql.catalyst.util.TimestampFormatter;
import org.apache.spark.sql.errors.QueryExecutionErrors;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.unsafe.types.CalendarInterval;
@@ -135,4 +137,32 @@ public final class DateTimeExpressionUtils {
throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);
}
}
+
+ /**
+ * Parses {@code input} to a timestamp for {@code ToTimestamp} expressions
+ * (e.g. {@code to_timestamp}) in ANSI mode ({@code failOnError = true}).
+ * For a TIMESTAMP_NTZ result the formatter's {@code parseWithoutTimeZone} is
+ * used and {@code downScaleFactor} is not applied; otherwise the parsed micros
+ * are divided by {@code downScaleFactor}. A {@link DateTimeException} (which
+ * also covers {@code DateTimeParseException}) or a {@link ParseException} is
+ * translated to {@code ansiDateTimeParseError} carrying the suggested
+ * fall-back function; any other exception (e.g. {@code IllegalStateException})
+ * propagates unchanged.
+ */
+ public static long parseToTimestampExact(
+ TimestampFormatter formatter,
+ String input,
+ long downScaleFactor,
+ boolean forTimestampNTZ,
+ String suggestedFuncOnFail) {
+ try {
+ if (forTimestampNTZ) {
+ return formatter.parseWithoutTimeZone(input);
+ } else {
+ return formatter.parse(input) / downScaleFactor;
+ }
+ } catch (DateTimeException | ParseException e) {
+ throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail);
+ }
+ }
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 3274a268e158..9b16a5685fb1 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1324,18 +1324,20 @@ abstract class ToTimestamp
null
} else {
val formatter =
formatterOption.getOrElse(getFormatter(fmt.toString))
- try {
- if (forTimestampNTZ) {
-
formatter.parseWithoutTimeZone(t.asInstanceOf[UTF8String].toString)
- } else {
- formatter.parse(t.asInstanceOf[UTF8String].toString) /
downScaleFactor
+ val str = t.asInstanceOf[UTF8String].toString
+ if (failOnError) {
+ DateTimeExpressionUtils.parseToTimestampExact(
+ formatter, str, downScaleFactor, forTimestampNTZ, suggestedFuncOnFail)
+ } else {
+ try {
+ if (forTimestampNTZ) {
+ formatter.parseWithoutTimeZone(str)
+ } else {
+ formatter.parse(str) / downScaleFactor
+ }
+ } catch {
+ case e if isParseError(e) => null
}
- } catch {
- case e: DateTimeException if failOnError =>
- throw QueryExecutionErrors.ansiDateTimeParseError(e,
suggestedFuncOnFail)
- case e: ParseException if failOnError =>
- throw QueryExecutionErrors.ansiDateTimeParseError(e,
suggestedFuncOnFail)
- case e if isParseError(e) => null
}
}
}
@@ -1344,11 +1346,7 @@ abstract class ToTimestamp
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val javaType = CodeGenerator.javaType(dataType)
- val parseErrorBranch: String = if (failOnError) {
- s"throw QueryExecutionErrors.ansiDateTimeParseError(e,
\"${suggestedFuncOnFail}\");"
- } else {
- s"${ev.isNull} = true;"
- }
+ val utils = classOf[DateTimeExpressionUtils].getName
val parseMethod = if (forTimestampNTZ) {
"parseWithoutTimeZone"
} else {
@@ -1359,21 +1357,35 @@ abstract class ToTimestamp
} else {
s"/ $downScaleFactor"
}
+ // Emits the string -> timestamp parse body. The ANSI (failOnError) branch
+ // delegates the parse and the parse-error -> ANSI error translation to
+ // DateTimeExpressionUtils.parseToTimestampExact, collapsing the inline
+ // try/catch to a single call. The non-ANSI branch keeps the inline
+ // try/catch that maps a parse failure to a null result.
+ def parseTimestampCode(formatterExpr: String, inputExpr: String): String = {
+ if (failOnError) {
+ s"""${ev.value} = $utils.parseToTimestampExact(
+ | $formatterExpr, $inputExpr, ${downScaleFactor}L,
+ | $forTimestampNTZ, "$suggestedFuncOnFail");""".stripMargin
+ } else {
+ s"""
+ |try {
+ | ${ev.value} = $formatterExpr.$parseMethod($inputExpr) $downScaleCode;
+ |} catch (java.time.DateTimeException e) {
+ | ${ev.isNull} = true;
+ |} catch (java.text.ParseException e) {
+ | ${ev.isNull} = true;
+ |}
+ |""".stripMargin
+ }
+ }
left.dataType match {
case _: StringType => formatterOption.map { fmt =>
val df = classOf[TimestampFormatter].getName
val formatterName = ctx.addReferenceObj("formatter", fmt, df)
nullSafeCodeGen(ctx, ev, (datetimeStr, _) =>
- s"""
- |try {
- | ${ev.value} =
$formatterName.$parseMethod($datetimeStr.toString()) $downScaleCode;
- |} catch (java.time.DateTimeException e) {
- | ${parseErrorBranch}
- |} catch (java.text.ParseException e) {
- | ${parseErrorBranch}
- |}
- |""".stripMargin)
+ parseTimestampCode(formatterName, s"$datetimeStr.toString()"))
}.getOrElse {
val zid = ctx.addReferenceObj("zoneId", zoneId,
classOf[ZoneId].getName)
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
@@ -1386,13 +1398,7 @@ abstract class ToTimestamp
| $zid,
| $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
| true);
- |try {
- | ${ev.value} =
$timestampFormatter.$parseMethod($string.toString()) $downScaleCode;
- |} catch (java.time.DateTimeException e) {
- | ${parseErrorBranch}
- |} catch (java.text.ParseException e) {
- | ${parseErrorBranch}
- |}
+ |${parseTimestampCode(timestampFormatter, s"$string.toString()")}
|""".stripMargin)
}
case TimestampType | TimestampNTZType =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]