This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bab316b7b600 [SPARK-57168][SQL] Simplify GetTimestamp codegen under ANSI mode
bab316b7b600 is described below

commit bab316b7b600a1a14fa8d9350c5f7a2a096bbf72
Author: Gengliang Wang <[email protected]>
AuthorDate: Sun May 31 19:08:47 2026 -0700

    [SPARK-57168][SQL] Simplify GetTimestamp codegen under ANSI mode
    
    ### What changes were proposed in this pull request?
    
    Add `DateTimeExpressionUtils.parseToTimestampExact(TimestampFormatter formatter,
    String input, long downScaleFactor, boolean forTimestampNTZ, String suggestedFuncOnFail)`
    and route the ANSI (`failOnError = true`) eval and codegen paths of `ToTimestamp`
    (the base of `GetTimestamp` / `to_timestamp`, `unix_timestamp`, etc.) through it.
    The helper parses via `parseWithoutTimeZone` (TIMESTAMP_NTZ, no down-scaling) or
    `parse` followed by `/ downScaleFactor`, and translates a `DateTimeException`
    (which also covers `DateTimeParseException`) or a `ParseException` into
    `ansiDateTimeParseError` carrying the suggested fall-back function. [...]
    
    `ToTimestamp.doGenCode` previously emitted the same 5-line
    `try { parse } catch (DateTimeException) catch (ParseException)` block at both
    string call sites (the cached-formatter path and the per-row-formatter path).
    A local `parseTimestampCode` now dispatches on `failOnError`: the ANSI branch
    emits a single `parseToTimestampExact(...)` call, while the non-ANSI branch keeps
    the inline `try/catch -> isNull` form (the same shape used by the already-merged
    `MakeDate` / `MakeInterval` cleanups). [...]
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). Collapsing the duplicated inline try/catch 
to a single helper call shrinks the generated Java for the common 
`to_timestamp` / `unix_timestamp` family, helping with the JVM 64KB method / 
constant-pool limits, Janino compile time, and JIT work.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The compiled behavior is identical; only the emitted Java source text 
changes.
    
    ### How was this patch tested?
    
    ```
    build/sbt "catalyst/testOnly *DateExpressionsSuite"
    ```
    
    75/75 pass (exercised both with and without whole-stage codegen).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8)
    
    Closes #56218 from gengliangwang/spark-get-timestamp-codegen.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../expressions/DateTimeExpressionUtils.java       | 30 ++++++++++
 .../catalyst/expressions/datetimeExpressions.scala | 70 ++++++++++++----------
 2 files changed, 68 insertions(+), 32 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
index 97fc2abb3255..16312cdb648a 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.catalyst.expressions;
 
+import java.text.ParseException;
 import java.time.DateTimeException;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
@@ -26,6 +27,7 @@ import org.apache.spark.SparkDateTimeException;
 import org.apache.spark.sql.catalyst.util.DateTimeConstants;
 import org.apache.spark.sql.catalyst.util.DateTimeUtils;
 import org.apache.spark.sql.catalyst.util.IntervalUtils;
+import org.apache.spark.sql.catalyst.util.TimestampFormatter;
 import org.apache.spark.sql.errors.QueryExecutionErrors;
 import org.apache.spark.sql.types.Decimal;
 import org.apache.spark.unsafe.types.CalendarInterval;
@@ -135,4 +137,32 @@ public final class DateTimeExpressionUtils {
       throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);
     }
   }
+
+  /**
+   * Parses {@code input} to a timestamp for {@code ToTimestamp} expressions
+   * (e.g. {@code to_timestamp}) in ANSI mode ({@code failOnError = true}).
+   * For a TIMESTAMP_NTZ result the formatter's {@code parseWithoutTimeZone} is
+   * used and {@code downScaleFactor} is not applied; otherwise the parsed micros
+   * are divided by {@code downScaleFactor}. A {@link DateTimeException} (which
+   * also covers {@code DateTimeParseException}) or a {@link ParseException} is
+   * translated to {@code ansiDateTimeParseError} carrying the suggested
+   * fall-back function; any other exception (e.g. {@code IllegalStateException})
+   * propagates unchanged.
+   */
+  public static long parseToTimestampExact(
+      TimestampFormatter formatter,
+      String input,
+      long downScaleFactor,
+      boolean forTimestampNTZ,
+      String suggestedFuncOnFail) {
+    try {
+      if (forTimestampNTZ) {
+        return formatter.parseWithoutTimeZone(input);
+      } else {
+        return formatter.parse(input) / downScaleFactor;
+      }
+    } catch (DateTimeException | ParseException e) {
+      throw QueryExecutionErrors.ansiDateTimeParseError(e, 
suggestedFuncOnFail);
+    }
+  }
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 3274a268e158..9b16a5685fb1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1324,18 +1324,20 @@ abstract class ToTimestamp
             null
           } else {
             val formatter = 
formatterOption.getOrElse(getFormatter(fmt.toString))
-            try {
-              if (forTimestampNTZ) {
-                
formatter.parseWithoutTimeZone(t.asInstanceOf[UTF8String].toString)
-              } else {
-                formatter.parse(t.asInstanceOf[UTF8String].toString) / 
downScaleFactor
+            val str = t.asInstanceOf[UTF8String].toString
+            if (failOnError) {
+              DateTimeExpressionUtils.parseToTimestampExact(
+                formatter, str, downScaleFactor, forTimestampNTZ, 
suggestedFuncOnFail)
+            } else {
+              try {
+                if (forTimestampNTZ) {
+                  formatter.parseWithoutTimeZone(str)
+                } else {
+                  formatter.parse(str) / downScaleFactor
+                }
+              } catch {
+                case e if isParseError(e) => null
               }
-            } catch {
-              case e: DateTimeException if failOnError =>
-                throw QueryExecutionErrors.ansiDateTimeParseError(e, 
suggestedFuncOnFail)
-              case e: ParseException if failOnError =>
-                throw QueryExecutionErrors.ansiDateTimeParseError(e, 
suggestedFuncOnFail)
-              case e if isParseError(e) => null
             }
           }
       }
@@ -1344,11 +1346,7 @@ abstract class ToTimestamp
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val javaType = CodeGenerator.javaType(dataType)
-    val parseErrorBranch: String = if (failOnError) {
-      s"throw QueryExecutionErrors.ansiDateTimeParseError(e, 
\"${suggestedFuncOnFail}\");"
-    } else {
-      s"${ev.isNull} = true;"
-    }
+    val utils = classOf[DateTimeExpressionUtils].getName
     val parseMethod = if (forTimestampNTZ) {
       "parseWithoutTimeZone"
     } else {
@@ -1359,21 +1357,35 @@ abstract class ToTimestamp
     } else {
       s"/ $downScaleFactor"
     }
+    // Emits the string -> timestamp parse body. The ANSI (failOnError) branch
+    // delegates the parse and the parse-error -> ANSI error translation to
+    // DateTimeExpressionUtils.parseToTimestampExact, collapsing the inline
+    // try/catch to a single call. The non-ANSI branch keeps the inline
+    // try/catch that maps a parse failure to a null result.
+    def parseTimestampCode(formatterExpr: String, inputExpr: String): String = 
{
+      if (failOnError) {
+        s"""${ev.value} = $utils.parseToTimestampExact(
+           |    $formatterExpr, $inputExpr, ${downScaleFactor}L,
+           |    $forTimestampNTZ, "$suggestedFuncOnFail");""".stripMargin
+      } else {
+        s"""
+           |try {
+           |  ${ev.value} = $formatterExpr.$parseMethod($inputExpr) 
$downScaleCode;
+           |} catch (java.time.DateTimeException e) {
+           |  ${ev.isNull} = true;
+           |} catch (java.text.ParseException e) {
+           |  ${ev.isNull} = true;
+           |}
+           |""".stripMargin
+      }
+    }
 
     left.dataType match {
       case _: StringType => formatterOption.map { fmt =>
         val df = classOf[TimestampFormatter].getName
         val formatterName = ctx.addReferenceObj("formatter", fmt, df)
         nullSafeCodeGen(ctx, ev, (datetimeStr, _) =>
-          s"""
-             |try {
-             |  ${ev.value} = 
$formatterName.$parseMethod($datetimeStr.toString()) $downScaleCode;
-             |} catch (java.time.DateTimeException e) {
-             |  ${parseErrorBranch}
-             |} catch (java.text.ParseException e) {
-             |  ${parseErrorBranch}
-             |}
-             |""".stripMargin)
+          parseTimestampCode(formatterName, s"$datetimeStr.toString()"))
       }.getOrElse {
         val zid = ctx.addReferenceObj("zoneId", zoneId, 
classOf[ZoneId].getName)
         val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
@@ -1386,13 +1398,7 @@ abstract class ToTimestamp
              |  $zid,
              |  $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT(),
              |  true);
-             |try {
-             |  ${ev.value} = 
$timestampFormatter.$parseMethod($string.toString()) $downScaleCode;
-             |} catch (java.time.DateTimeException e) {
-             |    ${parseErrorBranch}
-             |} catch (java.text.ParseException e) {
-             |    ${parseErrorBranch}
-             |}
+             |${parseTimestampCode(timestampFormatter, s"$string.toString()")}
              |""".stripMargin)
       }
       case TimestampType | TimestampNTZType =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to