This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new abe59e6a95fa [SPARK-57167][SQL] Simplify MakeTimestamp codegen under
ANSI mode
abe59e6a95fa is described below
commit abe59e6a95fad01414184dd725167df8f6af9d72
Author: Gengliang Wang <[email protected]>
AuthorDate: Sun May 31 06:56:22 2026 -0700
[SPARK-57167][SQL] Simplify MakeTimestamp codegen under ANSI mode
### What changes were proposed in this pull request?
Extend `DateTimeExpressionUtils.java` with two static helpers and route
`MakeTimestamp`'s eval and codegen paths through them:
* `makeTimestampMicros(int year, int month, int day, int hour, int min,
Decimal secAndMicros, ZoneId zoneId, boolean timestampNTZ)`: the shared,
exception-raising core. It computes the micros (including the leap-second
`sec = 60` rollover supported for PostgreSQL compatibility), throwing
`SparkDateTimeException` for an invalid fraction of second and
`DateTimeException` for an invalid year/month/day/hour/min combination.
* `makeTimestampExact(...)`: the ANSI (`failOnError = true`) wrapper. It
rethrows `SparkDateTimeException` as-is (to preserve its message) and
translates any other `DateTimeException` to `ansiDateTimeArgumentOutOfRange`.
`SparkDateTimeException` is caught first because it is itself a
`DateTimeException`.
`MakeTimestamp.toMicros` (eval) and `doGenCode` now dispatch on
`failOnError`: the ANSI path emits a single `makeTimestampExact(...)` call,
while the non-ANSI path calls `makeTimestampMicros(...)` inside the existing
inline `try/catch -> isNull` form (matching the pattern used by the
already-merged `MakeDate` / `MakeInterval` cleanups).
### Why are the changes needed?
Part of SPARK-56908 (umbrella). The ANSI branch of
`MakeTimestamp.doGenCode` previously emitted a ~22-line inline block (Decimal
floor/nanos math, `LocalDateTime.of`, leap-second handling, the timezone/NTZ
conversion, and a two-arm `try/catch` mapping to the ANSI errors) into every
generated stage that calls `make_timestamp`. Collapsing it to a single helper
call shrinks the generated Java source, helping with the JVM 64KB method /
constant-pool limits, Janino compile time, and JIT wo [...]
### Does this PR introduce _any_ user-facing change?
No. The compiled behavior is identical; only the emitted Java source text
changes.
### How was this patch tested?
```
build/sbt "catalyst/testOnly *DateExpressionsSuite"
```
75/75 pass (covers make_timestamp / make_timestamp_ntz /
make_timestamp_ltz, exercised both with and without whole-stage codegen).
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
Closes #56217 from gengliangwang/spark-make-timestamp-codegen.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
(cherry picked from commit fd19369cca988c45e4d24b241afffee2456d0ebe)
Signed-off-by: Gengliang Wang <[email protected]>
---
.../expressions/DateTimeExpressionUtils.java | 67 ++++++++++++++++
.../catalyst/expressions/datetimeExpressions.scala | 88 ++++++----------------
2 files changed, 91 insertions(+), 64 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
index 0413278d0cb8..97fc2abb3255 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/DateTimeExpressionUtils.java
@@ -19,7 +19,11 @@ package org.apache.spark.sql.catalyst.expressions;
import java.time.DateTimeException;
import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import org.apache.spark.SparkDateTimeException;
+import org.apache.spark.sql.catalyst.util.DateTimeConstants;
import org.apache.spark.sql.catalyst.util.DateTimeUtils;
import org.apache.spark.sql.catalyst.util.IntervalUtils;
import org.apache.spark.sql.errors.QueryExecutionErrors;
@@ -68,4 +72,67 @@ public final class DateTimeExpressionUtils {
throw QueryExecutionErrors.arithmeticOverflowError(e.getMessage(), "",
null);
}
}
+
+ /**
+ * Builds the microsecond count for
+ * {@code MakeTimestamp(year, month, day, hour, min, secAndMicros[,
timezone])}.
+ * {@code secAndMicros} carries the whole seconds plus the microsecond
fraction
+ * (scale 6); a value of {@code 60} seconds with no fraction is accepted for
+ * PostgreSQL compatibility and rolls over to the next minute. When
+ * {@code timestampNTZ} is {@code true} the result is the local-time micros
+ * (no zone applied); otherwise {@code zoneId} is used to resolve the
instant.
+ *
+ * <p>This is the shared, exception-raising core used by both the eval and
+ * codegen paths. It throws {@link SparkDateTimeException} for an invalid
+ * fraction-of-second and {@link DateTimeException} for an invalid
+ * year/month/day/hour/min combination; callers decide how to translate
those.
+ */
+ public static long makeTimestampMicros(
+ int year, int month, int day, int hour, int min,
+ Decimal secAndMicros, ZoneId zoneId, boolean timestampNTZ) {
+ assert secAndMicros.scale() == 6 :
+ "Seconds fraction must have 6 digits for microseconds but got " +
secAndMicros.scale();
+ // 8 digits cannot overflow Int.
+ int totalMicros = (int) secAndMicros.toUnscaledLong();
+ int microsPerSecond = (int) DateTimeConstants.MICROS_PER_SECOND;
+ int nanosPerMicros = (int) DateTimeConstants.NANOS_PER_MICROS;
+ int seconds = Math.floorDiv(totalMicros, microsPerSecond);
+ int nanos = Math.floorMod(totalMicros, microsPerSecond) * nanosPerMicros;
+ LocalDateTime ldt;
+ if (seconds == 60) {
+ if (nanos == 0) {
+ // This case of sec = 60 and nanos = 0 is supported for compatibility
with PostgreSQL.
+ ldt = LocalDateTime.of(year, month, day, hour, min, 0,
0).plusMinutes(1);
+ } else {
+ throw
QueryExecutionErrors.invalidFractionOfSecondError(secAndMicros.toDouble());
+ }
+ } else {
+ ldt = LocalDateTime.of(year, month, day, hour, min, seconds, nanos);
+ }
+ if (timestampNTZ) {
+ return DateTimeUtils.localDateTimeToMicros(ldt);
+ } else {
+ return DateTimeUtils.instantToMicros(ldt.atZone(zoneId).toInstant());
+ }
+ }
+
+ /**
+ * ANSI ({@code failOnError = true}) variant of {@link
#makeTimestampMicros}: a
+ * {@link SparkDateTimeException} (e.g. an invalid fraction of second) is
+ * rethrown as-is to preserve its message, while any other
+ * {@link DateTimeException} is translated to {@code
ansiDateTimeArgumentOutOfRange}.
+ * {@code SparkDateTimeException} is caught first because it is itself a
+ * {@link DateTimeException}.
+ */
+ public static long makeTimestampExact(
+ int year, int month, int day, int hour, int min,
+ Decimal secAndMicros, ZoneId zoneId, boolean timestampNTZ) {
+ try {
+ return makeTimestampMicros(year, month, day, hour, min, secAndMicros,
zoneId, timestampNTZ);
+ } catch (SparkDateTimeException e) {
+ throw e;
+ } catch (DateTimeException e) {
+ throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);
+ }
+ }
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index a724f02cd107..3274a268e158 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -24,7 +24,7 @@ import java.util.Locale
import org.apache.commons.text.StringEscapeUtils
-import org.apache.spark.{SparkDateTimeException, SparkException,
SparkIllegalArgumentException}
+import org.apache.spark.{SparkException, SparkIllegalArgumentException}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder,
FunctionRegistry, TypeCheckResult}
import
org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch,
TypeCheckSuccess}
@@ -2937,33 +2937,17 @@ case class MakeTimestamp(
min: Int,
secAndMicros: Decimal,
zoneId: ZoneId): Any = {
- try {
- assert(secAndMicros.scale == 6,
- s"Seconds fraction must have 6 digits for microseconds but got
${secAndMicros.scale}")
- val unscaledSecFrac = secAndMicros.toUnscaledLong
- val totalMicros = unscaledSecFrac.toInt // 8 digits cannot overflow Int
- val seconds = Math.floorDiv(totalMicros, MICROS_PER_SECOND.toInt)
- val nanos = Math.floorMod(totalMicros, MICROS_PER_SECOND.toInt) *
NANOS_PER_MICROS.toInt
- val ldt = if (seconds == 60) {
- if (nanos == 0) {
- // This case of sec = 60 and nanos = 0 is supported for
compatibility with PostgreSQL
- LocalDateTime.of(year, month, day, hour, min, 0, 0).plusMinutes(1)
- } else {
- throw
QueryExecutionErrors.invalidFractionOfSecondError(secAndMicros.toDouble)
- }
- } else {
- LocalDateTime.of(year, month, day, hour, min, seconds, nanos)
- }
- if (dataType == TimestampType) {
- instantToMicros(ldt.atZone(zoneId).toInstant)
- } else {
- localDateTimeToMicros(ldt)
+ val timestampNTZ = dataType != TimestampType
+ if (failOnError) {
+ DateTimeExpressionUtils.makeTimestampExact(
+ year, month, day, hour, min, secAndMicros, zoneId, timestampNTZ)
+ } else {
+ try {
+ DateTimeExpressionUtils.makeTimestampMicros(
+ year, month, day, hour, min, secAndMicros, zoneId, timestampNTZ)
+ } catch {
+ case _: DateTimeException => null
}
- } catch {
- case e: SparkDateTimeException if failOnError => throw e
- case e: DateTimeException if failOnError =>
- throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e)
- case _: DateTimeException => null
}
}
@@ -2990,47 +2974,23 @@ case class MakeTimestamp(
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+ val utils = classOf[DateTimeExpressionUtils].getName
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
- val d = Decimal.getClass.getName.stripSuffix("$")
- val failOnErrorBranch = if (failOnError) {
- "throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);"
- } else {
- s"${ev.isNull} = true;"
- }
- val failOnSparkErrorBranch = if (failOnError) "throw e;" else
s"${ev.isNull} = true;"
+ val timestampNTZ = dataType != TimestampType
nullSafeCodeGen(ctx, ev, (year, month, day, hour, min, secAndNanos,
timezone) => {
- val zoneId = timezone.map(tz =>
s"$dtu.getZoneId(${tz}.toString())").getOrElse(zid)
- val toMicrosCode = if (dataType == TimestampType) {
- s"""
- |java.time.Instant instant = ldt.atZone($zoneId).toInstant();
- |${ev.value} = $dtu.instantToMicros(instant);
- |""".stripMargin
+ val zoneIdExpr = timezone.map(tz =>
s"$dtu.getZoneId(${tz}.toString())").getOrElse(zid)
+ if (failOnError) {
+ s"${ev.value} = $utils.makeTimestampExact(" +
+ s"$year, $month, $day, $hour, $min, $secAndNanos, $zoneIdExpr,
$timestampNTZ);"
} else {
- s"${ev.value} = $dtu.localDateTimeToMicros(ldt);"
+ s"""
+ try {
+ ${ev.value} = $utils.makeTimestampMicros(
+ $year, $month, $day, $hour, $min, $secAndNanos, $zoneIdExpr,
$timestampNTZ);
+ } catch (java.time.DateTimeException e) {
+ ${ev.isNull} = true;
+ }"""
}
- s"""
- try {
- org.apache.spark.sql.types.Decimal secFloor = $secAndNanos.floor();
- org.apache.spark.sql.types.Decimal nanosPerSec =
$d$$.MODULE$$.apply(1000000000L, 10, 0);
- int nanos =
(($secAndNanos.$$minus(secFloor)).$$times(nanosPerSec)).toInt();
- int seconds = secFloor.toInt();
- java.time.LocalDateTime ldt;
- if (seconds == 60) {
- if (nanos == 0) {
- ldt = java.time.LocalDateTime.of(
- $year, $month, $day, $hour, $min, 0, 0).plusMinutes(1);
- } else {
- throw
QueryExecutionErrors.invalidFractionOfSecondError($secAndNanos.toDouble());
- }
- } else {
- ldt = java.time.LocalDateTime.of($year, $month, $day, $hour, $min,
seconds, nanos);
- }
- $toMicrosCode
- } catch (org.apache.spark.SparkDateTimeException e) {
- $failOnSparkErrorBranch
- } catch (java.time.DateTimeException e) {
- $failOnErrorBranch
- }"""
})
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]