This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 637803e98345 [SPARK-57257][SQL] Support nanosecond-precision
timestamps in Hive results
637803e98345 is described below
commit 637803e983456b9c6455c5ac4a55ed6720665ff7
Author: Maxim Gekk <[email protected]>
AuthorDate: Fri Jun 5 21:09:50 2026 +0200
[SPARK-57257][SQL] Support nanosecond-precision timestamps in Hive results
### What changes were proposed in this pull request?
This PR modifies `HiveResult` to support the nanosecond-precision timestamp
types `TIMESTAMP_LTZ(p)` (`TimestampLTZNanosType`) and `TIMESTAMP_NTZ(p)`
(`TimestampNTZNanosType`), `p` in [7, 9]. Two cases are added to
`HiveResult.toHiveStringDefault`, mirroring the existing microsecond timestamp
cases:
- `(i: Instant, _: TimestampLTZNanosType)` -> rendered in the session time
zone.
- `(l: LocalDateTime, _: TimestampNTZNanosType)` -> rendered
zone-independently.
The external collected values are `Instant` (LTZ) and `LocalDateTime`
(NTZ); they are converted to the physical `TimestampNanosVal` at the column
precision and formatted with the nanosecond-aware `TimestampFormatter`
(`formatNanos` / `formatWithoutTimeZoneNanos`, SPARK-57162), flooring sub-`p`
digits and trimming trailing zeros. This is the same rendering used by casting
these types to string (SPARK-57256), so Hive output stays consistent.
### Why are the changes needed?
Before the change, formatting a nanosecond timestamp column through
`HiveResult` (e.g. end-to-end SQL / golden-file tests, spark-sql CLI, Thrift
server output) matches none of the cases in `HiveResult.toHiveStringDefault`
and fails with a `MatchError`, analogous to the `TimeType` issue fixed in
SPARK-51517:
```
scala.MatchError: (2020-01-01T00:00:00.123456789Z, TimestampLTZNanosType(9)) (of class scala.Tuple2)
```
### Does this PR introduce _any_ user-facing change?
Yes. It fixes the error above. After the change, nanosecond timestamp
values are rendered as proper strings in Hive results (only reachable when
`spark.sql.timestampNanosTypes.enabled=true`).
### How was this patch tested?
- New cases in `HiveResultSuite` covering `TIMESTAMP_LTZ(p)` /
`TIMESTAMP_NTZ(p)` for `p` in [7, 9]: precision-driven fraction width,
trailing-zero trimming, nanosWithinMicro 0 and 999, LTZ session-zone rendering
vs. zone-independent NTZ, and nested (array/map/struct) values.
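- New golden-file end-to-end tests `timestamp-ltz-nanos.sql` and
`timestamp-ntz-nanos.sql` (as SPARK-51517 added `time.sql`). Both are excluded
from `ThriftServerQueryTestSuite` because the nanosecond-precision timestamp
types are not yet mapped by the Thrift Server.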
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor 1.7.0
Closes #56320 from MaxGekk/nanos-hiveresult.
Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../apache/spark/sql/execution/HiveResult.scala | 10 ++
.../analyzer-results/timestamp-ltz-nanos.sql.out | 118 ++++++++++++++++++
.../analyzer-results/timestamp-ntz-nanos.sql.out | 118 ++++++++++++++++++
.../sql-tests/inputs/timestamp-ltz-nanos.sql | 32 +++++
.../sql-tests/inputs/timestamp-ntz-nanos.sql | 32 +++++
.../sql-tests/results/timestamp-ltz-nanos.sql.out | 135 +++++++++++++++++++++
.../sql-tests/results/timestamp-ntz-nanos.sql.out | 135 +++++++++++++++++++++
.../spark/sql/execution/HiveResultSuite.scala | 91 +++++++++++++-
.../thriftserver/ThriftServerQueryTestSuite.scala | 5 +-
9 files changed, 672 insertions(+), 4 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
index 671451087435..25170da800ae 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -131,6 +131,16 @@ object HiveResult extends SQLConfHelper {
case (t: Timestamp, TimestampType) => formatters.timestamp.format(t)
case (i: Instant, TimestampType) => formatters.timestamp.format(i)
case (l: LocalDateTime, TimestampNTZType) => formatters.timestamp.format(l)
+ // Nanosecond-precision timestamps. The external values are `Instant`
(LTZ) and
+ // `LocalDateTime` (NTZ); convert to the physical `TimestampNanosVal` at
the column precision
+ // and render via the same formatter methods as the cast-to-string path
(SPARK-57256), so the
+ // output stays consistent. LTZ uses the session zone; NTZ is
zone-independent.
+ case (i: Instant, t: TimestampLTZNanosType) =>
+ formatters.timestamp.formatNanos(
+ DateTimeUtils.instantToTimestampNanos(i, t.precision), t.precision)
+ case (l: LocalDateTime, t: TimestampNTZNanosType) =>
+ formatters.timestamp.formatWithoutTimeZoneNanos(
+ DateTimeUtils.localDateTimeToTimestampNanos(l, t.precision),
t.precision)
case (bin: Array[Byte], BinaryType) => binaryFormatter(bin)
case (decimal: java.math.BigDecimal, DecimalType()) =>
decimal.toPlainString
case (n, _: NumericType) => n.toString
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
new file mode 100644
index 000000000000..6f896ab8014c
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ltz-nanos.sql.out
@@ -0,0 +1,118 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(8)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(8))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ltz(9)) AS
CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000999 as timestamp_ltz(9)) AS
CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(8)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(8))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ltz(7)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ltz(9)) AS
CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7))
+-- !query analysis
+Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ltz(7)) AS
CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)))
+-- !query analysis
+Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS
array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)))
+-- !query analysis
+Project [map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ltz(9))) AS
map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ltz(9)))
+-- !query analysis
+Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as
timestamp_ltz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(NULL AS timestamp_ltz(9))
+-- !query analysis
+Project [cast(null as timestamp_ltz(9)) AS CAST(NULL AS TIMESTAMP_LTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(CAST(NULL AS timestamp_ltz(9)))
+-- !query analysis
+Project [array(cast(null as timestamp_ltz(9))) AS array(CAST(NULL AS
TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT map('k', CAST(NULL AS timestamp_ltz(9)))
+-- !query analysis
+Project [map(k, cast(null as timestamp_ltz(9))) AS map(k, CAST(NULL AS
TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9)))
+-- !query analysis
+Project [named_struct(f, cast(null as timestamp_ltz(9))) AS named_struct(f,
CAST(NULL AS TIMESTAMP_LTZ(9)))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
new file mode 100644
index 000000000000..94570f139e84
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz-nanos.sql.out
@@ -0,0 +1,118 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(8)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(8))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9)) AS
CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.999999000 as timestamp_ntz(9)) AS
CAST(2020-01-01 00:00:00.999999000 AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000999 as timestamp_ntz(9)) AS
CAST(2020-01-01 00:00:00.000000999 AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(8)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(8))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7))
+-- !query analysis
+Project [cast(2020-01-01 00:00:00.000000001 as timestamp_ntz(7)) AS
CAST(2020-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(1960-01-01 00:00:00.000000001 as timestamp_ntz(9)) AS
CAST(1960-01-01 00:00:00.000000001 AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7))
+-- !query analysis
+Project [cast(1960-01-01 00:00:00.123456789 as timestamp_ntz(7)) AS
CAST(1960-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(7))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)))
+-- !query analysis
+Project [array(cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS
array(CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)))
+-- !query analysis
+Project [map(k, cast(2020-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS
map(k, CAST(2020-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ntz(9)))
+-- !query analysis
+Project [named_struct(f, cast(2020-01-01 00:00:00.123456789 as
timestamp_ntz(9))) AS named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(NULL AS timestamp_ntz(9))
+-- !query analysis
+Project [cast(null as timestamp_ntz(9)) AS CAST(NULL AS TIMESTAMP_NTZ(9))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(CAST(NULL AS timestamp_ntz(9)))
+-- !query analysis
+Project [array(cast(null as timestamp_ntz(9))) AS array(CAST(NULL AS
TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT map('k', CAST(NULL AS timestamp_ntz(9)))
+-- !query analysis
+Project [map(k, cast(null as timestamp_ntz(9))) AS map(k, CAST(NULL AS
TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9)))
+-- !query analysis
+Project [named_struct(f, cast(null as timestamp_ntz(9))) AS named_struct(f,
CAST(NULL AS TIMESTAMP_NTZ(9)))#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
new file mode 100644
index 000000000000..f7c36256a6be
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz-nanos.sql
@@ -0,0 +1,32 @@
+-- Nanosecond-precision TIMESTAMP_LTZ(p) (p in [7, 9]) in Hive results
(SPARK-57257).
+-- LTZ values are rendered in the session time zone.
+
+--SET spark.sql.timestampNanosTypes.enabled=true
+--SET spark.sql.session.timeZone=America/Los_Angeles
+
+-- Precision-driven fraction width: sub-p digits are floored.
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7));
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8));
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9));
+
+-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all.
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9));
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7));
+
+-- Pre-1970 values exercise the negative-epoch path.
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9));
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7));
+
+-- Nested values (array / map / struct).
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)));
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)));
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ltz(9)));
+
+-- NULL values (top-level and nested).
+SELECT CAST(NULL AS timestamp_ltz(9));
+SELECT array(CAST(NULL AS timestamp_ltz(9)));
+SELECT map('k', CAST(NULL AS timestamp_ltz(9)));
+SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9)));
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
new file mode 100644
index 000000000000..c1db88eb409b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz-nanos.sql
@@ -0,0 +1,32 @@
+-- Nanosecond-precision TIMESTAMP_NTZ(p) (p in [7, 9]) in Hive results
(SPARK-57257).
+-- NTZ values are zone-independent.
+
+--SET spark.sql.timestampNanosTypes.enabled=true
+--SET spark.sql.session.timeZone=America/Los_Angeles
+
+-- Precision-driven fraction width: sub-p digits are floored.
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7));
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8));
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9));
+
+-- Trailing-zero trimming: an all-zero fraction renders as no fraction at all.
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9));
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8));
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7));
+
+-- Pre-1970 values exercise the negative-epoch path.
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9));
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7));
+
+-- Nested values (array / map / struct).
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)));
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)));
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ntz(9)));
+
+-- NULL values (top-level and nested).
+SELECT CAST(NULL AS timestamp_ntz(9));
+SELECT array(CAST(NULL AS timestamp_ntz(9)));
+SELECT map('k', CAST(NULL AS timestamp_ntz(9)));
+SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9)));
diff --git
a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
new file mode 100644
index 000000000000..75171edef611
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz-nanos.sql.out
@@ -0,0 +1,135 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(7))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(7)):timestamp_ltz(7)>
+-- !query output
+2020-01-01 00:00:00.1234567
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(8))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(8)):timestamp_ltz(8)>
+-- !query output
+2020-01-01 00:00:00.12345678
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+2020-01-01 00:00:00.123456789
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.999999000 AS
TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+2020-01-01 00:00:00.999999
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000999 AS
TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+2020-01-01 00:00:00.000000999
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+2020-01-01 00:00:00.000000001
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(8))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_LTZ(8)):timestamp_ltz(8)>
+-- !query output
+2020-01-01 00:00:00
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ltz(7))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_LTZ(7)):timestamp_ltz(7)>
+-- !query output
+2020-01-01 00:00:00
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(1960-01-01 00:00:00.000000001 AS
TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+1960-01-01 00:00:00.000000001
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ltz(7))
+-- !query schema
+struct<CAST(1960-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(7)):timestamp_ltz(7)>
+-- !query output
+1960-01-01 00:00:00.1234567
+
+
+-- !query
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)))
+-- !query schema
+struct<array(CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(9))):array<timestamp_ltz(9)>>
+-- !query output
+[2020-01-01 00:00:00.123456789]
+
+
+-- !query
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ltz(9)))
+-- !query schema
+struct<map(k, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(9))):map<string,timestamp_ltz(9)>>
+-- !query output
+{"k":2020-01-01 00:00:00.123456789}
+
+
+-- !query
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ltz(9)))
+-- !query schema
+struct<named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_LTZ(9))):struct<f:timestamp_ltz(9)>>
+-- !query output
+{"f":2020-01-01 00:00:00.123456789}
+
+
+-- !query
+SELECT CAST(NULL AS timestamp_ltz(9))
+-- !query schema
+struct<CAST(NULL AS TIMESTAMP_LTZ(9)):timestamp_ltz(9)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT array(CAST(NULL AS timestamp_ltz(9)))
+-- !query schema
+struct<array(CAST(NULL AS TIMESTAMP_LTZ(9))):array<timestamp_ltz(9)>>
+-- !query output
+[null]
+
+
+-- !query
+SELECT map('k', CAST(NULL AS timestamp_ltz(9)))
+-- !query schema
+struct<map(k, CAST(NULL AS TIMESTAMP_LTZ(9))):map<string,timestamp_ltz(9)>>
+-- !query output
+{"k":null}
+
+
+-- !query
+SELECT named_struct('f', CAST(NULL AS timestamp_ltz(9)))
+-- !query schema
+struct<named_struct(f, CAST(NULL AS
TIMESTAMP_LTZ(9))):struct<f:timestamp_ltz(9)>>
+-- !query output
+{"f":null}
diff --git
a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
new file mode 100644
index 000000000000..39542fdd121e
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz-nanos.sql.out
@@ -0,0 +1,135 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(7))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(7)):timestamp_ntz(7)>
+-- !query output
+2020-01-01 00:00:00.1234567
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(8))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(8)):timestamp_ntz(8)>
+-- !query output
+2020-01-01 00:00:00.12345678
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+2020-01-01 00:00:00.123456789
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.999999000' AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.999999000 AS
TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+2020-01-01 00:00:00.999999
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000999' AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000999 AS
TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+2020-01-01 00:00:00.000000999
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+2020-01-01 00:00:00.000000001
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(8))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_NTZ(8)):timestamp_ntz(8)>
+-- !query output
+2020-01-01 00:00:00
+
+
+-- !query
+SELECT CAST('2020-01-01 00:00:00.000000001' AS timestamp_ntz(7))
+-- !query schema
+struct<CAST(2020-01-01 00:00:00.000000001 AS
TIMESTAMP_NTZ(7)):timestamp_ntz(7)>
+-- !query output
+2020-01-01 00:00:00
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.000000001' AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(1960-01-01 00:00:00.000000001 AS
TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+1960-01-01 00:00:00.000000001
+
+
+-- !query
+SELECT CAST('1960-01-01 00:00:00.123456789' AS timestamp_ntz(7))
+-- !query schema
+struct<CAST(1960-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(7)):timestamp_ntz(7)>
+-- !query output
+1960-01-01 00:00:00.1234567
+
+
+-- !query
+SELECT array(CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)))
+-- !query schema
+struct<array(CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(9))):array<timestamp_ntz(9)>>
+-- !query output
+[2020-01-01 00:00:00.123456789]
+
+
+-- !query
+SELECT map('k', CAST('2020-01-01 00:00:00.123456789' AS timestamp_ntz(9)))
+-- !query schema
+struct<map(k, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(9))):map<string,timestamp_ntz(9)>>
+-- !query output
+{"k":2020-01-01 00:00:00.123456789}
+
+
+-- !query
+SELECT named_struct('f', CAST('2020-01-01 00:00:00.123456789' AS
timestamp_ntz(9)))
+-- !query schema
+struct<named_struct(f, CAST(2020-01-01 00:00:00.123456789 AS
TIMESTAMP_NTZ(9))):struct<f:timestamp_ntz(9)>>
+-- !query output
+{"f":2020-01-01 00:00:00.123456789}
+
+
+-- !query
+SELECT CAST(NULL AS timestamp_ntz(9))
+-- !query schema
+struct<CAST(NULL AS TIMESTAMP_NTZ(9)):timestamp_ntz(9)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT array(CAST(NULL AS timestamp_ntz(9)))
+-- !query schema
+struct<array(CAST(NULL AS TIMESTAMP_NTZ(9))):array<timestamp_ntz(9)>>
+-- !query output
+[null]
+
+
+-- !query
+SELECT map('k', CAST(NULL AS timestamp_ntz(9)))
+-- !query schema
+struct<map(k, CAST(NULL AS TIMESTAMP_NTZ(9))):map<string,timestamp_ntz(9)>>
+-- !query output
+{"k":null}
+
+
+-- !query
+SELECT named_struct('f', CAST(NULL AS timestamp_ntz(9)))
+-- !query schema
+struct<named_struct(f, CAST(NULL AS
TIMESTAMP_NTZ(9))):struct<f:timestamp_ntz(9)>>
+-- !query output
+{"f":null}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
index 251ed064af91..fd1b8eaf20cf 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
@@ -17,15 +17,17 @@
package org.apache.spark.sql.execution
-import java.time.{Duration, Period, Year}
+import java.time.{Duration, LocalDateTime, Period, Year, ZoneOffset}
-import org.apache.spark.sql.YearUDT
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.{Row, YearUDT}
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog
import org.apache.spark.sql.execution.HiveResult._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT,
SharedSparkSession}
-import org.apache.spark.sql.types.{YearMonthIntervalType,
YearMonthIntervalType => YM}
+import org.apache.spark.sql.types.{StructField, StructType,
TimestampLTZNanosType, TimestampNTZNanosType, YearMonthIntervalType,
YearMonthIntervalType => YM}
class HiveResultSuite extends SharedSparkSession {
@@ -76,6 +78,89 @@ class HiveResultSuite extends SharedSparkSession {
assert(result2 == timestamps.map(x => s"[$x]"))
}
+ test("SPARK-57257: nanosecond timestamp formatting in hive result") {
+ // Each input fraction maps to the expected rendered fraction at precision
7, 8, 9. Sub-`p`
+ // digits are floored and trailing zeros trimmed, so an all-zero fraction
renders as no
+ // fraction at all (e.g. ".000000001" at p=7/8). The flooring/trimming is
independent of the
+ // epoch sign, so the pre-1970 base (negative epoch micros + positive
nanosWithinMicro)
+ // shares the same expected fractions.
+ val bases = Seq("2020-01-01 00:00:00", "1960-01-01 00:00:00")
+ val cases = Seq(
+ ".123456789" -> Seq(".1234567", ".12345678", ".123456789"),
+ ".999999999" -> Seq(".9999999", ".99999999", ".999999999"),
+ ".999999000" -> Seq(".999999", ".999999", ".999999"),
+ ".000000001" -> Seq("", "", ".000000001"),
+ ".000000999" -> Seq(".0000009", ".00000099", ".000000999"))
+ // Render LTZ in a fixed zone so the wall-clock fields round-trip from the
cast.
+ withSQLConf(
+ SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true",
+ SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+ Seq(7, 8, 9).zipWithIndex.foreach { case (p, idx) =>
+ bases.foreach { base =>
+ cases.foreach { case (frac, expectedByPrecision) =>
+ val input = base + frac
+ val expected = base + expectedByPrecision(idx)
+ Seq("timestamp_ltz", "timestamp_ntz").foreach { typeName =>
+ val df = spark.sql(s"SELECT CAST('$input' AS $typeName($p)) AS
b")
+ assert(hiveResultString(df.queryExecution.executedPlan) ===
Seq(expected),
+ s"type = $typeName($p), input = $input")
+ val nested = spark.sql(s"SELECT array(CAST('$input' AS
$typeName($p))) AS b")
+ assert(hiveResultString(nested.queryExecution.executedPlan) ===
Seq(s"[$expected]"),
+ s"nested type = $typeName($p), input = $input")
+ }
+ }
+ }
+ }
+
+ // NULL values: handled by the generic `(null, _)` branch in
`toHiveString` (before the
+ // type-specific cases), so the path is type-agnostic. Verify top-level
and nested NULLs.
+ Seq("timestamp_ltz(9)", "timestamp_ntz(9)").foreach { typeName =>
+ val nullCast = s"CAST(NULL AS $typeName)"
+ val topLevel = spark.sql(s"SELECT $nullCast AS b")
+ assert(hiveResultString(topLevel.queryExecution.executedPlan) ===
Seq("NULL"),
+ s"top-level NULL of $typeName")
+ val inArray = spark.sql(s"SELECT array($nullCast) AS b")
+ assert(hiveResultString(inArray.queryExecution.executedPlan) ===
Seq("[null]"),
+ s"array NULL of $typeName")
+ val inMap = spark.sql(s"SELECT map('k', $nullCast) AS b")
+ assert(hiveResultString(inMap.queryExecution.executedPlan) ===
Seq("{\"k\":null}"),
+ s"map NULL of $typeName")
+ val inStruct = spark.sql(s"SELECT named_struct('f', $nullCast) AS b")
+ assert(hiveResultString(inStruct.queryExecution.executedPlan) ===
Seq("{\"f\":null}"),
+ s"struct NULL of $typeName")
+ }
+ }
+ }
+
+ test("SPARK-57257: LTZ nanos timestamp honors session time zone, NTZ is
zone-independent") {
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ // A fixed instant and the matching local date-time at UTC.
+ val ldt = LocalDateTime.of(2020, 1, 1, 12, 0, 0, 123456789)
+ val instant = ldt.toInstant(ZoneOffset.UTC)
+ val ltzDf = spark.createDataFrame(
+ Seq(Row(instant)).asJava,
+ StructType(Seq(StructField("b", TimestampLTZNanosType(9)))))
+ val ntzDf = spark.createDataFrame(
+ Seq(Row(ldt)).asJava,
+ StructType(Seq(StructField("b", TimestampNTZNanosType(9)))))
+
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") {
+ assert(hiveResultString(ltzDf.queryExecution.executedPlan) ===
+ Seq("2020-01-01 12:00:00.123456789"))
+ assert(hiveResultString(ntzDf.queryExecution.executedPlan) ===
+ Seq("2020-01-01 12:00:00.123456789"))
+ }
+ withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles")
{
+ // LTZ shifts with the session zone (UTC-08:00 on this date) ...
+ assert(hiveResultString(ltzDf.queryExecution.executedPlan) ===
+ Seq("2020-01-01 04:00:00.123456789"))
+ // ... while NTZ stays the same wall-clock value.
+ assert(hiveResultString(ntzDf.queryExecution.executedPlan) ===
+ Seq("2020-01-01 12:00:00.123456789"))
+ }
+ }
+ }
+
test("toHiveString correctly handles UDTs") {
val point = new ExamplePoint(50.0, 50.0)
val tpe = new ExamplePointUDT()
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
index 401b3c126d85..f9d333479bbd 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala
@@ -118,7 +118,10 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite
with SharedThriftServ
"pipe-operators.sql",
// VARIANT type
"variant/named-function-arguments.sql",
- "variant-field-extractions.sql"
+ "variant-field-extractions.sql",
+ // SPARK-57257: nanosecond-precision timestamp types are not yet mapped by
the Thrift Server
+ "timestamp-ltz-nanos.sql",
+ "timestamp-ntz-nanos.sql"
)
override def runQueries(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]