This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/spark-connect-swift.git
The following commit(s) were added to refs/heads/main by this push:
new fc90ae5 [SPARK-52524] Support `Timestamp` type
fc90ae5 is described below
commit fc90ae5c8850a445a4493edb9111617a312dab92
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Jun 18 06:13:41 2025 -0700
[SPARK-52524] Support `Timestamp` type
### What changes were proposed in this pull request?
This PR aims to support `Timestamp` type in `Row` and `DataFrame.collect`.
### Why are the changes needed?
Previously, `Timestamp` is supported inside `Spark Connect Server`-side
operation only, e.g. `DataFrame.show`.
### Does this PR introduce _any_ user-facing change?
No, this is an additional type.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #202 from dongjoon-hyun/SPARK-52524.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
Sources/SparkConnect/DataFrame.swift | 16 ++++++++++++++++
Sources/SparkConnect/Row.swift | 2 ++
Tests/SparkConnectTests/DataFrameTests.swift | 13 +++++++++++++
3 files changed, 31 insertions(+)
diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift
index 2f590de..bbd98bd 100644
--- a/Sources/SparkConnect/DataFrame.swift
+++ b/Sources/SparkConnect/DataFrame.swift
@@ -429,6 +429,22 @@ public actor DataFrame: Sendable {
values.append(array.asAny(i) as? Decimal)
case .primitiveInfo(.date32):
values.append(array.asAny(i) as! Date)
+ case .timeInfo(.timestamp):
+ let timestampType = column.data.type as! ArrowTypeTimestamp
+ assert(timestampType.timezone == "Etc/UTC")
+ let timestamp = array.asAny(i) as! Int64
+ let timeInterval =
+ switch timestampType.unit {
+ case .seconds:
+ TimeInterval(timestamp)
+ case .milliseconds:
+ TimeInterval(timestamp) / 1_000
+ case .microseconds:
+ TimeInterval(timestamp) / 1_000_000
+ case .nanoseconds:
+ TimeInterval(timestamp) / 1_000_000_000
+ }
+ values.append(Date(timeIntervalSince1970: timeInterval))
case ArrowType.ArrowBinary:
values.append((array as! AsString).asString(i).utf8)
case .complexInfo(.strct):
diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift
index 249ccae..56418d2 100644
--- a/Sources/SparkConnect/Row.swift
+++ b/Sources/SparkConnect/Row.swift
@@ -71,6 +71,8 @@ public struct Row: Sendable, Equatable {
return a == b
} else if let a = x as? Decimal, let b = y as? Decimal {
return a == b
+ } else if let a = x as? Date, let b = y as? Date {
+ return a == b
} else if let a = x as? String, let b = y as? String {
return a == b
} else {
diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift
index 2edd5f8..62b117a 100644
--- a/Tests/SparkConnectTests/DataFrameTests.swift
+++ b/Tests/SparkConnectTests/DataFrameTests.swift
@@ -932,6 +932,19 @@ struct DataFrameTests {
#expect(try await df.collect() == expected)
await spark.stop()
}
+
+ @Test
+ func timestamp() async throws {
+ let spark = try await SparkSession.builder.getOrCreate()
+ let df = try await spark.sql(
+ "SELECT TIMESTAMP '2025-05-01 16:23:40', TIMESTAMP '2025-05-01 16:23:40.123456'")
+ let expected = [
+ Row(
+ Date(timeIntervalSince1970: 1746116620.0), Date(timeIntervalSince1970: 1746116620.123456))
+ ]
+ #expect(try await df.collect() == expected)
+ await spark.stop()
+ }
#endif
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]