This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/spark-connect-swift.git
The following commit(s) were added to refs/heads/main by this push:
     new 119eeea  [SPARK-51815] Add `Row` struct
119eeea is described below

commit 119eeea430cd75246903c044eb1e288c19f319ea
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Wed Apr 16 15:59:33 2025 +0900

    [SPARK-51815] Add `Row` struct

    ### What changes were proposed in this pull request?

    This PR aims to add a `Row` struct and use it.

    ### Why are the changes needed?

    To make `DataFrame` APIs return `Row` instead of `[String?]` in Swift.

    ```swift
    - public func collect() async throws -> [[String?]] {
    + public func collect() async throws -> [Row] {

    - public func head(_ n: Int32 = 1) async throws -> [[String?]] {
    + public func head(_ n: Int32 = 1) async throws -> [Row] {
    ```

    Note that `Row` is added to support fields of general types, but this PR only replaces the existing APIs' `[String?]`-based signatures with `Row`-based ones. The detailed one-to-one mapping for other types will be handled later.

    ### Does this PR introduce _any_ user-facing change?

    Yes, but this is a change to an unreleased version.

    ### How was this patch tested?

    Pass the CIs.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #63 from dongjoon-hyun/SPARK-51815.

    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 Sources/SparkConnect/Catalog.swift                 | 10 +--
 Sources/SparkConnect/DataFrame.swift               | 20 ++---
 Sources/SparkConnect/Row.swift                     | 90 +++++++++++++++++++++
 Tests/SparkConnectTests/DataFrameTests.swift       | 20 ++---
 .../Resources/queries/binary.sql.answer            |  1 +
 .../Resources/queries/binary.sql.json              |  1 -
 .../Resources/queries/cache.sql.answer             |  1 +
 .../Resources/queries/cache.sql.json               |  1 -
 .../Resources/queries/clear_cache.sql.answer       |  1 +
 .../Resources/queries/clear_cache.sql.json         |  1 -
 .../Resources/queries/date.sql.answer              |  1 +
 .../Resources/queries/date.sql.json                |  1 -
 .../Resources/queries/describe_database.sql.answer |  5 ++
 .../Resources/queries/describe_database.sql.json   |  1 -
 .../Resources/queries/describe_function.sql.answer |  3 +
 .../Resources/queries/describe_function.sql.json   |  1 -
 .../Resources/queries/describe_query.sql.answer    |  3 +
 .../Resources/queries/describe_query.sql.json      |  1 -
 .../Resources/queries/describe_table.sql.answer    |  1 +
 .../Resources/queries/describe_table.sql.json      |  1 -
 .../Resources/queries/explain.sql.answer           | 22 +++++
 .../Resources/queries/explain.sql.json             |  1 -
 .../Resources/queries/floating.sql.answer          |  1 +
 .../Resources/queries/floating.sql.json            |  1 -
 .../Resources/queries/integral.sql.answer          |  1 +
 .../Resources/queries/integral.sql.json            |  1 -
 .../Resources/queries/pipesyntax.sql.answer        |  2 +
 .../Resources/queries/pipesyntax.sql.json          |  1 -
 .../Resources/queries/select.sql.answer            |  1 +
 .../Resources/queries/select.sql.json              |  1 -
 .../Resources/queries/show_databases.sql.answer    |  1 +
 .../Resources/queries/show_databases.sql.json      |  1 -
 .../Resources/queries/show_tables.sql.answer       |  1 +
 .../Resources/queries/show_tables.sql.json         |  1 -
 .../Resources/queries/string.sql.answer            |  1 +
 .../Resources/queries/string.sql.json              |  1 -
 .../Resources/queries/struct.sql.answer            |  1 +
 .../Resources/queries/struct.sql.json              |  1 -
 .../Resources/queries/uncache.sql.answer           |  1 +
 .../Resources/queries/uncache.sql.json             |  1 -
 Tests/SparkConnectTests/RowTests.swift             | 94 ++++++++++++++++++++++
 Tests/SparkConnectTests/SQLTests.swift             |  5 +-
 Tests/SparkConnectTests/SparkSessionTests.swift    |  2 +-
 43 files changed, 260 insertions(+), 47 deletions(-)
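For orientation before the diff: a minimal sketch of client code against the new `Row`-based `collect()` API. The SQL literal and variable names here are illustrative, not part of this commit, and a reachable Spark Connect server is assumed.

```swift
import SparkConnect

// A minimal sketch, assuming a Spark Connect server is reachable and
// that SparkSession/DataFrame/Row match this commit.
let spark = try await SparkSession.builder.getOrCreate()

// collect() now returns [Row] instead of [[String?]].
let rows: [Row] = try await spark.sql(
  "SELECT * FROM VALUES (1, 'abc'), (2, 'def') AS t(id, name)"
).collect()

for row in rows {
  // Row values are type-erased; in this commit they still arrive as
  // strings, so a cast is required to use them as such.
  let id = try row.get(0) as? String
  print("id=\(id ?? "null"), row=\(row.toString())")
}

await spark.stop()
```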
diff --git a/Sources/SparkConnect/Catalog.swift b/Sources/SparkConnect/Catalog.swift
index ed37bf7..f14bff5 100644
--- a/Sources/SparkConnect/Catalog.swift
+++ b/Sources/SparkConnect/Catalog.swift
@@ -100,7 +100,7 @@ public actor Catalog: Sendable {
       catalog.catType = .currentCatalog(Spark_Connect_CurrentCatalog())
       return catalog
     })
-    return try await df.collect()[0][0]!
+    return try await df.collect()[0][0] as! String
   }
 
   /// Sets the current default catalog in this session.
@@ -130,7 +130,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      CatalogMetadata(name: $0[0]!, description: $0[1])
+      try CatalogMetadata(name: $0[0] as! String, description: $0[1] as? String)
     }
   }
 
@@ -142,7 +142,7 @@ public actor Catalog: Sendable {
       catalog.catType = .currentDatabase(Spark_Connect_CurrentDatabase())
       return catalog
     })
-    return try await df.collect()[0][0]!
+    return try await df.collect()[0][0] as! String
   }
 
   /// Sets the current default database in this session.
@@ -173,7 +173,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!)
+      try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String)
     }
   }
 
@@ -189,7 +189,7 @@ public actor Catalog: Sendable {
       return catalog
     })
     return try await df.collect().map {
-      Database(name: $0[0]!, catalog: $0[1], description: $0[2], locationUri: $0[3]!)
+      try Database(name: $0[0] as! String, catalog: $0[1] as? String, description: $0[2] as? String, locationUri: $0[3] as! String)
     }.first!
   }
 
diff --git a/Sources/SparkConnect/DataFrame.swift b/Sources/SparkConnect/DataFrame.swift
index be5c5e7..80e5692 100644
--- a/Sources/SparkConnect/DataFrame.swift
+++ b/Sources/SparkConnect/DataFrame.swift
@@ -197,15 +197,15 @@ public actor DataFrame: Sendable {
     }
   }
 
-  /// Execute the plan and return the result as ``[[String?]]``.
-  /// - Returns: ``[[String?]]``
-  public func collect() async throws -> [[String?]] {
+  /// Execute the plan and return the result as ``[Row]``.
+  /// - Returns: ``[Row]``
+  public func collect() async throws -> [Row] {
     try await execute()
 
-    var result: [[String?]] = []
+    var result: [Row] = []
     for batch in self.batches {
       for i in 0..<batch.length {
-        var values: [String?] = []
+        var values: [Sendable?] = []
         for column in batch.columns {
           let str = column.array as! AsString
           if column.data.isNull(i) {
@@ -217,7 +217,7 @@ public actor DataFrame: Sendable {
             values.append(str.asString(i))
          }
         }
-        result.append(values)
+        result.append(Row(valueArray: values))
       }
     }
 
@@ -377,15 +377,15 @@ public actor DataFrame: Sendable {
 
   /// Returns the first `n` rows.
   /// - Parameter n: The number of rows. (default: 1)
-  /// - Returns: ``[[String?]]``
-  public func head(_ n: Int32 = 1) async throws -> [[String?]] {
+  /// - Returns: ``[Row]``
+  public func head(_ n: Int32 = 1) async throws -> [Row] {
     return try await limit(n).collect()
   }
 
   /// Returns the last `n` rows.
   /// - Parameter n: The number of rows.
-  /// - Returns: ``[[String?]]``
-  public func tail(_ n: Int32) async throws -> [[String?]] {
+  /// - Returns: ``[Row]``
+  public func tail(_ n: Int32) async throws -> [Row] {
     let lastN = DataFrame(spark:spark, plan: SparkConnectClient.getTail(self.plan.root, n))
     return try await lastN.collect()
   }
 
diff --git a/Sources/SparkConnect/Row.swift b/Sources/SparkConnect/Row.swift
new file mode 100644
index 0000000..0caf505
--- /dev/null
+++ b/Sources/SparkConnect/Row.swift
@@ -0,0 +1,90 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+import Foundation
+
+public struct Row: Sendable, Equatable {
+  let values: [Sendable?]
+
+  public init(_ values: Sendable?...) {
+    self.values = values
+  }
+
+  public init(valueArray: [Sendable?]) {
+    self.values = valueArray
+  }
+
+  public static var empty: Row {
+    return Row()
+  }
+
+  public var size: Int { return length }
+
+  public var length: Int { return values.count }
+
+  subscript(index: Int) -> Sendable {
+    get throws {
+      return try get(index)
+    }
+  }
+
+  public func get(_ i: Int) throws -> Sendable {
+    if i < 0 || i >= self.length {
+      throw SparkConnectError.InvalidArgumentException
+    }
+    return values[i]
+  }
+
+  public static func == (lhs: Row, rhs: Row) -> Bool {
+    if lhs.values.count != rhs.values.count {
+      return false
+    }
+    return lhs.values.elementsEqual(rhs.values) { (x, y) in
+      if x == nil && y == nil {
+        return true
+      } else if let a = x as? Bool, let b = y as? Bool {
+        return a == b
+      } else if let a = x as? Int, let b = y as? Int {
+        return a == b
+      } else if let a = x as? Int8, let b = y as? Int8 {
+        return a == b
+      } else if let a = x as? Int16, let b = y as? Int16 {
+        return a == b
+      } else if let a = x as? Int32, let b = y as? Int32 {
+        return a == b
+      } else if let a = x as? Int64, let b = y as? Int64 {
+        return a == b
+      } else if let a = x as? Float, let b = y as? Float {
+        return a == b
+      } else if let a = x as? Double, let b = y as? Double {
+        return a == b
+      } else if let a = x as? String, let b = y as? String {
+        return a == b
+      } else {
+        return false
+      }
+    }
+  }
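A small illustration of the semantics the new struct defines, based solely on the code above; the `try` on the subscript follows its throwing getter, and the snippet is a sketch rather than code from this commit:

```swift
let row = Row(1, 1.1, "a", true, nil)
print(row.size)                    // 5 (`size` is an alias of `length`)
print(try row.get(2) as! String)   // "a"
print(try row[3] as! Bool)         // true; the subscript forwards to get(_:)
// row.get(9) would throw SparkConnectError.InvalidArgumentException

// Equality compares element-wise, but only for the types listed in `==`
// (Bool, the fixed-width integers, Float, Double, String); any other
// payload type compares as unequal.
print(Row(1, "a") == Row(1, "a"))  // true
print(Row(nil) == Row(nil))        // true
print(Row([1]) == Row([1]))        // false: arrays are not a supported case

print(row.toString())              // "[1,1.1,a,true,null]"
```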
"null")" }.joined(separator: ","))]" + } +} + +extension Row { +} diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift index b9c927d..afd182c 100644 --- a/Tests/SparkConnectTests/DataFrameTests.swift +++ b/Tests/SparkConnectTests/DataFrameTests.swift @@ -249,7 +249,7 @@ struct DataFrameTests { @Test func sort() async throws { let spark = try await SparkSession.builder.getOrCreate() - let expected = (1...10).map{ [String($0)] } + let expected = Array((1...10).map{ Row(String($0)) }) #expect(try await spark.range(10, 0, -1).sort("id").collect() == expected) await spark.stop() } @@ -257,7 +257,7 @@ struct DataFrameTests { @Test func orderBy() async throws { let spark = try await SparkSession.builder.getOrCreate() - let expected = (1...10).map{ [String($0)] } + let expected = Array((1...10).map{ Row(String($0)) }) #expect(try await spark.range(10, 0, -1).orderBy("id").collect() == expected) await spark.stop() } @@ -284,7 +284,7 @@ struct DataFrameTests { #expect( try await spark.sql( "SELECT * FROM VALUES (1, true, 'abc'), (null, null, null), (3, false, 'def')" - ).collect() == [["1", "true", "abc"], [nil, nil, nil], ["3", "false", "def"]]) + ).collect() == [Row("1", "true", "abc"), Row(nil, nil, nil), Row("3", "false", "def")]) await spark.stop() } @@ -292,10 +292,10 @@ struct DataFrameTests { func head() async throws { let spark = try await SparkSession.builder.getOrCreate() #expect(try await spark.range(0).head().isEmpty) - #expect(try await spark.range(2).sort("id").head() == [["0"]]) - #expect(try await spark.range(2).sort("id").head(1) == [["0"]]) - #expect(try await spark.range(2).sort("id").head(2) == [["0"], ["1"]]) - #expect(try await spark.range(2).sort("id").head(3) == [["0"], ["1"]]) + #expect(try await spark.range(2).sort("id").head() == [Row("0")]) + #expect(try await spark.range(2).sort("id").head(1) == [Row("0")]) + #expect(try await spark.range(2).sort("id").head(2) == [Row("0"), Row("1")]) + #expect(try await spark.range(2).sort("id").head(3) == [Row("0"), Row("1")]) await spark.stop() } @@ -303,9 +303,9 @@ struct DataFrameTests { func tail() async throws { let spark = try await SparkSession.builder.getOrCreate() #expect(try await spark.range(0).tail(1).isEmpty) - #expect(try await spark.range(2).sort("id").tail(1) == [["1"]]) - #expect(try await spark.range(2).sort("id").tail(2) == [["0"], ["1"]]) - #expect(try await spark.range(2).sort("id").tail(3) == [["0"], ["1"]]) + #expect(try await spark.range(2).sort("id").tail(1) == [Row("1")]) + #expect(try await spark.range(2).sort("id").tail(2) == [Row("0"), Row("1")]) + #expect(try await spark.range(2).sort("id").tail(3) == [Row("0"), Row("1")]) await spark.stop() } diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.answer b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer new file mode 100644 index 0000000..0d42bcd --- /dev/null +++ b/Tests/SparkConnectTests/Resources/queries/binary.sql.answer @@ -0,0 +1 @@ +[[61 62 63]] diff --git a/Tests/SparkConnectTests/Resources/queries/binary.sql.json b/Tests/SparkConnectTests/Resources/queries/binary.sql.json deleted file mode 100644 index 08434e6..0000000 --- a/Tests/SparkConnectTests/Resources/queries/binary.sql.json +++ /dev/null @@ -1 +0,0 @@ -[["[61 62 63]"]] diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer new file mode 100644 index 0000000..8b13789 --- /dev/null +++ 
+++ b/Tests/SparkConnectTests/Resources/queries/cache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/cache.sql.json b/Tests/SparkConnectTests/Resources/queries/cache.sql.json
deleted file mode 100644
index fe51488..0000000
--- a/Tests/SparkConnectTests/Resources/queries/cache.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[]
diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json b/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json
deleted file mode 100644
index fe51488..0000000
--- a/Tests/SparkConnectTests/Resources/queries/clear_cache.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[]
diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.answer b/Tests/SparkConnectTests/Resources/queries/date.sql.answer
new file mode 100644
index 0000000..41ae9ec
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/date.sql.answer
@@ -0,0 +1 @@
+[2025-03-15 00:00:00 +0000]
diff --git a/Tests/SparkConnectTests/Resources/queries/date.sql.json b/Tests/SparkConnectTests/Resources/queries/date.sql.json
deleted file mode 100644
index 3fda858..0000000
--- a/Tests/SparkConnectTests/Resources/queries/date.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["2025-03-15 00:00:00 +0000"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer
new file mode 100644
index 0000000..a44243f
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.answer
@@ -0,0 +1,5 @@
+[Catalog Name,spark_catalog]
+[Namespace Name,default]
+[Comment,default database]
+[Location,*]
+[Owner,*]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json
deleted file mode 100644
index 614a70d..0000000
--- a/Tests/SparkConnectTests/Resources/queries/describe_database.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["Catalog Name","spark_catalog"],["Namespace Name","default"],["Comment","default database"],["Location","file:\/opt\/spark\/work-dir\/spark-warehouse"],["Owner","185"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer
new file mode 100644
index 0000000..3044189
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.answer
@@ -0,0 +1,3 @@
+[Function: abs]
+[Class: org.apache.spark.sql.catalyst.expressions.Abs]
+[Usage: abs(expr) - Returns the absolute value of the numeric or interval value.]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json
deleted file mode 100644
index a9a8b67..0000000
--- a/Tests/SparkConnectTests/Resources/queries/describe_function.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["Function: abs"],["Class: org.apache.spark.sql.catalyst.expressions.Abs"],["Usage: abs(expr) - Returns the absolute value of the numeric or interval value."]]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer
new file mode 100644
index 0000000..fba8554
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.answer
@@ -0,0 +1,3 @@
+[id,int,null]
+[name,string,null]
+[salary,double,null]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json
deleted file mode 100644
index 7f3b5be..0000000
--- a/Tests/SparkConnectTests/Resources/queries/describe_query.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["id","int",null],["name","string",null],["salary","double",null]]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer
new file mode 100644
index 0000000..eeec974
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.answer
@@ -0,0 +1 @@
+[col,int,null]
diff --git a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json b/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json
deleted file mode 100644
index 381060a..0000000
--- a/Tests/SparkConnectTests/Resources/queries/describe_table.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["col","int",null]]
diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.answer b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer
new file mode 100644
index 0000000..df0b263
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/explain.sql.answer
@@ -0,0 +1,22 @@
+[== Parsed Logical Plan ==
+'Aggregate ['k], ['k, unresolvedalias('sum('v))]
++- SubqueryAlias t
+   +- LocalRelation [k#, v#]
+
+== Analyzed Logical Plan ==
+k: int, sum(v): bigint
+Aggregate [k#], [k#, sum(v#) AS sum(v)#]
++- SubqueryAlias t
+   +- LocalRelation [k#, v#]
+
+== Optimized Logical Plan ==
+Aggregate [k#], [k#, sum(v#) AS sum(v)#]
++- LocalRelation [k#, v#]
+
+== Physical Plan ==
+AdaptiveSparkPlan isFinalPlan=false
++- HashAggregate(keys=[k#], functions=[sum(v#)], output=[k#, sum(v)#])
+   +- Exchange hashpartitioning(k#, 200), ENSURE_REQUIREMENTS, [plan_id=]
+      +- HashAggregate(keys=[k#], functions=[partial_sum(v#)], output=[k#, sum#])
+         +- LocalTableScan [k#, v#]
+]
diff --git a/Tests/SparkConnectTests/Resources/queries/explain.sql.json b/Tests/SparkConnectTests/Resources/queries/explain.sql.json
deleted file mode 100644
index 4335a7a..0000000
--- a/Tests/SparkConnectTests/Resources/queries/explain.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["== Parsed Logical Plan ==\n'Aggregate ['k], ['k, unresolvedalias('sum('v))]\n+- SubqueryAlias t\n   +- LocalRelation [k#, v#]\n\n== Analyzed Logical Plan ==\nk: int, sum(v): bigint\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- SubqueryAlias t\n   +- LocalRelation [k#, v#]\n\n== Optimized Logical Plan ==\nAggregate [k#], [k#, sum(v#) AS sum(v)#]\n+- LocalRelation [k#, v#]\n\n== Physical Plan ==\nAdaptiveSparkPlan isFinalPlan=false\n+- HashAggregate(keys=[k#], functions=[sum(v#)], outpu [...]
diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.answer b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer
new file mode 100644
index 0000000..913b56c
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/floating.sql.answer
@@ -0,0 +1 @@
+[1.0,-2.0,3.0,-4.0,inf,nan,inf,nan]
diff --git a/Tests/SparkConnectTests/Resources/queries/floating.sql.json b/Tests/SparkConnectTests/Resources/queries/floating.sql.json
deleted file mode 100644
index 8bedf9a..0000000
--- a/Tests/SparkConnectTests/Resources/queries/floating.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["1.0","-2.0","3.0","-4.0","inf","nan","inf","nan"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.answer b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer
new file mode 100644
index 0000000..3933c80
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/integral.sql.answer
@@ -0,0 +1 @@
+[127,-128,32767,-32768,2147483647,-2147483648,9223372036854775807,-9223372036854775808]
diff --git a/Tests/SparkConnectTests/Resources/queries/integral.sql.json b/Tests/SparkConnectTests/Resources/queries/integral.sql.json
deleted file mode 100644
index 4c24e38..0000000
--- a/Tests/SparkConnectTests/Resources/queries/integral.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["127","-128","32767","-32768","2147483647","-2147483648","9223372036854775807","-9223372036854775808"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer
new file mode 100644
index 0000000..8e79aed
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.answer
@@ -0,0 +1,2 @@
+[0,0]
+[1,2]
diff --git a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json b/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json
deleted file mode 100644
index bfa3b54..0000000
--- a/Tests/SparkConnectTests/Resources/queries/pipesyntax.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["0","0"],["1","2"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.answer b/Tests/SparkConnectTests/Resources/queries/select.sql.answer
new file mode 100644
index 0000000..7660873
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/select.sql.answer
@@ -0,0 +1 @@
+[1]
diff --git a/Tests/SparkConnectTests/Resources/queries/select.sql.json b/Tests/SparkConnectTests/Resources/queries/select.sql.json
deleted file mode 100644
index 0a0ab46..0000000
--- a/Tests/SparkConnectTests/Resources/queries/select.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["1"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer
new file mode 100644
index 0000000..ab109a1
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.answer
@@ -0,0 +1 @@
+[default]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json b/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json
deleted file mode 100644
index 621d59f..0000000
--- a/Tests/SparkConnectTests/Resources/queries/show_databases.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["default"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer
new file mode 100644
index 0000000..9338974
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.answer
@@ -0,0 +1 @@
+[,testcache,true]
diff --git a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json b/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json
deleted file mode 100644
index 2785318..0000000
--- a/Tests/SparkConnectTests/Resources/queries/show_tables.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["","testcache","true"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.answer b/Tests/SparkConnectTests/Resources/queries/string.sql.answer
new file mode 100644
index 0000000..8895994
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/string.sql.answer
@@ -0,0 +1 @@
+[abc,def]
diff --git a/Tests/SparkConnectTests/Resources/queries/string.sql.json b/Tests/SparkConnectTests/Resources/queries/string.sql.json
deleted file mode 100644
index f5935b2..0000000
--- a/Tests/SparkConnectTests/Resources/queries/string.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["abc","def"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.answer b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer
new file mode 100644
index 0000000..cbbff35
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/struct.sql.answer
@@ -0,0 +1 @@
+[{1},{2,{3}}]
diff --git a/Tests/SparkConnectTests/Resources/queries/struct.sql.json b/Tests/SparkConnectTests/Resources/queries/struct.sql.json
deleted file mode 100644
index 08ef088..0000000
--- a/Tests/SparkConnectTests/Resources/queries/struct.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[["{1}","{2,{3}}"]]
diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/Tests/SparkConnectTests/Resources/queries/uncache.sql.answer
@@ -0,0 +1 @@
+
diff --git a/Tests/SparkConnectTests/Resources/queries/uncache.sql.json b/Tests/SparkConnectTests/Resources/queries/uncache.sql.json
deleted file mode 100644
index fe51488..0000000
--- a/Tests/SparkConnectTests/Resources/queries/uncache.sql.json
+++ /dev/null
@@ -1 +0,0 @@
-[]
diff --git a/Tests/SparkConnectTests/RowTests.swift b/Tests/SparkConnectTests/RowTests.swift
new file mode 100644
index 0000000..3262686
--- /dev/null
+++ b/Tests/SparkConnectTests/RowTests.swift
@@ -0,0 +1,94 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+import Foundation
+import SparkConnect
+import Testing
+
+/// A test suite for `Row`
+struct RowTests {
+  @Test
+  func empty() {
+    #expect(Row.empty.size == 0)
+    #expect(Row.empty.length == 0)
+    #expect(throws: SparkConnectError.InvalidArgumentException) {
+      try Row.empty.get(0)
+    }
+  }
+
+  @Test
+  func create() {
+    #expect(Row(nil).size == 1)
+    #expect(Row(1).size == 1)
+    #expect(Row(1.1).size == 1)
+    #expect(Row("a").size == 1)
+    #expect(Row(nil, 1, 1.1, "a", true).size == 5)
+    #expect(Row(valueArray: [nil, 1, 1.1, "a", true]).size == 5)
+  }
+
+  @Test
+  func string() async throws {
+    #expect(Row(nil, 1, 1.1, "a", true).toString() == "[null,1,1.1,a,true]")
+  }
+
+  @Test
+  func get() throws {
+    let row = Row(1, 1.1, "a", true)
+    #expect(try row.get(0) as! Int == 1)
+    #expect(try row.get(1) as! Double == 1.1)
+    #expect(try row.get(2) as! String == "a")
+    #expect(try row.get(3) as! Bool == true)
+    #expect(throws: SparkConnectError.InvalidArgumentException) {
+      try Row.empty.get(-1)
+    }
+  }
+
+  @Test
+  func compare() {
+    #expect(Row(nil) != Row())
+    #expect(Row(nil) == Row(nil))
+
+    #expect(Row(1) == Row(1))
+    #expect(Row(1) != Row(2))
+    #expect(Row(1, 2, 3) == Row(1, 2, 3))
+    #expect(Row(1, 2, 3) != Row(1, 2, 4))
+
+    #expect(Row(1.0) == Row(1.0))
+    #expect(Row(1.0) != Row(2.0))
+
+    #expect(Row("a") == Row("a"))
+    #expect(Row("a") != Row("b"))
+
+    #expect(Row(true) == Row(true))
+    #expect(Row(true) != Row(false))
+
+    #expect(Row(1, "a") == Row(1, "a"))
+    #expect(Row(1, "a") != Row(2, "a"))
+    #expect(Row(1, "a") != Row(1, "b"))
+
+    #expect(Row(0, 1, 2) == Row(valueArray: [0, 1, 2]))
+
+    #expect(Row(0) == Row(Optional(0)))
+    #expect(Row(Optional(0)) == Row(Optional(0)))
+
+    #expect([Row(1)] == [Row(1)])
+    #expect([Row(1), Row(2)] == [Row(1), Row(2)])
+    #expect([Row(1), Row(2)] != [Row(1), Row(3)])
+  }
+}
diff --git a/Tests/SparkConnectTests/SQLTests.swift b/Tests/SparkConnectTests/SQLTests.swift
index 997e888..7df5c99 100644
--- a/Tests/SparkConnectTests/SQLTests.swift
+++ b/Tests/SparkConnectTests/SQLTests.swift
@@ -78,9 +78,8 @@ struct SQLTests {
 
       print(name)
       let sql = try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name)"), encoding: .utf8)
-      let jsonData = try encoder.encode(try await spark.sql(sql).collect())
-      let answer = cleanUp(String(data: jsonData, encoding: .utf8)!)
-      let expected = cleanUp(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).json"), encoding: .utf8))
+      let answer = cleanUp(try await spark.sql(sql).collect().map { $0.toString() }.joined(separator: "\n"))
+      let expected = cleanUp(try String(contentsOf: URL(fileURLWithPath: "\(path)/\(name).answer"), encoding: .utf8))
       #expect(answer == expected.trimmingCharacters(in: .whitespacesAndNewlines))
     }
     await spark.stop()
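In effect, each `.answer` golden file now holds one `toString()` rendering per row rather than a JSON encoding. A minimal sketch of the comparison this test performs, with an illustrative query and a `spark` session as above (not code from this commit):

```swift
// Sketch: how a ".answer" golden file relates to collect() output.
// Each Row renders via toString(), one row per line.
let rows = try await spark.sql("SELECT * FROM VALUES (1, 'a'), (2, 'b')").collect()
let answer = rows.map { $0.toString() }.joined(separator: "\n")
// answer is now:
// [1,a]
// [2,b]
```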
diff --git a/Tests/SparkConnectTests/SparkSessionTests.swift b/Tests/SparkConnectTests/SparkSessionTests.swift
index 901a683..f864b09 100644
--- a/Tests/SparkConnectTests/SparkSessionTests.swift
+++ b/Tests/SparkConnectTests/SparkSessionTests.swift
@@ -91,7 +91,7 @@ struct SparkSessionTests {
     let spark = try await SparkSession.builder.getOrCreate()
     #expect(try await spark.time(spark.range(1000).count) == 1000)
     #if !os(Linux)
-      #expect(try await spark.time(spark.range(1).collect) == [["0"]])
+      #expect(try await spark.time(spark.range(1).collect) == [Row("0")])
       try await spark.time(spark.range(10).show)
     #endif
     await spark.stop()

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org