This is an automated email from the ASF dual-hosted git repository.
dongjoon-hyun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/spark-connect-swift.git
The following commit(s) were added to refs/heads/main by this push:
new e56c5ca [SPARK-57306] Move `DataFrameNaFunctions` tests to new `DataFrameNaFunctionsTests`
e56c5ca is described below
commit e56c5ca7e1dfa2e801ecec8ee9726ef483810cf5
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Jun 7 14:11:27 2026 -0700
[SPARK-57306] Move `DataFrameNaFunctions` tests to new `DataFrameNaFunctionsTests`
### What changes were proposed in this pull request?
This PR moves the `DataFrameNaFunctions` test cases (`naFill`, `naDrop`,
`naReplace`) out of `DataFrameTests.swift` into a new dedicated
`DataFrameNaFunctionsTests.swift` file.
### Why are the changes needed?
To improve test organization. The `na.(fill|drop|replace)` APIs are backed
by a separate `DataFrameNaFunctions` source file, so their tests belong in a
matching test file. This keeps `DataFrameTests.swift` focused and mirrors the
source layout.
### Does this PR introduce _any_ user-facing change?
No. This is a test-only refactoring with no test logic changes.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Opus 4.8
Closes #408 from dongjoon-hyun/SPARK-57306.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../DataFrameNaFunctionsTests.swift | 100 +++++++++++++++++++++
Tests/SparkConnectTests/DataFrameTests.swift | 70 ---------------
2 files changed, 100 insertions(+), 70 deletions(-)
diff --git a/Tests/SparkConnectTests/DataFrameNaFunctionsTests.swift b/Tests/SparkConnectTests/DataFrameNaFunctionsTests.swift
new file mode 100644
index 0000000..bd5f978
--- /dev/null
+++ b/Tests/SparkConnectTests/DataFrameNaFunctionsTests.swift
@@ -0,0 +1,100 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+#if canImport(FoundationEssentials)
+import FoundationEssentials
+#else
+import Foundation
+#endif
+import SparkConnect
+import Testing
+
+/// A test suite for `DataFrameNaFunctions`
+@Suite(.serialized)
+struct DataFrameNaFunctionsTests {
+ @Test
+ func naFill() async throws {
+ let spark = try await SparkSession.builder.getOrCreate()
+ let df = try await spark.sql(
+ "SELECT * FROM VALUES (1, 10, 'a'), (NULL, NULL, 'b'), (3, 30, NULL) AS T(a, b, s)")
+ // Fill all type-compatible (numeric) columns.
+ #expect(
+ try await df.na.fill(0).collect()
+ == [Row(1, 10, "a"), Row(0, 0, "b"), Row(3, 30, nil)])
+ // Fill a subset of columns.
+ #expect(
+ try await df.na.fill(0, ["a"]).collect()
+ == [Row(1, 10, "a"), Row(0, nil, "b"), Row(3, 30, nil)])
+ // Fill string columns.
+ #expect(
+ try await df.na.fill("z").collect()
+ == [Row(1, 10, "a"), Row(nil, nil, "b"), Row(3, 30, "z")])
+ // Fill per-column values.
+ #expect(
+ try await df.na.fill(["a": 0, "s": "z"]).collect()
+ == [Row(1, 10, "a"), Row(0, nil, "b"), Row(3, 30, "z")])
+ await spark.stop()
+ }
+
+ @Test
+ func naDrop() async throws {
+ let spark = try await SparkSession.builder.getOrCreate()
+ let df = try await spark.sql(
+ "SELECT * FROM VALUES (1, 10, 'a'), (NULL, NULL, 'b'), (3, 30, NULL) AS T(a, b, s)")
+ // Drop rows containing any null value (default).
+ #expect(try await df.na.drop().collect() == [Row(1, 10, "a")])
+ // Drop rows only when every value is null.
+ #expect(
+ try await df.na.drop(how: "all").collect()
+ == [Row(1, 10, "a"), Row(nil, nil, "b"), Row(3, 30, nil)])
+ // Keep rows with at least 2 non-null values.
+ #expect(
+ try await df.na.drop(minNonNulls: 2).collect()
+ == [Row(1, 10, "a"), Row(3, 30, nil)])
+ // Consider only a subset of columns.
+ #expect(
+ try await df.na.drop(how: "any", ["s"]).collect()
+ == [Row(1, 10, "a"), Row(nil, nil, "b")])
+ await spark.stop()
+ }
+
+ @Test
+ func naReplace() async throws {
+ let spark = try await SparkSession.builder.getOrCreate()
+ let df = try await spark.sql("SELECT * FROM VALUES (1, 'a'), (2, 'b'), (3, 'a') AS T(n, s)")
+ // Replace string values in a single column.
+ #expect(
+ try await df.na.replace("s", ["a": "z"]).collect()
+ == [Row(1, "z"), Row(2, "b"), Row(3, "z")])
+ // Replace several string values across the given columns.
+ #expect(
+ try await df.na.replace(["s"], ["a": "z", "b": "y"]).collect()
+ == [Row(1, "z"), Row(2, "y"), Row(3, "z")])
+ // `*` considers all type-compatible columns.
+ #expect(
+ try await df.na.replace("*", ["a": "z"]).collect()
+ == [Row(1, "z"), Row(2, "b"), Row(3, "z")])
+ // Replace numeric (double) values.
+ let df2 = try await spark.sql("SELECT * FROM VALUES (1.0D), (2.0D), (1.0D) AS T(d)")
+ #expect(
+ try await df2.na.replace("d", [1.0: 9.0]).collect()
+ == [Row(9.0), Row(2.0), Row(9.0)])
+ await spark.stop()
+ }
+}
diff --git a/Tests/SparkConnectTests/DataFrameTests.swift b/Tests/SparkConnectTests/DataFrameTests.swift
index bdacb8a..139c0ed 100644
--- a/Tests/SparkConnectTests/DataFrameTests.swift
+++ b/Tests/SparkConnectTests/DataFrameTests.swift
@@ -309,76 +309,6 @@ struct DataFrameTests {
await spark.stop()
}
- @Test
- func naFill() async throws {
- let spark = try await SparkSession.builder.getOrCreate()
- let df = try await spark.sql(
- "SELECT * FROM VALUES (1, 10, 'a'), (NULL, NULL, 'b'), (3, 30, NULL) AS T(a, b, s)")
- // Fill all type-compatible (numeric) columns.
- #expect(
- try await df.na.fill(0).collect()
- == [Row(1, 10, "a"), Row(0, 0, "b"), Row(3, 30, nil)])
- // Fill a subset of columns.
- #expect(
- try await df.na.fill(0, ["a"]).collect()
- == [Row(1, 10, "a"), Row(0, nil, "b"), Row(3, 30, nil)])
- // Fill string columns.
- #expect(
- try await df.na.fill("z").collect()
- == [Row(1, 10, "a"), Row(nil, nil, "b"), Row(3, 30, "z")])
- // Fill per-column values.
- #expect(
- try await df.na.fill(["a": 0, "s": "z"]).collect()
- == [Row(1, 10, "a"), Row(0, nil, "b"), Row(3, 30, "z")])
- await spark.stop()
- }
-
- @Test
- func naDrop() async throws {
- let spark = try await SparkSession.builder.getOrCreate()
- let df = try await spark.sql(
- "SELECT * FROM VALUES (1, 10, 'a'), (NULL, NULL, 'b'), (3, 30, NULL) AS T(a, b, s)")
- // Drop rows containing any null value (default).
- #expect(try await df.na.drop().collect() == [Row(1, 10, "a")])
- // Drop rows only when every value is null.
- #expect(
- try await df.na.drop(how: "all").collect()
- == [Row(1, 10, "a"), Row(nil, nil, "b"), Row(3, 30, nil)])
- // Keep rows with at least 2 non-null values.
- #expect(
- try await df.na.drop(minNonNulls: 2).collect()
- == [Row(1, 10, "a"), Row(3, 30, nil)])
- // Consider only a subset of columns.
- #expect(
- try await df.na.drop(how: "any", ["s"]).collect()
- == [Row(1, 10, "a"), Row(nil, nil, "b")])
- await spark.stop()
- }
-
- @Test
- func naReplace() async throws {
- let spark = try await SparkSession.builder.getOrCreate()
- let df = try await spark.sql("SELECT * FROM VALUES (1, 'a'), (2, 'b'), (3, 'a') AS T(n, s)")
- // Replace string values in a single column.
- #expect(
- try await df.na.replace("s", ["a": "z"]).collect()
- == [Row(1, "z"), Row(2, "b"), Row(3, "z")])
- // Replace several string values across the given columns.
- #expect(
- try await df.na.replace(["s"], ["a": "z", "b": "y"]).collect()
- == [Row(1, "z"), Row(2, "y"), Row(3, "z")])
- // `*` considers all type-compatible columns.
- #expect(
- try await df.na.replace("*", ["a": "z"]).collect()
- == [Row(1, "z"), Row(2, "b"), Row(3, "z")])
- // Replace numeric (double) values.
- let df2 = try await spark.sql("SELECT * FROM VALUES (1.0D), (2.0D), (1.0D) AS T(d)")
- #expect(
- try await df2.na.replace("d", [1.0: 9.0]).collect()
- == [Row(9.0), Row(2.0), Row(9.0)])
- await spark.stop()
- }
-
@Test
func filter() async throws {
let spark = try await SparkSession.builder.getOrCreate()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]