This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 98de7eb1f3a
[SPARK-42011][SPARK-42012][CONNECT][PYTHON][TESTS][FOLLOW-UP] Enable csv, orc
tests in connect/test_parity_datasources.py
98de7eb1f3a is described below
commit 98de7eb1f3af82a3dc2cd67065c863a04793f50c
Author: Sandeep Singh <[email protected]>
AuthorDate: Mon Jan 16 09:28:25 2023 +0900
[SPARK-42011][SPARK-42012][CONNECT][PYTHON][TESTS][FOLLOW-UP] Enable csv,
orc tests in connect/test_parity_datasources.py
### What changes were proposed in this pull request?
Enable csv, orc tests in connect/test_parity_datasources.py
### Why are the changes needed?
for test coverage
### Does this PR introduce _any_ user-facing change?
no, test-only
### How was this patch tested?
enabled UT
Closes #39581 from techaddict/SPARK-42011-followup.
Authored-by: Sandeep Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/column.py | 3 ---
python/pyspark/sql/connect/readwriter.py | 4 ----
python/pyspark/sql/dataframe.py | 2 +-
.../pyspark/sql/tests/connect/test_parity_datasources.py | 16 ----------------
4 files changed, 1 insertion(+), 24 deletions(-)
diff --git a/python/pyspark/sql/connect/column.py
b/python/pyspark/sql/connect/column.py
index d26283571cc..d2c334ae67f 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -439,9 +439,6 @@ def _test() -> None:
.getOrCreate()
)
- # Spark Connect has a different string representation for Column.
- del pyspark.sql.connect.column.Column.getItem.__doc__
-
# TODO(SPARK-41772): Enable pyspark.sql.connect.column.Column.withField doctest
del pyspark.sql.connect.column.Column.withField.__doc__
diff --git a/python/pyspark/sql/connect/readwriter.py
b/python/pyspark/sql/connect/readwriter.py
index 8e8f4476799..4643da317d0 100644
--- a/python/pyspark/sql/connect/readwriter.py
+++ b/python/pyspark/sql/connect/readwriter.py
@@ -616,12 +616,8 @@ def _test() -> None:
globs = pyspark.sql.connect.readwriter.__dict__.copy()
# TODO(SPARK-41817): Support reading with schema
- del pyspark.sql.connect.readwriter.DataFrameReader.load.__doc__
del pyspark.sql.connect.readwriter.DataFrameReader.option.__doc__
- del pyspark.sql.connect.readwriter.DataFrameReader.text.__doc__
- del pyspark.sql.connect.readwriter.DataFrameWriter.csv.__doc__
del pyspark.sql.connect.readwriter.DataFrameWriter.option.__doc__
- del pyspark.sql.connect.readwriter.DataFrameWriter.text.__doc__
del pyspark.sql.connect.readwriter.DataFrameWriter.bucketBy.__doc__
del pyspark.sql.connect.readwriter.DataFrameWriter.sortBy.__doc__
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 09a5e9d0b07..f45b0d70e99 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -506,7 +506,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
--------
>>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"])
>>> type(df.write)
- <class 'pyspark.sql.readwriter.DataFrameWriter'>
+ <class '...readwriter.DataFrameWriter'>
Write the DataFrame as a table.
diff --git a/python/pyspark/sql/tests/connect/test_parity_datasources.py
b/python/pyspark/sql/tests/connect/test_parity_datasources.py
index db1bba8de10..9d4ab151542 100644
--- a/python/pyspark/sql/tests/connect/test_parity_datasources.py
+++ b/python/pyspark/sql/tests/connect/test_parity_datasources.py
@@ -22,12 +22,6 @@ from pyspark.testing.connectutils import ReusedConnectTestCase
class DataSourcesParityTests(DataSourcesTestsMixin, ReusedConnectTestCase):
-
- # TODO(SPARK-42011): Implement DataFrameReader.csv
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_checking_csv_header(self):
- super().test_checking_csv_header()
-
@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
def test_csv_sampling_ratio(self):
super().test_csv_sampling_ratio()
@@ -36,16 +30,6 @@ class DataSourcesParityTests(DataSourcesTestsMixin, ReusedConnectTestCase):
def test_json_sampling_ratio(self):
super().test_json_sampling_ratio()
- # TODO(SPARK-42011): Implement DataFrameReader.csv
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_multiline_csv(self):
- super().test_multiline_csv()
-
- # TODO(SPARK-42012): Implement DataFrameReader.orc
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_read_multiple_orc_file(self):
- super().test_read_multiple_orc_file()
-
if __name__ == "__main__":
import unittest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]