This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 999c1d74e0b [SPARK-42007][CONNECT][TESTS] Reuse 
pyspark.sql.tests.test_group test cases
999c1d74e0b is described below

commit 999c1d74e0bcd436b241474db59c61d9bb92e69a
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 12 17:01:05 2023 +0900

    [SPARK-42007][CONNECT][TESTS] Reuse pyspark.sql.tests.test_group test cases
    
    ### What changes were proposed in this pull request?
    
    This PR reuses PySpark `pyspark.sql.tests.test_group` tests in Spark 
Connect that pass for now.
    
    ### Why are the changes needed?
    
    To make sure of the test coverage.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, test-only.
    
    ### How was this patch tested?
    
    Manually ran it in my local environment.
    
    Closes #39525 from HyukjinKwon/SPARK-42007.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 dev/sparktestsupport/modules.py                    |  1 +
 .../test_parity_group.py}                          | 25 ++++++----------------
 python/pyspark/sql/tests/test_group.py             |  6 +++++-
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 080b2ea0465..0bd82c7122d 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -520,6 +520,7 @@ pyspark_connect = Module(
         "pyspark.sql.tests.connect.test_connect_column",
         "pyspark.sql.tests.connect.test_parity_catalog",
         "pyspark.sql.tests.connect.test_parity_functions",
+        "pyspark.sql.tests.connect.test_parity_group",
         "pyspark.sql.tests.connect.test_parity_dataframe",
         "pyspark.sql.tests.connect.test_parity_readwriter",
     ],
diff --git a/python/pyspark/sql/tests/test_group.py 
b/python/pyspark/sql/tests/connect/test_parity_group.py
similarity index 55%
copy from python/pyspark/sql/tests/test_group.py
copy to python/pyspark/sql/tests/connect/test_parity_group.py
index 19e1228d250..1dba236a12d 100644
--- a/python/pyspark/sql/tests/test_group.py
+++ b/python/pyspark/sql/tests/connect/test_parity_group.py
@@ -15,33 +15,20 @@
 # limitations under the License.
 #
 
-from pyspark.sql import Row
-from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.sql.tests.test_group import GroupTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
 
 
-class GroupTests(ReusedSQLTestCase):
-    def test_aggregator(self):
-        df = self.df
-        g = df.groupBy()
-        self.assertEqual([99, 100], sorted(g.agg({"key": "max", "value": 
"count"}).collect()[0]))
-        self.assertEqual([Row(**{"AVG(key#0)": 49.5})], g.mean().collect())
-
-        from pyspark.sql import functions
-
-        self.assertEqual(
-            (0, "99"), tuple(g.agg(functions.first(df.key), 
functions.last(df.value)).first())
-        )
-        self.assertTrue(95 < 
g.agg(functions.approx_count_distinct(df.key)).first()[0])
-        # test deprecated countDistinct
-        self.assertEqual(100, 
g.agg(functions.countDistinct(df.value)).first()[0])
+class GroupParityTests(GroupTestsMixin, ReusedConnectTestCase):
+    pass
 
 
 if __name__ == "__main__":
     import unittest
-    from pyspark.sql.tests.test_group import *  # noqa: F401
+    from pyspark.sql.tests.connect.test_parity_group import *  # noqa: F401
 
     try:
-        import xmlrunner
+        import xmlrunner  # type: ignore[import]
 
         testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", 
verbosity=2)
     except ImportError:
diff --git a/python/pyspark/sql/tests/test_group.py 
b/python/pyspark/sql/tests/test_group.py
index 19e1228d250..2715571a44d 100644
--- a/python/pyspark/sql/tests/test_group.py
+++ b/python/pyspark/sql/tests/test_group.py
@@ -19,7 +19,7 @@ from pyspark.sql import Row
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
-class GroupTests(ReusedSQLTestCase):
+class GroupTestsMixin:
     def test_aggregator(self):
         df = self.df
         g = df.groupBy()
@@ -36,6 +36,10 @@ class GroupTests(ReusedSQLTestCase):
         self.assertEqual(100, 
g.agg(functions.countDistinct(df.value)).first()[0])
 
 
+class GroupTests(GroupTestsMixin, ReusedSQLTestCase):
+    pass
+
+
 if __name__ == "__main__":
     import unittest
     from pyspark.sql.tests.test_group import *  # noqa: F401


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to