This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b4c84c917aee [SPARK-54544][PYTHON] Enable flake8 F811 check
b4c84c917aee is described below

commit b4c84c917aee673d26162a92bd95afc743ea7684
Author: Tian Gao <[email protected]>
AuthorDate: Tue Dec 2 08:53:25 2025 +0900

    [SPARK-54544][PYTHON] Enable flake8 F811 check
    
    ### What changes were proposed in this pull request?
    
    Enabled flake8 [F811](https://www.flake8rules.com/rules/F811.html) check on 
our repo and fixed reported issues.
    
    ### Why are the changes needed?
    
    I know upgrading lint system is a pain, but we should not just put it aside 
forever. Our pinned `flake8` version is not even usable on Python3.12+.
    
    During this "lint fix", I actually discovered a few real bugs - most of 
them are silently disabled unittests because there is a test method that has 
the same name (probably due to copy/paste). I think this result supports the 
idea that we should take lint more seriously.
    
    About `functions.log`, we got it wrong. It's not because `overload` does 
not work properly - it's because we have two `log` functions in that gigantic 
file. The former one is 
[dead](https://app.codecov.io/gh/apache/spark/blob/master/python%2Fpyspark%2Fsql%2Ffunctions%2Fbuiltin.py#L3111).
 I just removed that one.
    
    Again, I really think we should upgrade our lint system. I'm trying to do 
it slowly - piece by piece, so that people's daily workflow is not impacted too 
much.
    
    I hope we can eventually move to a place where all linters are updated and 
people can be more confident about their changes.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    `flake8` test on major directories. CI should give a more comprehensive 
result.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #53253 from gaogaotiantian/flake8-f811.
    
    Authored-by: Tian Gao <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 dev/tox.ini                                        |  3 --
 python/pyspark/logger/tests/test_logger.py         |  1 -
 python/pyspark/ml/util.py                          |  1 -
 python/pyspark/mllib/classification.py             |  1 -
 python/pyspark/mllib/feature.py                    |  1 -
 python/pyspark/mllib/regression.py                 |  2 +-
 python/pyspark/mllib/tree.py                       |  2 +-
 python/pyspark/mllib/util.py                       |  3 +-
 .../pandas/tests/diff_frames_ops/test_corrwith.py  |  1 -
 .../pandas/tests/diff_frames_ops/test_dot_frame.py |  1 -
 .../pyspark/pandas/tests/indexes/test_category.py  |  1 -
 .../pyspark/pandas/tests/indexes/test_timedelta.py |  1 -
 .../pandas/tests/series/test_string_ops_adv.py     |  1 -
 python/pyspark/pandas/tests/test_namespace.py      |  1 -
 python/pyspark/pandas/tests/test_numpy_compat.py   |  1 -
 python/pyspark/pandas/tests/test_utils.py          |  2 +-
 .../pipelines/tests/test_block_connect_access.py   |  1 -
 python/pyspark/sql/connect/dataframe.py            |  1 -
 python/pyspark/sql/connect/group.py                |  1 -
 python/pyspark/sql/connect/udf.py                  |  1 -
 python/pyspark/sql/functions/builtin.py            | 52 +---------------------
 python/pyspark/sql/tests/arrow/test_arrow.py       |  6 ---
 python/pyspark/sql/tests/arrow/test_arrow_udf.py   | 14 +++---
 .../sql/tests/arrow/test_arrow_udf_scalar.py       |  2 +-
 .../connect/streaming/test_parity_listener.py      |  1 -
 .../connect/test_parity_dataframe_query_context.py |  1 -
 .../sql/tests/connect/test_parity_geographytype.py |  1 -
 .../sql/tests/connect/test_parity_geometrytype.py  |  1 -
 .../sql/tests/connect/test_parity_observation.py   |  1 -
 .../sql/tests/connect/test_parity_readwriter.py    |  1 -
 .../pyspark/sql/tests/connect/test_parity_udf.py   |  4 --
 .../test_transform_with_state_state_variable.py    |  1 -
 python/pyspark/sql/tests/pandas/test_pandas_udf.py | 16 +++----
 .../sql/tests/pandas/test_pandas_udf_scalar.py     |  1 -
 python/pyspark/sql/tests/test_python_datasource.py |  9 ++--
 python/pyspark/sql/tests/test_udtf.py              |  1 -
 python/pyspark/sql/tests/test_utils.py             | 49 +-------------------
 .../tests/udf_type_tests/test_udf_return_types.py  |  1 -
 python/pyspark/worker.py                           |  2 -
 39 files changed, 26 insertions(+), 165 deletions(-)

diff --git a/dev/tox.ini b/dev/tox.ini
index 8a8e03ec9be3..721266ec2bfa 100644
--- a/dev/tox.ini
+++ b/dev/tox.ini
@@ -18,9 +18,6 @@
 ignore =
     E203, # Skip as black formatter adds a whitespace around ':'.
     E402, # Module top level import is disabled for optional import check, etc.
-    # 1. Type hints with def are treated as redefinition (e.g., functions.log).
-    # 2. Some are used for testing.
-    F811,
     # There are too many instances to fix. Ignored for now.
     W503,
     W504,
diff --git a/python/pyspark/logger/tests/test_logger.py 
b/python/pyspark/logger/tests/test_logger.py
index 96d707eb34a2..69b28d2308a7 100644
--- a/python/pyspark/logger/tests/test_logger.py
+++ b/python/pyspark/logger/tests/test_logger.py
@@ -215,7 +215,6 @@ class LoggerTests(LoggerTestsMixin, ReusedSQLTestCase):
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.logger.tests.test_logger import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 9072e88ca29f..06759bc25269 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -50,7 +50,6 @@ if TYPE_CHECKING:
     from py4j.java_gateway import JavaGateway, JavaObject
     from pyspark.ml._typing import PipelineStage
     from pyspark.ml.base import Params
-    from pyspark.ml.wrapper import JavaWrapper
     from pyspark.core.context import SparkContext
     from pyspark.sql import DataFrame
     from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame
diff --git a/python/pyspark/mllib/classification.py 
b/python/pyspark/mllib/classification.py
index bf8fd04dc283..84b59eecbe9d 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -34,7 +34,6 @@ from pyspark.mllib.regression import (
 )
 from pyspark.mllib.util import Saveable, Loader, inherit_doc
 from pyspark.mllib.linalg import Vector
-from pyspark.mllib.regression import LabeledPoint
 
 if TYPE_CHECKING:
     from pyspark.mllib._typing import VectorLike
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 915a55595cb5..0d988dcb065a 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -36,7 +36,6 @@ from py4j.java_collections import JavaMap
 
 if TYPE_CHECKING:
     from pyspark.mllib._typing import VectorLike
-    from py4j.java_collections import JavaMap
 
 __all__ = [
     "Normalizer",
diff --git a/python/pyspark/mllib/regression.py 
b/python/pyspark/mllib/regression.py
index 87f05bc0979b..b4bd41492706 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -32,7 +32,7 @@ from typing import (
 
 import numpy as np
 
-from pyspark import RDD, since
+from pyspark import since
 from pyspark.streaming.dstream import DStream
 from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc
 from pyspark.mllib.linalg import _convert_to_vector
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index b24bced3ced6..9e945d8a1b94 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -18,7 +18,7 @@
 import sys
 import random
 
-from pyspark import RDD, since
+from pyspark import since
 from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper
 from pyspark.mllib.linalg import _convert_to_vector
 from pyspark.mllib.regression import LabeledPoint
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 5572d9ca8555..caa2c9338a95 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -20,7 +20,7 @@ from functools import reduce
 
 import numpy as np
 
-from pyspark import SparkContext, since
+from pyspark import since
 from pyspark.mllib.common import callMLlibFunc, inherit_doc
 from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector
 from pyspark.sql import DataFrame
@@ -28,7 +28,6 @@ from typing import Generic, Iterable, List, Optional, Tuple, 
Type, TypeVar, cast
 from pyspark.core.context import SparkContext
 from pyspark.mllib.linalg import Vector
 from pyspark.core.rdd import RDD
-from pyspark.sql.dataframe import DataFrame
 
 T = TypeVar("T")
 L = TypeVar("L", bound="Loader")
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_corrwith.py 
b/python/pyspark/pandas/tests/diff_frames_ops/test_corrwith.py
index 1bd274b45a74..5421de9f3371 100644
--- a/python/pyspark/pandas/tests/diff_frames_ops/test_corrwith.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_corrwith.py
@@ -124,7 +124,6 @@ class DiffFramesCorrWithTests(
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.diff_frames_ops.test_corrwith import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py 
b/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py
index 7a94e1858f09..14292b6c3280 100644
--- a/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py
@@ -91,7 +91,6 @@ class DiffFramesDotFrameTests(DiffFramesDotFrameMixin, 
PandasOnSparkTestCase, SQ
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.diff_frames_ops.test_dot_frame import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/pandas/tests/indexes/test_category.py 
b/python/pyspark/pandas/tests/indexes/test_category.py
index acd80378333e..7d44bd426da7 100644
--- a/python/pyspark/pandas/tests/indexes/test_category.py
+++ b/python/pyspark/pandas/tests/indexes/test_category.py
@@ -402,7 +402,6 @@ class CategoricalIndexTests(CategoricalIndexTestsMixin, 
PandasOnSparkTestCase, T
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.indexes.test_category import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/pandas/tests/indexes/test_timedelta.py 
b/python/pyspark/pandas/tests/indexes/test_timedelta.py
index 400bb0e2a365..037d784840ae 100644
--- a/python/pyspark/pandas/tests/indexes/test_timedelta.py
+++ b/python/pyspark/pandas/tests/indexes/test_timedelta.py
@@ -115,7 +115,6 @@ class TimedeltaIndexTests(
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.indexes.test_timedelta import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/pandas/tests/series/test_string_ops_adv.py 
b/python/pyspark/pandas/tests/series/test_string_ops_adv.py
index b0e4c69a35ea..f03167d2ad72 100644
--- a/python/pyspark/pandas/tests/series/test_string_ops_adv.py
+++ b/python/pyspark/pandas/tests/series/test_string_ops_adv.py
@@ -226,7 +226,6 @@ class SeriesStringOpsAdvTests(
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.series.test_string_ops_adv import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/pandas/tests/test_namespace.py 
b/python/pyspark/pandas/tests/test_namespace.py
index 141c6873d7f5..145ac2dcb9f3 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -683,7 +683,6 @@ class NamespaceTests(NamespaceTestsMixin, 
PandasOnSparkTestCase, SQLTestUtils):
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.test_namespace import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/pandas/tests/test_numpy_compat.py 
b/python/pyspark/pandas/tests/test_numpy_compat.py
index f754ee08a783..d961a433e181 100644
--- a/python/pyspark/pandas/tests/test_numpy_compat.py
+++ b/python/pyspark/pandas/tests/test_numpy_compat.py
@@ -198,7 +198,6 @@ class NumPyCompatTests(
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.pandas.tests.test_numpy_compat import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/pandas/tests/test_utils.py 
b/python/pyspark/pandas/tests/test_utils.py
index 6286df8e5469..89efb85dd61a 100644
--- a/python/pyspark/pandas/tests/test_utils.py
+++ b/python/pyspark/pandas/tests/test_utils.py
@@ -181,7 +181,7 @@ class UtilsTestsMixin:
             },
         )
 
-    def test_series_error_assert_pandas_equal(self):
+    def test_series_error_assert_pandas_almost_equal_2(self):
         series1 = pd.Series([1, 2, 3])
         series2 = pd.Series([4, 5, 6])
 
diff --git a/python/pyspark/pipelines/tests/test_block_connect_access.py 
b/python/pyspark/pipelines/tests/test_block_connect_access.py
index 1ad1881b7127..60688f30bfb9 100644
--- a/python/pyspark/pipelines/tests/test_block_connect_access.py
+++ b/python/pyspark/pipelines/tests/test_block_connect_access.py
@@ -17,7 +17,6 @@
 import unittest
 
 from pyspark.errors import PySparkException
-from pyspark.testing.connectutils import ReusedConnectTestCase
 from pyspark.testing.connectutils import (
     ReusedConnectTestCase,
     should_test_connect,
diff --git a/python/pyspark/sql/connect/dataframe.py 
b/python/pyspark/sql/connect/dataframe.py
index 65dea5cf1a57..8de8663693f7 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -19,7 +19,6 @@
 from pyspark.errors.exceptions.base import (
     SessionNotSameException,
     PySparkIndexError,
-    PySparkAttributeError,
 )
 from pyspark.resource import ResourceProfile
 from pyspark.sql.connect.logging import logger
diff --git a/python/pyspark/sql/connect/group.py 
b/python/pyspark/sql/connect/group.py
index d540e721f149..afaf286a7e9c 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -55,7 +55,6 @@ if TYPE_CHECKING:
         PandasGroupedMapFunctionWithState,
     )
     from pyspark.sql.connect.dataframe import DataFrame
-    from pyspark.sql.types import StructType
 
 
 class GroupedData:
diff --git a/python/pyspark/sql/connect/udf.py 
b/python/pyspark/sql/connect/udf.py
index 3bc2f87e4bd3..64506731badb 100644
--- a/python/pyspark/sql/connect/udf.py
+++ b/python/pyspark/sql/connect/udf.py
@@ -50,7 +50,6 @@ if TYPE_CHECKING:
         UserDefinedFunctionLike,
     )
     from pyspark.sql.connect.session import SparkSession
-    from pyspark.sql.types import StringType
 
 
 def _create_py_udf(
diff --git a/python/pyspark/sql/functions/builtin.py 
b/python/pyspark/sql/functions/builtin.py
index 17eda26f76e1..04b800be2372 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -3061,56 +3061,6 @@ def floor(col: "ColumnOrName", scale: 
Optional[Union[Column, int]] = None) -> Co
         return _invoke_function_over_columns("floor", col, scale)  # type: 
ignore[arg-type]
 
 
-@_try_remote_functions
-def log(col: "ColumnOrName") -> Column:
-    """
-    Computes the natural logarithm of the given value.
-
-    .. versionadded:: 1.4.0
-
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
-    Parameters
-    ----------
-    col : :class:`~pyspark.sql.Column` or column name
-        column to calculate natural logarithm for.
-
-    Returns
-    -------
-    :class:`~pyspark.sql.Column`
-        natural logarithm of the given value.
-
-    Examples
-    --------
-    Example 1: Compute the natural logarithm of E
-
-    >>> from pyspark.sql import functions as sf
-    >>> spark.range(1).select(sf.log(sf.e())).show()
-    +-------+
-    |ln(E())|
-    +-------+
-    |    1.0|
-    +-------+
-
-    Example 2: Compute the natural logarithm of invalid values
-
-    >>> from pyspark.sql import functions as sf
-    >>> spark.sql(
-    ...     "SELECT * FROM VALUES (-1), (0), (FLOAT('NAN')), (NULL) AS 
TAB(value)"
-    ... ).select("*", sf.log("value")).show()
-    +-----+---------+
-    |value|ln(value)|
-    +-----+---------+
-    | -1.0|     NULL|
-    |  0.0|     NULL|
-    |  NaN|      NaN|
-    | NULL|     NULL|
-    +-----+---------+
-    """
-    return _invoke_function_over_columns("log", col)
-
-
 @_try_remote_functions
 def log10(col: "ColumnOrName") -> Column:
     """
@@ -8342,7 +8292,7 @@ def when(condition: Column, value: Any) -> Column:
     return _invoke_function("when", condition._jc, v)
 
 
-@overload  # type: ignore[no-redef]
+@overload
 def log(arg1: "ColumnOrName") -> Column:
     ...
 
diff --git a/python/pyspark/sql/tests/arrow/test_arrow.py 
b/python/pyspark/sql/tests/arrow/test_arrow.py
index 79d8bf77d9d5..19e579cb6778 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow.py
@@ -61,7 +61,6 @@ from pyspark.testing.sqlutils import (
 from pyspark.errors import ArithmeticException, PySparkTypeError, 
UnsupportedOperationException
 from pyspark.loose_version import LooseVersion
 from pyspark.util import is_remote_only
-from pyspark.loose_version import LooseVersion
 
 if have_pandas:
     import pandas as pd
@@ -1009,11 +1008,6 @@ class ArrowTestsMixin:
                             expected[r][e] == result[r][e], f"{expected[r][e]} 
== {result[r][e]}"
                         )
 
-    def test_createDataFrame_pandas_with_struct_type(self):
-        for arrow_enabled in [True, False]:
-            with self.subTest(arrow_enabled=arrow_enabled):
-                
self.check_createDataFrame_pandas_with_struct_type(arrow_enabled)
-
     def test_createDataFrame_arrow_with_struct_type_nulls(self):
         t = pa.table(
             {
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udf.py 
b/python/pyspark/sql/tests/arrow/test_arrow_udf.py
index b56ed75acad9..51190f6d2a3d 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_udf.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_udf.py
@@ -164,13 +164,13 @@ class ArrowUDFTestsMixin:
             with self.assertRaises(ParseException):
 
                 @arrow_udf("blah")
-                def foo(x):
+                def _(x):
                     return x
 
             with self.assertRaises(PySparkTypeError) as pe:
 
                 @arrow_udf(returnType="double", 
functionType=PandasUDFType.SCALAR)
-                def foo(df):
+                def _(df):
                     return df
 
             self.check_error(
@@ -185,7 +185,7 @@ class ArrowUDFTestsMixin:
             with self.assertRaises(PySparkTypeError) as pe:
 
                 @arrow_udf(functionType=ArrowUDFType.SCALAR)
-                def foo(x):
+                def _(x):
                     return x
 
             self.check_error(
@@ -197,7 +197,7 @@ class ArrowUDFTestsMixin:
             with self.assertRaises(PySparkTypeError) as pe:
 
                 @arrow_udf("double", 100)
-                def foo(x):
+                def _(x):
                     return x
 
             self.check_error(
@@ -209,7 +209,7 @@ class ArrowUDFTestsMixin:
             with self.assertRaises(PySparkTypeError) as pe:
 
                 @arrow_udf(returnType=PandasUDFType.GROUPED_MAP)
-                def foo(df):
+                def _(df):
                     return df
 
             self.check_error(
@@ -224,13 +224,13 @@ class ArrowUDFTestsMixin:
             with self.assertRaisesRegex(ValueError, "0-arg 
arrow_udfs.*not.*supported"):
 
                 @arrow_udf(LongType(), ArrowUDFType.SCALAR)
-                def zero_with_type():
+                def _():
                     return 1
 
             with self.assertRaisesRegex(ValueError, "0-arg 
arrow_udfs.*not.*supported"):
 
                 @arrow_udf(LongType(), ArrowUDFType.SCALAR_ITER)
-                def zero_with_type():
+                def _():
                     yield 1
                     yield 2
 
diff --git a/python/pyspark/sql/tests/arrow/test_arrow_udf_scalar.py 
b/python/pyspark/sql/tests/arrow/test_arrow_udf_scalar.py
index 3bd00d2cc921..a0d805a3ab27 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow_udf_scalar.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow_udf_scalar.py
@@ -27,7 +27,7 @@ from typing import Iterator, Tuple
 from pyspark.util import PythonEvalType
 
 from pyspark.sql.functions import arrow_udf, ArrowUDFType
-from pyspark.sql import Row, functions as F
+from pyspark.sql import functions as F
 from pyspark.sql.types import (
     IntegerType,
     ByteType,
diff --git a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py 
b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
index f05f982d2d14..cc750f2b3439 100644
--- a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
+++ b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py
@@ -313,7 +313,6 @@ class 
StreamingListenerParityTests(StreamingListenerTestsMixin, ReusedConnectTes
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.streaming.test_parity_listener import *  # 
noqa: F401
 
     try:
diff --git 
a/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py 
b/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py
index 59107363571e..b83a2d5f198b 100644
--- a/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py
+++ b/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py
@@ -26,7 +26,6 @@ class 
DataFrameQueryContextParityTests(DataFrameQueryContextTestsMixin, ReusedCo
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.test_parity_dataframe_query_context import 
*  # noqa: F401
 
     try:
diff --git a/python/pyspark/sql/tests/connect/test_parity_geographytype.py 
b/python/pyspark/sql/tests/connect/test_parity_geographytype.py
index 501bbed20ff1..a85edbc69c2a 100644
--- a/python/pyspark/sql/tests/connect/test_parity_geographytype.py
+++ b/python/pyspark/sql/tests/connect/test_parity_geographytype.py
@@ -26,7 +26,6 @@ class GeographyTypeParityTest(GeographyTypeTestMixin, 
ReusedConnectTestCase):
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.test_parity_geographytype import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/sql/tests/connect/test_parity_geometrytype.py 
b/python/pyspark/sql/tests/connect/test_parity_geometrytype.py
index b95321b3c61b..640ead4c3244 100644
--- a/python/pyspark/sql/tests/connect/test_parity_geometrytype.py
+++ b/python/pyspark/sql/tests/connect/test_parity_geometrytype.py
@@ -26,7 +26,6 @@ class GeometryTypeParityTest(GeometryTypeTestMixin, 
ReusedConnectTestCase):
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.test_parity_geometrytype import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/sql/tests/connect/test_parity_observation.py 
b/python/pyspark/sql/tests/connect/test_parity_observation.py
index e16053d5a082..ebe0ed78e5c4 100644
--- a/python/pyspark/sql/tests/connect/test_parity_observation.py
+++ b/python/pyspark/sql/tests/connect/test_parity_observation.py
@@ -29,7 +29,6 @@ class DataFrameObservationParityTests(
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.test_parity_observation import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/sql/tests/connect/test_parity_readwriter.py 
b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
index f83f3edbfa78..a95cea3e7345 100644
--- a/python/pyspark/sql/tests/connect/test_parity_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
@@ -37,7 +37,6 @@ class ReadwriterV2ParityTests(ReadwriterV2TestsMixin, 
ReusedConnectTestCase):
 
 
 if __name__ == "__main__":
-    import unittest
     from pyspark.sql.tests.connect.test_parity_readwriter import *  # noqa: 
F401
 
     try:
diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py 
b/python/pyspark/sql/tests/connect/test_parity_udf.py
index 5507f8e9f289..4b027f6fca37 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -56,10 +56,6 @@ class UDFParityTests(BaseUDFTestsMixin, 
ReusedConnectTestCase):
     def test_nondeterministic_udf3(self):
         super().test_nondeterministic_udf3()
 
-    @unittest.skip("Spark Connect doesn't support RDD but the test depends on 
it.")
-    def test_worker_original_stdin_closed(self):
-        super().test_worker_original_stdin_closed()
-
     @unittest.skip("Spark Connect does not support SQLContext but the test 
depends on it.")
     def test_udf_on_sql_context(self):
         super().test_udf_on_sql_context()
diff --git 
a/python/pyspark/sql/tests/pandas/streaming/test_transform_with_state_state_variable.py
 
b/python/pyspark/sql/tests/pandas/streaming/test_transform_with_state_state_variable.py
index 7f9eb8979c08..c5172ff6af3f 100644
--- 
a/python/pyspark/sql/tests/pandas/streaming/test_transform_with_state_state_variable.py
+++ 
b/python/pyspark/sql/tests/pandas/streaming/test_transform_with_state_state_variable.py
@@ -26,7 +26,6 @@ from pyspark.testing.sqlutils import (
     ReusedSQLTestCase,
 )
 
-from pyspark.testing.sqlutils import ReusedSQLTestCase
 from 
pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_state_variable
 import (
     TransformWithStateStateVariableTestsMixin,
 )
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf.py 
b/python/pyspark/sql/tests/pandas/test_pandas_udf.py
index 017698f318d5..6b397bc63626 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_udf.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_udf.py
@@ -170,13 +170,13 @@ class PandasUDFTestsMixin:
             with self.assertRaises(ParseException):
 
                 @pandas_udf("blah")
-                def foo(x):
+                def _(x):
                     return x
 
             with self.assertRaises(PySparkTypeError) as pe:
 
                 @pandas_udf(returnType="double", 
functionType=PandasUDFType.GROUPED_MAP)
-                def foo(df):
+                def _(df):
                     return df
 
             self.check_error(
@@ -193,14 +193,14 @@ class PandasUDFTestsMixin:
             with self.assertRaisesRegex(ValueError, "Invalid function"):
 
                 @pandas_udf(returnType="k int, v double", 
functionType=PandasUDFType.GROUPED_MAP)
-                def foo(k, v, w):
+                def _(k, v, w):
                     return k
 
     def check_udf_wrong_arg(self):
         with self.assertRaises(PySparkTypeError) as pe:
 
             @pandas_udf(functionType=PandasUDFType.SCALAR)
-            def foo(x):
+            def _(x):
                 return x
 
         self.check_error(
@@ -212,7 +212,7 @@ class PandasUDFTestsMixin:
         with self.assertRaises(PySparkTypeError) as pe:
 
             @pandas_udf("double", 100)
-            def foo(x):
+            def _(x):
                 return x
 
         self.check_error(
@@ -227,20 +227,20 @@ class PandasUDFTestsMixin:
         with self.assertRaisesRegex(ValueError, "0-arg 
pandas_udfs.*not.*supported"):
 
             @pandas_udf(LongType(), PandasUDFType.SCALAR)
-            def zero_with_type():
+            def _():
                 return 1
 
         with self.assertRaisesRegex(ValueError, "0-arg 
pandas_udfs.*not.*supported"):
 
             @pandas_udf(LongType(), PandasUDFType.SCALAR_ITER)
-            def zero_with_type():
+            def _():
                 yield 1
                 yield 2
 
         with self.assertRaises(PySparkTypeError) as pe:
 
             @pandas_udf(returnType=PandasUDFType.GROUPED_MAP)
-            def foo(df):
+            def _(df):
                 return df
 
         self.check_error(
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py 
b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
index b936a9240e52..88f2168f131b 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
@@ -53,7 +53,6 @@ from pyspark.sql.types import (
     StringType,
     ArrayType,
     StructField,
-    Row,
     TimestampType,
     MapType,
     DateType,
diff --git a/python/pyspark/sql/tests/test_python_datasource.py 
b/python/pyspark/sql/tests/test_python_datasource.py
index e5171876656c..9a4782e7322f 100644
--- a/python/pyspark/sql/tests/test_python_datasource.py
+++ b/python/pyspark/sql/tests/test_python_datasource.py
@@ -20,7 +20,6 @@ import tempfile
 import unittest
 import logging
 import json
-import os
 from dataclasses import dataclass
 from datetime import datetime
 from decimal import Decimal
@@ -852,9 +851,9 @@ class BasePythonDataSourceTestsMixin:
                             return "x string"
 
                         def reader(self, schema):
-                            return TestReader()
+                            return TestReader2()
 
-                    class TestReader(DataSourceReader):
+                    class TestReader2(DataSourceReader):
                         def read(self, partition):
                             ctypes.string_at(0)
                             yield "x",
@@ -894,9 +893,9 @@ class BasePythonDataSourceTestsMixin:
                             return "test"
 
                         def writer(self, schema, overwrite):
-                            return TestWriter()
+                            return TestWriter2()
 
-                    class TestWriter(DataSourceWriter):
+                    class TestWriter2(DataSourceWriter):
                         def write(self, iterator):
                             return WriterCommitMessage()
 
diff --git a/python/pyspark/sql/tests/test_udtf.py 
b/python/pyspark/sql/tests/test_udtf.py
index 5ded5aa67b4e..389df5b5a6cf 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -40,7 +40,6 @@ from pyspark.sql.functions import (
     array,
     col,
     create_map,
-    array,
     lit,
     named_struct,
     udf,
diff --git a/python/pyspark/sql/tests/test_utils.py 
b/python/pyspark/sql/tests/test_utils.py
index 08d360a19e81..5f85c1886b4b 100644
--- a/python/pyspark/sql/tests/test_utils.py
+++ b/python/pyspark/sql/tests/test_utils.py
@@ -923,7 +923,7 @@ class UtilsTestsMixin:
         assertDataFrameEqual(df1, df2, checkRowOrder=True)
 
     @unittest.skipIf(not have_pandas or not have_pyarrow, "no pandas or 
pyarrow dependency")
-    def test_assert_equal_exact_pandas_on_spark_df(self):
+    def test_assert_equal_exact_pandas_on_spark_df_no_order(self):
         import pyspark.pandas as ps
 
         df1 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
@@ -1543,53 +1543,6 @@ class UtilsTestsMixin:
             messageParameters={"error_msg": error_msg},
         )
 
-    def test_list_row_unequal_schema(self):
-        df1 = self.spark.createDataFrame(
-            data=[
-                (1, 3000),
-                (2, 1000),
-                (3, 10),
-            ],
-            schema=["id", "amount"],
-        )
-
-        list_of_rows = [Row(id=1, amount=300), Row(id=2, amount=100), 
Row(id=3, amount=10)]
-
-        rows_str1 = ""
-        rows_str2 = ""
-
-        # count different rows
-        for r1, r2 in list(zip_longest(df1, list_of_rows)):
-            rows_str1 += str(r1) + "\n"
-            rows_str2 += str(r2) + "\n"
-
-        generated_diff = _context_diff(
-            actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=3
-        )
-
-        error_msg = "Results do not match: "
-        percent_diff = (2 / 3) * 100
-        error_msg += "( %.5f %% )" % percent_diff
-        error_msg += "\n" + "\n".join(generated_diff)
-
-        with self.assertRaises(PySparkAssertionError) as pe:
-            assertDataFrameEqual(df1, list_of_rows)
-
-        self.check_error(
-            exception=pe.exception,
-            errorClass="DIFFERENT_ROWS",
-            messageParameters={"error_msg": error_msg},
-        )
-
-        with self.assertRaises(PySparkAssertionError) as pe:
-            assertDataFrameEqual(df1, list_of_rows, checkRowOrder=True)
-
-        self.check_error(
-            exception=pe.exception,
-            errorClass="DIFFERENT_ROWS",
-            messageParameters={"error_msg": error_msg},
-        )
-
     def test_list_row_unequal_schema(self):
         from pyspark.sql import Row
 
diff --git a/python/pyspark/sql/tests/udf_type_tests/test_udf_return_types.py 
b/python/pyspark/sql/tests/udf_type_tests/test_udf_return_types.py
index 0ad17e919754..5e307110559b 100644
--- a/python/pyspark/sql/tests/udf_type_tests/test_udf_return_types.py
+++ b/python/pyspark/sql/tests/udf_type_tests/test_udf_return_types.py
@@ -21,7 +21,6 @@ import os
 import platform
 import unittest
 from decimal import Decimal
-import numpy as np
 import pandas as pd
 
 from pyspark.sql import Row
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 96aac6083bf2..ee3d71ee5ffb 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -19,7 +19,6 @@
 Worker that receives input from Piped RDD.
 """
 import datetime
-import itertools
 import os
 import sys
 import dataclasses
@@ -54,7 +53,6 @@ from pyspark.sql.functions import 
SkipRestOfInputTableException
 from pyspark.sql.pandas.serializers import (
     ArrowStreamPandasUDFSerializer,
     ArrowStreamPandasUDTFSerializer,
-    GroupPandasUDFSerializer,
     GroupArrowUDFSerializer,
     CogroupArrowUDFSerializer,
     CogroupPandasUDFSerializer,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to