This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 190c50451333 [SPARK-50388][PYTHON][TESTS] Further centralize import 
checks
190c50451333 is described below

commit 190c50451333c5f6be349defcac1ad1983632935
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Fri Nov 22 10:22:30 2024 +0800

    [SPARK-50388][PYTHON][TESTS] Further centralize import checks
    
    ### What changes were proposed in this pull request?
    Further centralize the import checks:
    1. Move `have_xxx` from `sqlutils.py`/`pandasutils.py`/etc. to `utils.py`;
    2. But still keep `have_pandas` and `have_pyarrow` in `sqlutils.py`, by
importing them from `utils.py`, because they are used in too many places.
    
    ### Why are the changes needed?
    Simplify the import checks; e.g. `have_plotly` had been defined in multiple
places.
    
    ### Does this PR introduce _any_ user-facing change?
    no, test only
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #48926 from zhengruifeng/py_dep_2.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/pandas/tests/io/test_io.py          |  7 ++--
 .../tests/plot/test_frame_plot_matplotlib.py       |  8 ++---
 .../pandas/tests/plot/test_frame_plot_plotly.py    |  8 ++---
 .../pyspark/pandas/tests/plot/test_series_plot.py  |  2 +-
 .../tests/plot/test_series_plot_matplotlib.py      |  8 ++---
 .../pandas/tests/plot/test_series_plot_plotly.py   |  8 ++---
 .../pyspark/pandas/tests/series/test_conversion.py |  2 +-
 python/pyspark/sql/metrics.py                      |  6 ++--
 python/pyspark/sql/tests/connect/test_df_debug.py  |  7 ++--
 python/pyspark/sql/tests/plot/test_frame_plot.py   |  4 +--
 .../sql/tests/plot/test_frame_plot_plotly.py       |  4 +--
 python/pyspark/testing/pandasutils.py              | 23 ------------
 python/pyspark/testing/sqlutils.py                 | 41 ++++++----------------
 python/pyspark/testing/utils.py                    | 33 +++++++++++++++++
 14 files changed, 64 insertions(+), 97 deletions(-)

diff --git a/python/pyspark/pandas/tests/io/test_io.py 
b/python/pyspark/pandas/tests/io/test_io.py
index d4e61319f229..6fbdc366dd76 100644
--- a/python/pyspark/pandas/tests/io/test_io.py
+++ b/python/pyspark/pandas/tests/io/test_io.py
@@ -22,12 +22,9 @@ import numpy as np
 import pandas as pd
 
 from pyspark import pandas as ps
-from pyspark.testing.pandasutils import (
-    have_tabulate,
-    PandasOnSparkTestCase,
-    tabulate_requirement_message,
-)
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.testing.sqlutils import SQLTestUtils
+from pyspark.testing.utils import have_tabulate, tabulate_requirement_message
 
 
 # This file contains test cases for 'Serialization / IO / Conversion'
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py 
b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
index 365d34b1f550..1d63cafe19b4 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
@@ -24,12 +24,8 @@ import numpy as np
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import set_option, reset_option
-from pyspark.testing.pandasutils import (
-    have_matplotlib,
-    matplotlib_requirement_message,
-    PandasOnSparkTestCase,
-    TestUtils,
-)
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
+from pyspark.testing.utils import have_matplotlib, 
matplotlib_requirement_message
 
 if have_matplotlib:
     import matplotlib
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py 
b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
index 8d197649aaeb..530893257333 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
@@ -23,12 +23,8 @@ import numpy as np
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import set_option, reset_option
-from pyspark.testing.pandasutils import (
-    have_plotly,
-    plotly_requirement_message,
-    PandasOnSparkTestCase,
-    TestUtils,
-)
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
+from pyspark.testing.utils import have_plotly, plotly_requirement_message
 from pyspark.pandas.utils import name_like_string
 
 if have_plotly:
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot.py 
b/python/pyspark/pandas/tests/plot/test_series_plot.py
index 6e0bdd232fc4..61d114f37b0e 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot.py
@@ -22,7 +22,7 @@ import numpy as np
 
 from pyspark import pandas as ps
 from pyspark.pandas.plot import PandasOnSparkPlotAccessor, BoxPlotBase
-from pyspark.testing.pandasutils import have_plotly, plotly_requirement_message
+from pyspark.testing.utils import have_plotly, plotly_requirement_message
 
 
 class SeriesPlotTestsMixin:
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py 
b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
index c98c1aeea04e..0fdcbc9d748e 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
@@ -24,12 +24,8 @@ import pandas as pd
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import set_option, reset_option
-from pyspark.testing.pandasutils import (
-    have_matplotlib,
-    matplotlib_requirement_message,
-    PandasOnSparkTestCase,
-    TestUtils,
-)
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
+from pyspark.testing.utils import have_matplotlib, 
matplotlib_requirement_message
 
 if have_matplotlib:
     import matplotlib
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py 
b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
index 1aa175f9308a..8123af26dbf4 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
@@ -24,12 +24,8 @@ import numpy as np
 from pyspark import pandas as ps
 from pyspark.pandas.config import set_option, reset_option
 from pyspark.pandas.utils import name_like_string
-from pyspark.testing.pandasutils import (
-    have_plotly,
-    plotly_requirement_message,
-    PandasOnSparkTestCase,
-    TestUtils,
-)
+from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
+from pyspark.testing.utils import have_plotly, plotly_requirement_message
 
 if have_plotly:
     from plotly import express
diff --git a/python/pyspark/pandas/tests/series/test_conversion.py 
b/python/pyspark/pandas/tests/series/test_conversion.py
index 71ae858631d4..7711d05abd76 100644
--- a/python/pyspark/pandas/tests/series/test_conversion.py
+++ b/python/pyspark/pandas/tests/series/test_conversion.py
@@ -21,7 +21,7 @@ import pandas as pd
 from pyspark import pandas as ps
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.testing.sqlutils import SQLTestUtils
-from pyspark.testing.pandasutils import have_tabulate, 
tabulate_requirement_message
+from pyspark.testing.utils import have_tabulate, tabulate_requirement_message
 
 
 class SeriesConversionMixin:
diff --git a/python/pyspark/sql/metrics.py b/python/pyspark/sql/metrics.py
index 0f4142e91b25..4ab9b041e313 100644
--- a/python/pyspark/sql/metrics.py
+++ b/python/pyspark/sql/metrics.py
@@ -21,10 +21,10 @@ from typing import Optional, List, Tuple, Dict, Any, Union, 
TYPE_CHECKING, Seque
 from pyspark.errors import PySparkValueError
 
 if TYPE_CHECKING:
-    from pyspark.testing.connectutils import have_graphviz
-
-    if have_graphviz:
+    try:
         import graphviz  # type: ignore
+    except ImportError:
+        pass
 
 
 class ObservedMetrics(abc.ABC):
diff --git a/python/pyspark/sql/tests/connect/test_df_debug.py 
b/python/pyspark/sql/tests/connect/test_df_debug.py
index 8a4ec68fda84..40b6a072e912 100644
--- a/python/pyspark/sql/tests/connect/test_df_debug.py
+++ b/python/pyspark/sql/tests/connect/test_df_debug.py
@@ -17,12 +17,9 @@
 
 import unittest
 
-from pyspark.testing.connectutils import (
-    should_test_connect,
-    have_graphviz,
-    graphviz_requirement_message,
-)
 from pyspark.sql.tests.connect.test_connect_basic import 
SparkConnectSQLTestCase
+from pyspark.testing.connectutils import should_test_connect
+from pyspark.testing.utils import have_graphviz, graphviz_requirement_message
 
 if should_test_connect:
     from pyspark.sql.connect.dataframe import DataFrame
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot.py 
b/python/pyspark/sql/tests/plot/test_frame_plot.py
index 3221a408d153..c37aef5f7c94 100644
--- a/python/pyspark/sql/tests/plot/test_frame_plot.py
+++ b/python/pyspark/sql/tests/plot/test_frame_plot.py
@@ -18,8 +18,8 @@
 import unittest
 from pyspark.errors import PySparkValueError
 from pyspark.sql import Row
-from pyspark.testing.sqlutils import (
-    ReusedSQLTestCase,
+from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.testing.utils import (
     have_plotly,
     plotly_requirement_message,
     have_pandas,
diff --git a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py 
b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
index 84a9c2aa0170..fd264c348882 100644
--- a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py
@@ -19,8 +19,8 @@ import unittest
 from datetime import datetime
 
 from pyspark.errors import PySparkTypeError, PySparkValueError
-from pyspark.testing.sqlutils import (
-    ReusedSQLTestCase,
+from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.testing.utils import (
     have_plotly,
     plotly_requirement_message,
     have_pandas,
diff --git a/python/pyspark/testing/pandasutils.py 
b/python/pyspark/testing/pandasutils.py
index 10e8ce6f69af..09d3ffb09708 100644
--- a/python/pyspark/testing/pandasutils.py
+++ b/python/pyspark/testing/pandasutils.py
@@ -23,29 +23,6 @@ from contextlib import contextmanager
 import decimal
 from typing import Any, Union
 
-tabulate_requirement_message = None
-try:
-    from tabulate import tabulate
-except ImportError as e:
-    # If tabulate requirement is not satisfied, skip related tests.
-    tabulate_requirement_message = str(e)
-have_tabulate = tabulate_requirement_message is None
-
-matplotlib_requirement_message = None
-try:
-    import matplotlib
-except ImportError as e:
-    # If matplotlib requirement is not satisfied, skip related tests.
-    matplotlib_requirement_message = str(e)
-have_matplotlib = matplotlib_requirement_message is None
-
-plotly_requirement_message = None
-try:
-    import plotly
-except ImportError as e:
-    # If plotly requirement is not satisfied, skip related tests.
-    plotly_requirement_message = str(e)
-have_plotly = plotly_requirement_message is None
 
 try:
     from pyspark.sql.pandas.utils import require_minimum_pandas_version
diff --git a/python/pyspark/testing/sqlutils.py 
b/python/pyspark/testing/sqlutils.py
index c833abfb805d..e5464257422a 100644
--- a/python/pyspark/testing/sqlutils.py
+++ b/python/pyspark/testing/sqlutils.py
@@ -22,23 +22,17 @@ import shutil
 import tempfile
 from contextlib import contextmanager
 
-pandas_requirement_message = None
-try:
-    from pyspark.sql.pandas.utils import require_minimum_pandas_version
-
-    require_minimum_pandas_version()
-except ImportError as e:
-    # If Pandas version requirement is not satisfied, skip related tests.
-    pandas_requirement_message = str(e)
-
-pyarrow_requirement_message = None
-try:
-    from pyspark.sql.pandas.utils import require_minimum_pyarrow_version
+from pyspark.sql import SparkSession
+from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row
+from pyspark.testing.utils import (
+    ReusedPySparkTestCase,
+    PySparkErrorTestUtils,
+    have_pandas,
+    pandas_requirement_message,
+    have_pyarrow,
+    pyarrow_requirement_message,
+)
 
-    require_minimum_pyarrow_version()
-except ImportError as e:
-    # If Arrow version requirement is not satisfied, skip related tests.
-    pyarrow_requirement_message = str(e)
 
 test_not_compiled_message = None
 try:
@@ -48,21 +42,6 @@ try:
 except Exception as e:
     test_not_compiled_message = str(e)
 
-plotly_requirement_message = None
-try:
-    import plotly
-except ImportError as e:
-    plotly_requirement_message = str(e)
-have_plotly = plotly_requirement_message is None
-
-
-from pyspark.sql import SparkSession
-from pyspark.sql.types import ArrayType, DoubleType, UserDefinedType, Row
-from pyspark.testing.utils import ReusedPySparkTestCase, PySparkErrorTestUtils
-
-
-have_pandas = pandas_requirement_message is None
-have_pyarrow = pyarrow_requirement_message is None
 test_compiled = test_not_compiled_message is None
 
 
diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py
index ca16628fc56f..1dd15666382f 100644
--- a/python/pyspark/testing/utils.py
+++ b/python/pyspark/testing/utils.py
@@ -82,6 +82,39 @@ deepspeed_requirement_message = None if have_deepspeed else 
"No module named 'de
 have_plotly = have_package("plotly")
 plotly_requirement_message = None if have_plotly else "No module named 
'plotly'"
 
+have_matplotlib = have_package("matplotlib")
+matplotlib_requirement_message = None if have_matplotlib else "No module named 
'matplotlib'"
+
+have_tabulate = have_package("tabulate")
+tabulate_requirement_message = None if have_tabulate else "No module named 
'tabulate'"
+
+have_graphviz = have_package("graphviz")
+graphviz_requirement_message = None if have_graphviz else "No module named 
'graphviz'"
+
+
+pandas_requirement_message = None
+try:
+    from pyspark.sql.pandas.utils import require_minimum_pandas_version
+
+    require_minimum_pandas_version()
+except Exception as e:
+    # If Pandas version requirement is not satisfied, skip related tests.
+    pandas_requirement_message = str(e)
+
+have_pandas = pandas_requirement_message is None
+
+
+pyarrow_requirement_message = None
+try:
+    from pyspark.sql.pandas.utils import require_minimum_pyarrow_version
+
+    require_minimum_pyarrow_version()
+except Exception as e:
+    # If Arrow version requirement is not satisfied, skip related tests.
+    pyarrow_requirement_message = str(e)
+
+have_pyarrow = pyarrow_requirement_message is None
+
 
 def read_int(b):
     return struct.unpack("!i", b)[0]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to