This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 70bb9319e50 [SPARK-45996][PYTHON][CONNECT] Show proper dependency 
requirement messages for Spark Connect
70bb9319e50 is described below

commit 70bb9319e504a8bde7984a12a6614d2c3e636ee6
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Nov 20 13:03:17 2023 +0900

    [SPARK-45996][PYTHON][CONNECT] Show proper dependency requirement messages 
for Spark Connect
    
    ### What changes were proposed in this pull request?
    
    This PR improves the error messages for the dependency requirements for 
Python Spark Connect.
    
    ### Why are the changes needed?
    
    In order to improve error messages. This is what you get for now:
    
    ```
    /.../pyspark/shell.py:57: UserWarning: Failed to initialize Spark session.
      warnings.warn("Failed to initialize Spark session.")
    Traceback (most recent call last):
      File "/.../pyspark/shell.py", line 52, in <module>
        spark = SparkSession.builder.getOrCreate()
      File "/.../pyspark/sql/session.py", line 476, in getOrCreate
        from pyspark.sql.connect.session import SparkSession as 
RemoteSparkSession
      File "/.../pyspark/sql/connect/session.py", line 53, in <module>
        from pyspark.sql.connect.client import SparkConnectClient, 
ChannelBuilder
      File "/.../pyspark/sql/connect/client/__init__.py", line 22, in <module>
        from pyspark.sql.connect.client.core import *  # noqa: F401,F403
      File "/.../pyspark/sql/connect/client/core.py", line 51, in <module>
        import google.protobuf.message
    ModuleNotFoundError: No module named 'google'
    ```
    
    ```
    /.../pyspark/shell.py:57: UserWarning: Failed to initialize Spark session.
      warnings.warn("Failed to initialize Spark session.")
    Traceback (most recent call last):
      File "/.../pyspark/shell.py", line 52, in <module>
        spark = SparkSession.builder.getOrCreate()
      File "/.../pyspark/sql/session.py", line 476, in getOrCreate
        from pyspark.sql.connect.session import SparkSession as 
RemoteSparkSession
      File "/.../pyspark/sql/connect/session.py", line 53, in <module>
        from pyspark.sql.connect.client import SparkConnectClient, 
ChannelBuilder
      File "/.../pyspark/sql/connect/client/__init__.py", line 22, in <module>
        from pyspark.sql.connect.client.core import *  # noqa: F401,F403
      File "/.../pyspark/sql/connect/client/core.py", line 52, in <module>
        from grpc_status import rpc_status
    ModuleNotFoundError: No module named 'grpc_status'
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it changes the user-facing error messages.
    
    ### How was this patch tested?
    
    Manually tested as below:
    
    ```bash
    ➜  spark git:(master) ✗ conda create -y -n python3.10 python=3.10
    ...
    ➜  spark git:(master) ✗ conda activate python3.10
    (python3.10) ➜  spark git:(master) ✗ ./bin/pyspark --remote local
    ...
        raise ImportError(
    ImportError: Pandas >= 1.4.4 must be installed; however, it was not found.
    (python3.10) ➜  spark git:(master) ✗ pip install 'pandas >= 1.4.4'
    ...
    (python3.10) ➜  spark git:(SPARK-45996) ✗ ./bin/pyspark --remote local
    ...
        raise ImportError(
    ImportError: PyArrow >= 4.0.0 must be installed; however, it was not found.
    (python3.10) ➜  spark git:(SPARK-45996) pip install 'PyArrow >= 4.0.0'
    ...
    (python3.10) ➜  spark git:(SPARK-45996) ./bin/pyspark --remote local
    ...
        raise ImportError(
    ImportError: grpcio >= 1.48.1 must be installed; however, it was not found.
    (python3.10) ➜  spark git:(SPARK-45996) pip install 'grpcio >= 1.48.1'
    ...
    (python3.10) ➜  spark git:(SPARK-45996) ./bin/pyspark --remote local
    ...
        raise ImportError(
    ImportError: grpc-status >= 1.48.1 must be installed; however, it was not 
found.
    (python3.10) ➜  spark git:(SPARK-45996) ✗ pip install 'grpcio-status >= 
1.48.1'
    ...
    (python3.10) ➜  spark git:(SPARK-45996) ✗ ./bin/pyspark --remote local
    ...
    Welcome to
          ____              __
         / __/__  ___ _____/ /__
        _\ \/ _ \/ _ `/ __/  '_/
       /__ / .__/\_,_/_/ /_/\_\   version 4.0.0.dev0
          /_/
    
    Using Python version 3.10.13 (main, Sep 11 2023 08:39:02)
    Client connected to the Spark Connect server at localhost
    SparkSession available as 'spark'.
    >>> spark.range(10).show()
    +---+
    | id|
    +---+
    |  0|
    ...
    ```
    
    Note that `grpcio-status` already depends on `googleapis-common-protos` 
(see 
https://github.com/grpc/grpc/blob/master/src/python/grpcio_status/setup.py#L67-L69)
 so it did not need to be installed explicitly.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43894 from HyukjinKwon/SPARK-45996.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 dev/requirements.txt                           |  2 +-
 python/docs/source/getting_started/install.rst |  2 +-
 python/pyspark/sql/connect/utils.py            | 33 +++++++++++++++++++++++---
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/dev/requirements.txt b/dev/requirements.txt
index 2658f8eec82..fc76407c448 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -54,7 +54,7 @@ py
 grpcio>=1.48,<1.57
 grpcio-status>=1.48,<1.57
 protobuf==4.25.1
-googleapis-common-protos==1.56.4
+googleapis-common-protos>=1.56.4
 
 # Spark Connect python proto generation plugin (optional)
 mypy-protobuf==3.3.0
diff --git a/python/docs/source/getting_started/install.rst 
b/python/docs/source/getting_started/install.rst
index 154429f276a..b01831bc846 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -161,7 +161,7 @@ Package                    Supported version Note
 `numpy`                    >=1.21                    Required for pandas API 
on Spark and MLLib DataFrame-based API; Optional for Spark SQL
 `grpcio`                   >=1.48,<1.57              Required for Spark Connect
 `grpcio-status`            >=1.48,<1.57              Required for Spark Connect
-`googleapis-common-protos` ==1.56.4                  Required for Spark Connect
+`googleapis-common-protos` >=1.56.4                  Required for Spark Connect
 ========================== ========================= 
======================================================================================
 
 Note that PySpark requires Java 17 or later with ``JAVA_HOME`` properly set.
diff --git a/python/pyspark/sql/connect/utils.py 
b/python/pyspark/sql/connect/utils.py
index e96529e44f8..fd85d75060b 100644
--- a/python/pyspark/sql/connect/utils.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -34,6 +34,8 @@ def check_dependencies(mod_name: str) -> None:
         require_minimum_pandas_version()
         require_minimum_pyarrow_version()
         require_minimum_grpc_version()
+        require_minimum_grpcio_status_version()
+        require_minimum_googleapis_common_protos_version()
 
 
 def require_minimum_grpc_version() -> None:
@@ -44,14 +46,39 @@ def require_minimum_grpc_version() -> None:
         import grpc
     except ImportError as error:
         raise ImportError(
-            "grpcio >= %s must be installed; however, " "it was not found." % 
minimum_grpc_version
+            f"grpcio >= {minimum_grpc_version} must be installed; however, it 
was not found."
         ) from error
     if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
         raise ImportError(
-            "grpcio >= %s must be installed; however, "
-            "your version was %s." % (minimum_grpc_version, grpc.__version__)
+            f"grpcio >= {minimum_grpc_version} must be installed; however, "
+            f"your version was {grpc.__version__}."
         )
 
 
+def require_minimum_grpcio_status_version() -> None:
+    """Raise ImportError if grpcio-status is not installed"""
+    minimum_grpc_version = "1.48.1"
+
+    try:
+        import grpc_status  # noqa
+    except ImportError as error:
+        raise ImportError(
+            f"grpcio-status >= {minimum_grpc_version} must be installed; 
however, it was not found."
+        ) from error
+
+
+def require_minimum_googleapis_common_protos_version() -> None:
+    """Raise ImportError if googleapis-common-protos is not installed"""
+    minimum_common_protos_version = "1.56.4"
+
+    try:
+        import google.rpc  # noqa
+    except ImportError as error:
+        raise ImportError(
+            f"googleapis-common-protos >= {minimum_common_protos_version} must 
be installed; "
+            "however, it was not found."
+        ) from error
+
+
 def get_python_ver() -> str:
     return "%d.%d" % sys.version_info[:2]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to