(spark) branch master updated: [SPARK-52985][PS] Raise TypeError for pandas numpy operand in comparison operators

xinrong Tue, 29 Jul 2025 13:08:05 -0700

This is an automated email from the ASF dual-hosted git repository.

xinrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 349c8637f367 [SPARK-52985][PS] Raise TypeError for pandas numpy 
operand in comparison operators
349c8637f367 is described below

commit 349c8637f3671e96e1c334d683f510b8f12069b0
Author: Xinrong Meng <xinr...@apache.org>
AuthorDate: Tue Jul 29 13:07:50 2025 -0700

    [SPARK-52985][PS] Raise TypeError for pandas numpy operand in comparison 
operators
    
    ### What changes were proposed in this pull request?
    Raise a TypeError for pandas/NumPy operands in comparison operators; such 
operands otherwise trigger obscure Spark errors or silently produce unexpected behavior.
    
    We can support those by converting them to the corresponding pandas on Spark 
objects with proper testing as a follow-up.
    
    ### Why are the changes needed?
    Raising a clear TypeError early helps users quickly understand that such 
types are unsupported, rather than encountering obscure Spark errors later.
    
    ### Does this PR introduce _any_ user-facing change?
    Better error messages.
    
    For the previous behavior, please see 
https://github.com/apache/spark/pull/51370#discussion_r2229272972.
    
    ### How was this patch tested?
    Unit tests
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #51670 from xinrong-meng/err_comp.
    
    Authored-by: Xinrong Meng <xinr...@apache.org>
    Signed-off-by: Xinrong Meng <xinr...@apache.org>
---
 python/pyspark/pandas/base.py                              | 14 ++++++++++++++
 python/pyspark/pandas/tests/data_type_ops/test_date_ops.py |  6 ++++++
 2 files changed, 20 insertions(+)

diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index 01e23214d662..dc2e83031469 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -269,6 +269,14 @@ def numpy_column_op(f: Callable[..., Column]) -> 
Callable[..., SeriesOrIndex]:
     return wrapper
 
 
+def _exclude_pd_np_operand(other: Any) -> None:
+    if isinstance(other, (pd.Series, pd.Index, pd.DataFrame, np.ndarray)):
+        raise TypeError(
+            f"Operand of type 
{type(other).__module__}.{type(other).__qualname__} "
+            f"is not supported for this operation. "
+        )
+
+
 class IndexOpsMixin(object, metaclass=ABCMeta):
     """common ops mixin to support a unified interface / docs for Series / 
Index
 
@@ -397,24 +405,30 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
     # comparison operators
     def __eq__(self, other: Any) -> SeriesOrIndex:  # type: ignore[override]
         # pandas always returns False for all items with dict and set.
+        _exclude_pd_np_operand(other)
         if isinstance(other, (dict, set)):
             return self != self
         else:
             return self._dtype_op.eq(self, other)
 
     def __ne__(self, other: Any) -> SeriesOrIndex:  # type: ignore[override]
+        _exclude_pd_np_operand(other)
         return self._dtype_op.ne(self, other)
 
     def __lt__(self, other: Any) -> SeriesOrIndex:
+        _exclude_pd_np_operand(other)
         return self._dtype_op.lt(self, other)
 
     def __le__(self, other: Any) -> SeriesOrIndex:
+        _exclude_pd_np_operand(other)
         return self._dtype_op.le(self, other)
 
     def __ge__(self, other: Any) -> SeriesOrIndex:
+        _exclude_pd_np_operand(other)
         return self._dtype_op.ge(self, other)
 
     def __gt__(self, other: Any) -> SeriesOrIndex:
+        _exclude_pd_np_operand(other)
         return self._dtype_op.gt(self, other)
 
     def __invert__(self: IndexOpsLike) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py 
b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
index c2b29ee8a1d3..a584497832b2 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -208,31 +208,37 @@ class DateOpsTestsMixin:
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == 
psdf["that"])
         self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == 
psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] == pdf["this"])
 
     def test_ne(self):
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != 
psdf["that"])
         self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != 
psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] != pdf["this"])
 
     def test_lt(self):
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"])
         self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] < pdf["this"])
 
     def test_le(self):
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= 
psdf["that"])
         self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= 
psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] <= pdf["this"])
 
     def test_gt(self):
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
         self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] > pdf["this"])
 
     def test_ge(self):
         pdf, psdf = self.date_pdf, self.date_psdf
         self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= 
psdf["that"])
         self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= 
psdf["this"])
+        self.assertRaises(TypeError, lambda: psdf["this"] >= pdf["this"])
 
 
 class DateOpsTests(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-52985][PS] Raise TypeError for pandas numpy operand in comparison operators

Reply via email to