This is an automated email from the ASF dual-hosted git repository. xinrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 349c8637f367 [SPARK-52985][PS] Raise TypeError for pandas numpy operand in comparison operators 349c8637f367 is described below commit 349c8637f3671e96e1c334d683f510b8f12069b0 Author: Xinrong Meng <xinr...@apache.org> AuthorDate: Tue Jul 29 13:07:50 2025 -0700 [SPARK-52985][PS] Raise TypeError for pandas numpy operand in comparison operators ### What changes were proposed in this pull request? Raise TypeError for pandas numpy operand in comparison operators, which otherwise silently triggers Spark errors or unexpected behavior. We can support those by converting them to the corresponding pandas on Spark objects with proper testing as a follow-up. ### Why are the changes needed? Raising a clear TypeError early helps users quickly understand that such types are unsupported, rather than encountering obscure Spark errors later. ### Does this PR introduce _any_ user-facing change? Better error messages. For the previous behavior, please see https://github.com/apache/spark/pull/51370#discussion_r2229272972. ### How was this patch tested? Unit tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #51670 from xinrong-meng/err_comp. 
Authored-by: Xinrong Meng <xinr...@apache.org> Signed-off-by: Xinrong Meng <xinr...@apache.org> --- python/pyspark/pandas/base.py | 14 ++++++++++++++ python/pyspark/pandas/tests/data_type_ops/test_date_ops.py | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index 01e23214d662..dc2e83031469 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -269,6 +269,14 @@ def numpy_column_op(f: Callable[..., Column]) -> Callable[..., SeriesOrIndex]: return wrapper +def _exclude_pd_np_operand(other: Any) -> None: + if isinstance(other, (pd.Series, pd.Index, pd.DataFrame, np.ndarray)): + raise TypeError( + f"Operand of type {type(other).__module__}.{type(other).__qualname__} " + f"is not supported for this operation. " + ) + + class IndexOpsMixin(object, metaclass=ABCMeta): """common ops mixin to support a unified interface / docs for Series / Index @@ -397,24 +405,30 @@ class IndexOpsMixin(object, metaclass=ABCMeta): # comparison operators def __eq__(self, other: Any) -> SeriesOrIndex: # type: ignore[override] # pandas always returns False for all items with dict and set. 
+ _exclude_pd_np_operand(other) if isinstance(other, (dict, set)): return self != self else: return self._dtype_op.eq(self, other) def __ne__(self, other: Any) -> SeriesOrIndex: # type: ignore[override] + _exclude_pd_np_operand(other) return self._dtype_op.ne(self, other) def __lt__(self, other: Any) -> SeriesOrIndex: + _exclude_pd_np_operand(other) return self._dtype_op.lt(self, other) def __le__(self, other: Any) -> SeriesOrIndex: + _exclude_pd_np_operand(other) return self._dtype_op.le(self, other) def __ge__(self, other: Any) -> SeriesOrIndex: + _exclude_pd_np_operand(other) return self._dtype_op.ge(self, other) def __gt__(self, other: Any) -> SeriesOrIndex: + _exclude_pd_np_operand(other) return self._dtype_op.gt(self, other) def __invert__(self: IndexOpsLike) -> IndexOpsLike: diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py index c2b29ee8a1d3..a584497832b2 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py @@ -208,31 +208,37 @@ class DateOpsTestsMixin: pdf, psdf = self.date_pdf, self.date_psdf self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"]) self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] == pdf["this"]) def test_ne(self): pdf, psdf = self.date_pdf, self.date_psdf self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"]) self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] != pdf["this"]) def test_lt(self): pdf, psdf = self.date_pdf, self.date_psdf self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"]) self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] < pdf["this"]) def test_le(self): pdf, psdf = self.date_pdf, 
self.date_psdf self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"]) self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] <= pdf["this"]) def test_gt(self): pdf, psdf = self.date_pdf, self.date_psdf self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"]) self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] > pdf["this"]) def test_ge(self): pdf, psdf = self.date_pdf, self.date_psdf self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"]) self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) + self.assertRaises(TypeError, lambda: psdf["this"] >= pdf["this"]) class DateOpsTests( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org