This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 27c51009dc33 [SPARK-46166][PS] Implementation of pandas.DataFrame.any
with axis=None
27c51009dc33 is described below
commit 27c51009dc3335dff5729cae9f9ea6666ab4686d
Author: Devin Petersohn <[email protected]>
AuthorDate: Sun Dec 21 14:49:44 2025 +0900
[SPARK-46166][PS] Implementation of pandas.DataFrame.any with axis=None
### What changes were proposed in this pull request?
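Add support for `axis=None` in the pandas-on-Spark `DataFrame.any` (`python/pyspark/pandas/frame.py`), matching `pandas.DataFrame.any`: the frame is reduced over both axes and a single boolean is returned.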
### Why are the changes needed?
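`pandas.DataFrame.any` accepts `axis=None`, but the pandas-on-Spark implementation raised `NotImplementedError` for it. This change closes that API gap.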
### Does this PR introduce _any_ user-facing change?
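Yes. `DataFrame.any(axis=None)` now returns a single `bool` instead of raising `NotImplementedError`; behavior for `axis=0`/`'index'` and `axis=1`/`'columns'` is unchanged.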
### How was this patch tested?
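Added `axis=None` cases (with `bool_only` unset, `True`, and `False`) to `python/pyspark/pandas/tests/computation/test_any_all.py`, comparing against pandas; run in CI and locally.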
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53478 from devin-petersohn/devin/any_axis_none.
Authored-by: Devin Petersohn <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/frame.py | 18 ++++++++++++------
.../pyspark/pandas/tests/computation/test_any_all.py | 10 ++++++++++
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index e5aaecbb64fd..0ec7ee60bb5b 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -11141,10 +11141,12 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
return self._result_aggregated(column_labels, applied)
- # TODO(SPARK-46166): axis and **kwargs should be implemented.
def any(
- self, axis: Axis = 0, bool_only: Optional[bool] = None, skipna: bool =
True
- ) -> "Series":
+ self,
+ axis: Optional[Axis] = 0,
+ bool_only: Optional[bool] = None,
+ skipna: bool = True,
+ ) -> Union["Series", bool]:
"""
Return whether any element is True.
@@ -11153,11 +11155,14 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Parameters
----------
- axis : {0 or 'index'}, default 0
+ axis : {0, 'index', 1, 'columns' or None}, default 0
Indicate which axis or axes should be reduced.
* 0 / 'index' : reduce the index, return a Series whose index is
the
original column labels.
+ * 1 / 'columns' : reduce the columns, return a Series whose index
is the
+ original row index.
+ * None : reduce all dimensions, return a single boolean value.
bool_only : bool, default None
Include only boolean columns. If None, will attempt to use
everything,
@@ -11207,7 +11212,8 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
>>> df[[]].any()
Series([], dtype: bool)
"""
- axis = validate_axis(axis)
+ if axis is not None:
+ axis = validate_axis(axis)
column_labels = self._internal.column_labels
if bool_only:
column_labels = self._bool_column_labels(column_labels)
@@ -11256,7 +11262,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
)
else:
# axis=None case - return single boolean value
- raise NotImplementedError('axis should be 0, 1, "index", or
"columns" currently.')
+ return bool(self.any(axis=1, bool_only=bool_only,
skipna=skipna).any()) # type: ignore
def _bool_column_labels(self, column_labels: List[Label]) -> List[Label]:
"""
diff --git a/python/pyspark/pandas/tests/computation/test_any_all.py
b/python/pyspark/pandas/tests/computation/test_any_all.py
index 37966f9e0bf1..9914488d3f68 100644
--- a/python/pyspark/pandas/tests/computation/test_any_all.py
+++ b/python/pyspark/pandas/tests/computation/test_any_all.py
@@ -158,6 +158,11 @@ class FrameAnyAllMixin:
psdf.any(axis="columns", bool_only=False), pdf.any(axis="columns",
bool_only=False)
)
+ # Test axis=None
+ self.assert_eq(psdf.any(axis=None), pdf.any(axis=None))
+ self.assert_eq(psdf.any(axis=None, bool_only=True), pdf.any(axis=None,
bool_only=True))
+ self.assert_eq(psdf.any(axis=None, bool_only=False),
pdf.any(axis=None, bool_only=False))
+
columns.names = ["X", "Y"]
pdf.columns = columns
psdf.columns = columns
@@ -171,6 +176,11 @@ class FrameAnyAllMixin:
self.assert_eq(psdf.any(axis=1, bool_only=True), pdf.any(axis=1,
bool_only=True))
self.assert_eq(psdf.any(axis=1, bool_only=False), pdf.any(axis=1,
bool_only=False))
+ # Test axis=None
+ self.assert_eq(psdf.any(axis=None), pdf.any(axis=None))
+ self.assert_eq(psdf.any(axis=None, bool_only=True), pdf.any(axis=None,
bool_only=True))
+ self.assert_eq(psdf.any(axis=None, bool_only=False),
pdf.any(axis=None, bool_only=False))
+
# Test skipna parameter
pdf = pd.DataFrame(
{"A": [True, False], "B": [1, np.nan], "C": [True, None], "D":
[None, np.nan]}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]