This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new dcbe275543e [SPARK-45634][PS] Remove `DataFrame.get_dtype_counts` from Pandas API on Spark
dcbe275543e is described below
commit dcbe275543e05cb4529317ddb933d09253d65d6f
Author: Haejoon Lee <[email protected]>
AuthorDate: Thu Oct 26 11:16:36 2023 +0900
[SPARK-45634][PS] Remove `DataFrame.get_dtype_counts` from Pandas API on Spark
### What changes were proposed in this pull request?
This PR proposes to remove the old API `get_dtype_counts` from the Pandas API on Spark.
### Why are the changes needed?
This API was deprecated a long time ago, but it had not been removed because it was used internally in our code base. It is no longer used anywhere, so it can now be removed.
### Does this PR introduce _any_ user-facing change?
`DataFrame.get_dtype_counts` is removed.
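For migration, here is a minimal sketch of the replacement recommended in the migration guide (assuming an active Spark session and `pyspark.pandas` imported as `ps`; the example data is illustrative):

```python
import pyspark.pandas as ps

df = ps.DataFrame({"str": ["a", "b", "c"], "int1": [1, 2, 3], "int2": [1, 2, 3]})

# Before (removed in Spark 4.0):
#     df.get_dtype_counts()

# After: `DataFrame.dtypes` returns a plain pandas Series of per-column
# dtypes, so pandas' `value_counts()` yields the same per-dtype counts.
df.dtypes.value_counts()
# int64     2
# object    1
```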
### How was this patch tested?
No new test is required for API removal. The existing CI should pass.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43488 from itholic/SPARK-45634.
Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../source/migration_guide/pyspark_upgrade.rst | 1 +
python/pyspark/pandas/generic.py | 51 ----------------------
2 files changed, 1 insertion(+), 51 deletions(-)
diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst
index 933fa936f70..20fab578504 100644
--- a/python/docs/source/migration_guide/pyspark_upgrade.rst
+++ b/python/docs/source/migration_guide/pyspark_upgrade.rst
@@ -53,6 +53,7 @@ Upgrading from PySpark 3.5 to 4.0
* In Spark 4.0, ``col_space`` parameter from ``DataFrame.to_latex`` and ``Series.to_latex`` has been removed from pandas API on Spark.
* In Spark 4.0, ``DataFrame.to_spark_io`` has been removed from pandas API on Spark, use ``DataFrame.spark.to_spark_io`` instead.
* In Spark 4.0, ``Series.is_monotonic`` and ``Index.is_monotonic`` have been removed from pandas API on Spark, use ``Series.is_monotonic_increasing`` or ``Index.is_monotonic_increasing`` instead respectively.
+* In Spark 4.0, ``DataFrame.get_dtype_counts`` has been removed from pandas API on Spark, use ``DataFrame.dtypes.value_counts()`` instead.
Upgrading from PySpark 3.3 to 3.4
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index c6f1b9ccbb7..16eaeb6142e 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -19,13 +19,11 @@
A base class of DataFrame/Column to behave like pandas DataFrame/Series.
"""
from abc import ABCMeta, abstractmethod
-from collections import Counter
from functools import reduce
from typing import (
    Any,
    Callable,
    Dict,
-    Iterable,
    IO,
    List,
    Optional,
@@ -400,55 +398,6 @@ class Frame(object, metaclass=ABCMeta):
"""
return self._apply_series_op(lambda psser: psser._cumprod(skipna),
should_resolve=True)
-    # TODO: Although this has removed pandas >= 1.0.0, but we're keeping this as deprecated
-    # since we're using this for `DataFrame.info` internally.
-    # We can drop it once our minimal pandas version becomes 1.0.0.
-    def get_dtype_counts(self) -> pd.Series:
-        """
-        Return counts of unique dtypes in this object.
-
-        .. deprecated:: 0.14.0
-
-        Returns
-        -------
-        dtype: pd.Series
-            Series with the count of columns with each dtype.
-
-        See Also
-        --------
-        dtypes: Return the dtypes in this object.
-
-        Examples
-        --------
-        >>> a = [['a', 1, 1], ['b', 2, 2], ['c', 3, 3]]
-        >>> df = ps.DataFrame(a, columns=['str', 'int1', 'int2'])
-        >>> df
-          str  int1  int2
-        0   a     1     1
-        1   b     2     2
-        2   c     3     3
-
-        >>> df.get_dtype_counts().sort_values()
-        object    1
-        int64     2
-        dtype: int64
-
-        >>> df.str.get_dtype_counts().sort_values()
-        object    1
-        dtype: int64
-        """
-        warnings.warn(
-            "`get_dtype_counts` has been deprecated and will be "
-            "removed in a future version. For DataFrames use "
-            "`.dtypes.value_counts()",
-            FutureWarning,
-        )
-        if not isinstance(self.dtypes, Iterable):
-            dtypes = [self.dtypes]
-        else:
-            dtypes = list(self.dtypes)
-        return pd.Series(dict(Counter([d.name for d in dtypes])))
-
    def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
        r"""
        Apply func(self, \*args, \*\*kwargs).
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]