This is an automated email from the ASF dual-hosted git repository.
ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ebb681758857 [SPARK-55672][PS][TESTS] Fix error message check due to
StringDtype(na_value=nan)
ebb681758857 is described below
commit ebb6817588575d9a4fb6794924c6be0b0af3eb05
Author: Takuya Ueshin <[email protected]>
AuthorDate: Wed Feb 25 11:50:27 2026 -0800
[SPARK-55672][PS][TESTS] Fix error message check due to
StringDtype(na_value=nan)
### What changes were proposed in this pull request?
Fixes the error message checks that fail due to `StringDtype(na_value=nan)`.
### Why are the changes needed?
The dtype for the default string Series is `StringDtype(na_value=nan)` in
pandas 3, which causes the error messages to change.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the related tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54465 from ueshin/issues/SPARK-55672/errmsg.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Takuya Ueshin <[email protected]>
---
.../pandas/tests/computation/test_compute.py | 8 +++++--
.../pyspark/pandas/tests/computation/test_stats.py | 12 ++++++----
.../pandas/tests/diff_frames_ops/test_cov.py | 4 +---
.../pyspark/pandas/tests/series/test_cumulative.py | 8 +++++--
python/pyspark/pandas/tests/series/test_stat.py | 26 +++++++++++++---------
5 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/python/pyspark/pandas/tests/computation/test_compute.py
b/python/pyspark/pandas/tests/computation/test_compute.py
index a45132a20a2e..a8bfa245c1e6 100644
--- a/python/pyspark/pandas/tests/computation/test_compute.py
+++ b/python/pyspark/pandas/tests/computation/test_compute.py
@@ -384,9 +384,13 @@ class FrameComputeMixin:
pdf.quantile([0.25, 0.5, 0.75], numeric_only=True),
)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
psdf.quantile(0.5, numeric_only=False)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
psdf.quantile([0.25, 0.5, 0.75], numeric_only=False)
def test_product(self):
diff --git a/python/pyspark/pandas/tests/computation/test_stats.py
b/python/pyspark/pandas/tests/computation/test_stats.py
index 05e9bdd3c679..6a3ce30060a5 100644
--- a/python/pyspark/pandas/tests/computation/test_stats.py
+++ b/python/pyspark/pandas/tests/computation/test_stats.py
@@ -149,11 +149,11 @@ class StatsTestsMixin:
self.assert_eq(psdf[["E"]].abs(), pdf[["E"]].abs())
with self.assertRaisesRegex(
- TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
+ TypeError, r"bad operand type for abs\(\): (object|str) \(string\)"
):
psdf.abs()
with self.assertRaisesRegex(
- TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
+ TypeError, r"bad operand type for abs\(\): (object|str) \(string\)"
):
psdf.D.abs()
@@ -299,10 +299,14 @@ class StatsTestsMixin:
psdf[["i", "b"]].sum(numeric_only=False), pdf[["i",
"b"]].sum(numeric_only=False)
)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
psdf.sum(numeric_only=False)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
psdf.s.sum()
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
b/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
index 0240b347081b..d4b8f6de1350 100644
--- a/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
@@ -50,9 +50,7 @@ class DiffFramesCovMixin:
psser1 = ps.from_pandas(pser1)
with self.assertRaisesRegex(TypeError, "unsupported type: <class
'list'>"):
psser1.cov([0.12528585, 0.26962463, 0.51111198])
- with self.assertRaisesRegex(
- TypeError, "unsupported type: <class 'pandas.core.series.Series'>"
- ):
+ with self.assertRaisesRegex(TypeError, f"unsupported type:
{pd.Series}"):
psser1.cov(pser2)
def _test_cov(self, pser1, pser2):
diff --git a/python/pyspark/pandas/tests/series/test_cumulative.py
b/python/pyspark/pandas/tests/series/test_cumulative.py
index a75fe6c0c132..f24d38a2d894 100644
--- a/python/pyspark/pandas/tests/series/test_cumulative.py
+++ b/python/pyspark/pandas/tests/series/test_cumulative.py
@@ -68,7 +68,9 @@ class SeriesCumulativeMixin:
self.assert_eq(pser.cumsum().astype(int), psser.cumsum())
self.assert_eq(pser.cumsum(skipna=False).astype(int),
psser.cumsum(skipna=False))
- with self.assertRaisesRegex(TypeError, r"Could not convert object
\(string\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
ps.Series(["a", "b", "c", "d"]).cumsum()
def test_cumprod(self):
@@ -109,7 +111,9 @@ class SeriesCumulativeMixin:
self.assert_eq(pser.cumprod(), psser.cumprod())
self.assert_eq(pser.cumprod(skipna=False).astype(int),
psser.cumprod(skipna=False))
- with self.assertRaisesRegex(TypeError, r"Could not convert object
\(string\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
ps.Series(["a", "b", "c", "d"]).cumprod()
diff --git a/python/pyspark/pandas/tests/series/test_stat.py
b/python/pyspark/pandas/tests/series/test_stat.py
index 31965e9cdd59..c94c01583b20 100644
--- a/python/pyspark/pandas/tests/series/test_stat.py
+++ b/python/pyspark/pandas/tests/series/test_stat.py
@@ -382,9 +382,13 @@ class SeriesStatMixin:
):
ps.Series([24.0, 21.0, 25.0, 33.0, 26.0]).quantile(q=1.1)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
ps.Series(["a", "b", "c"]).quantile()
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
ps.Series(["a", "b", "c"]).quantile([0.25, 0.5, 0.75])
def test_pct_change(self):
@@ -544,7 +548,9 @@ class SeriesStatMixin:
ps.Series([]).prod(numeric_only=True)
with self.assertRaisesRegex(TypeError, "Could not convert object
\\(void\\) to numeric"):
ps.Series([]).prod(min_count=1)
- with self.assertRaisesRegex(TypeError, "Could not convert object
\\(string\\) to numeric"):
+ with self.assertRaisesRegex(
+ TypeError, r"Could not convert (object|str) \(string\) to numeric"
+ ):
ps.Series(["a", "b", "c"]).prod()
with self.assertRaisesRegex(
TypeError, "Could not convert datetime64\\[ns\\] \\(timestamp.*\\)
to numeric"
@@ -680,19 +686,19 @@ class SeriesStatMixin:
)
def test_series_stat_fail(self):
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).mean()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).skew()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).kurtosis()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).std()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).var()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).median()
- with self.assertRaisesRegex(TypeError, "Could not convert object"):
+ with self.assertRaisesRegex(TypeError, "Could not convert
(object|str)"):
ps.Series(["a", "b", "c"]).sem()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]