This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ebb681758857 [SPARK-55672][PS][TESTS] Fix error message check due to 
StringDtype(na_value=nan)
ebb681758857 is described below

commit ebb6817588575d9a4fb6794924c6be0b0af3eb05
Author: Takuya Ueshin <[email protected]>
AuthorDate: Wed Feb 25 11:50:27 2026 -0800

    [SPARK-55672][PS][TESTS] Fix error message check due to 
StringDtype(na_value=nan)
    
    ### What changes were proposed in this pull request?
    
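    Fixes the error message checks in the pandas-on-Spark tests so that they accept both `object` and `str` as the reported dtype, since the default string dtype is `StringDtype(na_value=nan)` in pandas 3.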
    
    ### Why are the changes needed?
    
    In pandas 3, the default dtype for a string Series is `StringDtype(na_value=nan)`, 
which changes the error messages: the dtype is reported as `str` instead of `object`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Updated the related tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #54465 from ueshin/issues/SPARK-55672/errmsg.
    
    Authored-by: Takuya Ueshin <[email protected]>
    Signed-off-by: Takuya Ueshin <[email protected]>
---
 .../pandas/tests/computation/test_compute.py       |  8 +++++--
 .../pyspark/pandas/tests/computation/test_stats.py | 12 ++++++----
 .../pandas/tests/diff_frames_ops/test_cov.py       |  4 +---
 .../pyspark/pandas/tests/series/test_cumulative.py |  8 +++++--
 python/pyspark/pandas/tests/series/test_stat.py    | 26 +++++++++++++---------
 5 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/python/pyspark/pandas/tests/computation/test_compute.py 
b/python/pyspark/pandas/tests/computation/test_compute.py
index a45132a20a2e..a8bfa245c1e6 100644
--- a/python/pyspark/pandas/tests/computation/test_compute.py
+++ b/python/pyspark/pandas/tests/computation/test_compute.py
@@ -384,9 +384,13 @@ class FrameComputeMixin:
             pdf.quantile([0.25, 0.5, 0.75], numeric_only=True),
         )
 
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             psdf.quantile(0.5, numeric_only=False)
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             psdf.quantile([0.25, 0.5, 0.75], numeric_only=False)
 
     def test_product(self):
diff --git a/python/pyspark/pandas/tests/computation/test_stats.py 
b/python/pyspark/pandas/tests/computation/test_stats.py
index 05e9bdd3c679..6a3ce30060a5 100644
--- a/python/pyspark/pandas/tests/computation/test_stats.py
+++ b/python/pyspark/pandas/tests/computation/test_stats.py
@@ -149,11 +149,11 @@ class StatsTestsMixin:
         self.assert_eq(psdf[["E"]].abs(), pdf[["E"]].abs())
 
         with self.assertRaisesRegex(
-            TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
+            TypeError, r"bad operand type for abs\(\): (object|str) \(string\)"
         ):
             psdf.abs()
         with self.assertRaisesRegex(
-            TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
+            TypeError, r"bad operand type for abs\(\): (object|str) \(string\)"
         ):
             psdf.D.abs()
 
@@ -299,10 +299,14 @@ class StatsTestsMixin:
             psdf[["i", "b"]].sum(numeric_only=False), pdf[["i", 
"b"]].sum(numeric_only=False)
         )
 
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             psdf.sum(numeric_only=False)
 
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             psdf.s.sum()
 
 
diff --git a/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py 
b/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
index 0240b347081b..d4b8f6de1350 100644
--- a/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
+++ b/python/pyspark/pandas/tests/diff_frames_ops/test_cov.py
@@ -50,9 +50,7 @@ class DiffFramesCovMixin:
         psser1 = ps.from_pandas(pser1)
         with self.assertRaisesRegex(TypeError, "unsupported type: <class 
'list'>"):
             psser1.cov([0.12528585, 0.26962463, 0.51111198])
-        with self.assertRaisesRegex(
-            TypeError, "unsupported type: <class 'pandas.core.series.Series'>"
-        ):
+        with self.assertRaisesRegex(TypeError, f"unsupported type: 
{pd.Series}"):
             psser1.cov(pser2)
 
     def _test_cov(self, pser1, pser2):
diff --git a/python/pyspark/pandas/tests/series/test_cumulative.py 
b/python/pyspark/pandas/tests/series/test_cumulative.py
index a75fe6c0c132..f24d38a2d894 100644
--- a/python/pyspark/pandas/tests/series/test_cumulative.py
+++ b/python/pyspark/pandas/tests/series/test_cumulative.py
@@ -68,7 +68,9 @@ class SeriesCumulativeMixin:
         self.assert_eq(pser.cumsum().astype(int), psser.cumsum())
         self.assert_eq(pser.cumsum(skipna=False).astype(int), 
psser.cumsum(skipna=False))
 
-        with self.assertRaisesRegex(TypeError, r"Could not convert object 
\(string\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             ps.Series(["a", "b", "c", "d"]).cumsum()
 
     def test_cumprod(self):
@@ -109,7 +111,9 @@ class SeriesCumulativeMixin:
         self.assert_eq(pser.cumprod(), psser.cumprod())
         self.assert_eq(pser.cumprod(skipna=False).astype(int), 
psser.cumprod(skipna=False))
 
-        with self.assertRaisesRegex(TypeError, r"Could not convert object 
\(string\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             ps.Series(["a", "b", "c", "d"]).cumprod()
 
 
diff --git a/python/pyspark/pandas/tests/series/test_stat.py 
b/python/pyspark/pandas/tests/series/test_stat.py
index 31965e9cdd59..c94c01583b20 100644
--- a/python/pyspark/pandas/tests/series/test_stat.py
+++ b/python/pyspark/pandas/tests/series/test_stat.py
@@ -382,9 +382,13 @@ class SeriesStatMixin:
         ):
             ps.Series([24.0, 21.0, 25.0, 33.0, 26.0]).quantile(q=1.1)
 
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             ps.Series(["a", "b", "c"]).quantile()
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             ps.Series(["a", "b", "c"]).quantile([0.25, 0.5, 0.75])
 
     def test_pct_change(self):
@@ -544,7 +548,9 @@ class SeriesStatMixin:
             ps.Series([]).prod(numeric_only=True)
         with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(void\\) to numeric"):
             ps.Series([]).prod(min_count=1)
-        with self.assertRaisesRegex(TypeError, "Could not convert object 
\\(string\\) to numeric"):
+        with self.assertRaisesRegex(
+            TypeError, r"Could not convert (object|str) \(string\) to numeric"
+        ):
             ps.Series(["a", "b", "c"]).prod()
         with self.assertRaisesRegex(
             TypeError, "Could not convert datetime64\\[ns\\] \\(timestamp.*\\) 
to numeric"
@@ -680,19 +686,19 @@ class SeriesStatMixin:
         )
 
     def test_series_stat_fail(self):
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).mean()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).skew()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).kurtosis()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).std()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).var()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).median()
-        with self.assertRaisesRegex(TypeError, "Could not convert object"):
+        with self.assertRaisesRegex(TypeError, "Could not convert 
(object|str)"):
             ps.Series(["a", "b", "c"]).sem()
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to