This is an automated email from the ASF dual-hosted git repository.
ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f54644719535 [SPARK-55296][PS][FOLLOW-UP] Disconnect the anchor for more cases to mimic the CoW mode behavior
f54644719535 is described below
commit f546447195356ab5e838cd4170fb8fe16f7973ba
Author: Takuya Ueshin <[email protected]>
AuthorDate: Tue Feb 24 12:15:16 2026 -0800
[SPARK-55296][PS][FOLLOW-UP] Disconnect the anchor for more cases to mimic the CoW mode behavior
### What changes were proposed in this pull request?
This is another follow-up of apache/spark#54375.
Disconnects the anchor for more cases.
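Concretely, in-place updates on a Series now route through a new `Series._update_internal_frame` helper (see `series.py` in the diff below): on pandas < 3 it still delegates to the anchor DataFrame, while on pandas >= 3 it re-anchors the Series to a standalone frame. A rough sketch of an affected call path (data and values are illustrative):

```python
import pyspark.pandas as ps

psser = ps.DataFrame({"a": [1, 2, 3]})["a"]
# LocIndexerLike.__setitem__ now goes through
# psser._update_internal_frame(...) instead of
# psser._psdf._update_internal_frame(...) (see indexing.py below).
psser.loc[0] = 99
```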
### Why are the changes needed?
The anchor can be disconnected in most cases with pandas 3 to mimic the CoW mode behavior.
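For context, a minimal sketch of the pandas 3 copy-on-write behavior being mimicked (data and column name are illustrative):

```python
import pandas as pd

pdf = pd.DataFrame({"a": [1.0, None]})
ser = pdf["a"]
# Under copy-on-write, the in-place update affects only `ser`;
# the parent frame is left untouched.
ser.fillna(0.0, inplace=True)
assert pdf["a"].isna().sum() == 1  # `pdf` still has the missing value
```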
### Does this PR introduce _any_ user-facing change?
Yes, it will behave more like pandas 3.
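For example, a rough sketch of the user-facing difference in pandas API on Spark (illustrative data; a Spark session is created on demand):

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({"a": [1.0, None]})
psser = psdf["a"]
# With pandas >= 3, the Series is disconnected from its anchor frame,
# so the in-place update no longer propagates back to `psdf`.
psser.fillna(0.0, inplace=True)
```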
### How was this patch tested?
The existing tests should pass.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54437 from ueshin/issues/SPARK-55296/cow_series.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Takuya Ueshin <[email protected]>
---
python/pyspark/pandas/frame.py | 5 ++++-
python/pyspark/pandas/indexing.py | 12 ++++--------
python/pyspark/pandas/series.py | 16 ++++++++++++----
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index aeb47709766c..a9ffcbc9d59d 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -663,7 +663,7 @@ class DataFrame(Frame, Generic[T]):
self,
internal: InternalFrame,
check_same_anchor: bool = True,
- anchor_force_disconnect: bool = False,
+ anchor_force_disconnect: Optional[bool] = None,
) -> None:
"""
Update InternalFrame with the given one.
@@ -687,6 +687,9 @@ class DataFrame(Frame, Generic[T]):
"""
from pyspark.pandas.series import Series
+ if anchor_force_disconnect is None:
+ anchor_force_disconnect = LooseVersion(pd.__version__) >= "3.0.0"
+
if hasattr(self, "_psseries"):
psseries = {}
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index f5f42b6fda89..7f26f4ce7595 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -636,7 +636,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
if self._psdf_or_psser.name is None:
psser = psser.rename()
- self._psdf_or_psser._psdf._update_internal_frame(
+ self._psdf_or_psser._update_internal_frame(
psser._psdf[
self._psdf_or_psser._psdf._internal.column_labels
]._internal.resolved_copy,
@@ -673,7 +673,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
internal = self._internal.with_new_spark_column(
self._psdf_or_psser._column_label, scol # TODO: dtype?
)
- self._psdf_or_psser._psdf._update_internal_frame(internal, check_same_anchor=False)
+ self._psdf_or_psser._update_internal_frame(internal, check_same_anchor=False)
else:
assert self._is_df
@@ -821,11 +821,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
internal = self._internal.with_new_columns(
new_data_spark_columns, column_labels=column_labels, data_fields=new_fields
)
- self._psdf_or_psser._update_internal_frame(
- internal,
- check_same_anchor=False,
- anchor_force_disconnect=LooseVersion(pd.__version__) >= "3.0.0",
- )
+ self._psdf_or_psser._update_internal_frame(internal, check_same_anchor=False)
class LocIndexer(LocIndexerLike):
@@ -1872,7 +1868,7 @@ class iLocIndexer(LocIndexerLike):
)
super().__setitem__(key, value)
# Update again with resolved_copy to drop extra columns.
- self._psdf._update_internal_frame(
+ self._psdf_or_psser._update_internal_frame(
self._psdf._internal.resolved_copy, check_same_anchor=False
)
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 3f8a2e57792d..1015ff4db4d9 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -495,6 +495,14 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
def _column_label(self) -> Optional[Label]:
return self._col_label
+ def _update_internal_frame(
+ self, internal: InternalFrame, check_same_anchor: bool = True
+ ) -> None:
+ if LooseVersion(pd.__version__) < "3.0.0":
+ self._psdf._update_internal_frame(internal, check_same_anchor=check_same_anchor)
+ else:
+ self._update_anchor(DataFrame(internal.select_column(self._column_label)))
+
def _update_anchor(self, psdf: DataFrame) -> None:
assert psdf._internal.column_labels == [self._column_label], (
psdf._internal.column_labels,
@@ -2220,7 +2228,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace:
- self._psdf._update_internal_frame(psser._psdf._internal, check_same_anchor=False)
+ self._update_internal_frame(psser._psdf._internal, check_same_anchor=False)
return None
else:
return psser.copy()
@@ -2529,7 +2537,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
data_spark_columns=[scol.alias(self._internal.data_spark_column_names[0])],
data_fields=[self._internal.data_fields[0]],
)
- self._psdf._update_internal_frame(internal, check_same_anchor=False)
+ self._update_internal_frame(internal, check_same_anchor=False)
return None
else:
return self._with_new_scol(
@@ -5332,7 +5340,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
internal = self._psdf._internal.with_new_spark_column(
self._column_label, scol # TODO: dtype?
)
- self._psdf._update_internal_frame(internal)
+ self._update_internal_frame(internal)
else:
combined = combine_frames(self._psdf, other._psdf, how="leftouter")
@@ -5349,7 +5357,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
self._column_label, scol # TODO: dtype?
)
- self._psdf._update_internal_frame(internal.resolved_copy, check_same_anchor=False)
+ self._update_internal_frame(internal.resolved_copy, check_same_anchor=False)
def where(self, cond: "Series", other: Any = np.nan) -> "Series":
"""