This is an automated email from the ASF dual-hosted git repository.
ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f54644719535 [SPARK-55296][PS][FOLLOW-UP] Disconnect the anchor for more cases to mimic the CoW mode behavior
f54644719535 is described below
commit f546447195356ab5e838cd4170fb8fe16f7973ba
Author: Takuya Ueshin <[email protected]>
AuthorDate: Tue Feb 24 12:15:16 2026 -0800
[SPARK-55296][PS][FOLLOW-UP] Disconnect the anchor for more cases to mimic the CoW mode behavior
### What changes were proposed in this pull request?
This is another follow-up of apache/spark#54375.
Disconnects the anchor for more cases.
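Concretely, in-place updates on a Series now route through a new `Series._update_internal_frame` helper (see `series.py` in the diff below): on pandas < 3 it still delegates to the anchor DataFrame, while on pandas >= 3 it re-anchors the Series to a standalone frame. A rough sketch of an affected call path (data and values are illustrative):

```python
import pyspark.pandas as ps

psser = ps.DataFrame({"a": [1, 2, 3]})["a"]
# LocIndexerLike.__setitem__ now goes through
# psser._update_internal_frame(...) instead of
# psser._psdf._update_internal_frame(...) (see indexing.py below).
psser.loc[0] = 99
```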
### Why are the changes needed?
The anchor can be disconnected in most cases with pandas 3 to mimic the CoW mode behavior.
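For context, a minimal sketch of the pandas 3 copy-on-write behavior being mimicked (data and column name are illustrative):

```python
import pandas as pd

pdf = pd.DataFrame({"a": [1.0, None]})
ser = pdf["a"]
# Under copy-on-write, the in-place update affects only `ser`;
# the parent frame is left untouched.
ser.fillna(0.0, inplace=True)
assert pdf["a"].isna().sum() == 1  # `pdf` still has the missing value
```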
### Does this PR introduce _any_ user-facing change?
Yes, it will behave more like pandas 3.
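For example, a rough sketch of the user-facing difference in pandas API on Spark (illustrative data; a Spark session is created on demand):

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({"a": [1.0, None]})
psser = psdf["a"]
# With pandas >= 3, the Series is disconnected from its anchor frame,
# so the in-place update no longer propagates back to `psdf`.
psser.fillna(0.0, inplace=True)
```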
### How was this patch tested?
The existing tests should pass.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54437 from ueshin/issues/SPARK-55296/cow_series.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Takuya Ueshin <[email protected]>
---
python/pyspark/pandas/frame.py | 5 ++++-
python/pyspark/pandas/indexing.py | 12 ++++--------
python/pyspark/pandas/series.py | 16 ++++++++++++----
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index aeb47709766c..a9ffcbc9d59d 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -663,7 +663,7 @@ class DataFrame(Frame, Generic[T]):
self,
internal: InternalFrame,
check_same_anchor: bool = True,
- anchor_force_disconnect: bool = False,
+ anchor_force_disconnect: Optional[bool] = None,
) -> None:
"""
Update InternalFrame with the given one.
@@ -687,6 +687,9 @@ class DataFrame(Frame, Generic[T]):
"""
from pyspark.pandas.series import Series
+ if anchor_force_disconnect is None:
+ anchor_force_disconnect = LooseVersion(pd.__version__) >= "3.0.0"
+
if hasattr(self, "_psseries"):
psseries = {}
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index f5f42b6fda89..7f26f4ce7595 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -636,7 +636,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
if self._psdf_or_psser.name is None:
psser = psser.rename()
- self._psdf_or_psser._psdf._update_internal_frame(
+ self._psdf_or_psser._update_internal_frame(
psser._psdf[
self._psdf_or_psser._psdf._internal.column_labels
]._internal.resolved_copy,
@@ -673,7 +673,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
internal = self._internal.with_new_spark_column(
self._psdf_or_psser._column_label, scol # TODO: dtype?
)
- self._psdf_or_psser._psdf._update_internal_frame(internal, check_same_anchor=False)
+ self._psdf_or_psser._update_internal_frame(internal, check_same_anchor=False)
else:
assert self._is_df
@@ -821,11 +821,7 @@ class LocIndexerLike(IndexerLike, metaclass=ABCMeta):
internal = self._internal.with_new_columns(
new_data_spark_columns, column_labels=column_labels, data_fields=new_fields
)
- self._psdf_or_psser._update_internal_frame(
- internal,
- check_same_anchor=False,
- anchor_force_disconnect=LooseVersion(pd.__version__) >= "3.0.0",
- )
+ self._psdf_or_psser._update_internal_frame(internal, check_same_anchor=False)
class LocIndexer(LocIndexerLike):
@@ -1872,7 +1868,7 @@ class iLocIndexer(LocIndexerLike):
)
super().__setitem__(key, value)
# Update again with resolved_copy to drop extra columns.
- self._psdf._update_internal_frame(
+ self._psdf_or_psser._update_internal_frame(
self._psdf._internal.resolved_copy, check_same_anchor=False
)
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 3f8a2e57792d..1015ff4db4d9 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -495,6 +495,14 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
def _column_label(self) -> Optional[Label]:
return self._col_label
+ def _update_internal_frame(
+ self, internal: InternalFrame, check_same_anchor: bool = True
+ ) -> None:
+ if LooseVersion(pd.__version__) < "3.0.0":
+ self._psdf._update_internal_frame(internal, check_same_anchor=check_same_anchor)
+ else:
+ self._update_anchor(DataFrame(internal.select_column(self._column_label)))
+
def _update_anchor(self, psdf: DataFrame) -> None:
assert psdf._internal.column_labels == [self._column_label], (
psdf._internal.column_labels,
@@ -2220,7 +2228,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace:
- self._psdf._update_internal_frame(psser._psdf._internal, check_same_anchor=False)
+ self._update_internal_frame(psser._psdf._internal, check_same_anchor=False)
return None
else:
return psser.copy()
@@ -2529,7 +2537,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
data_spark_columns=[scol.alias(self._internal.data_spark_column_names[0])],
data_fields=[self._internal.data_fields[0]],
)
- self._psdf._update_internal_frame(internal, check_same_anchor=False)
+ self._update_internal_frame(internal, check_same_anchor=False)
return None
else:
return self._with_new_scol(
@@ -5332,7 +5340,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
internal = self._psdf._internal.with_new_spark_column(
self._column_label, scol # TODO: dtype?
)
- self._psdf._update_internal_frame(internal)
+ self._update_internal_frame(internal)
else:
combined = combine_frames(self._psdf, other._psdf, how="leftouter")
@@ -5349,7 +5357,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
self._column_label, scol # TODO: dtype?
)
- self._psdf._update_internal_frame(internal.resolved_copy, check_same_anchor=False)
+ self._update_internal_frame(internal.resolved_copy, check_same_anchor=False)
def where(self, cond: "Series", other: Any = np.nan) -> "Series":
"""