Copilot commented on code in PR #2038:
URL: https://github.com/apache/sedona/pull/2038#discussion_r2180582238
##########
python/sedona/geopandas/geoseries.py:
##########
@@ -778,6 +796,215 @@ def union_all(self, method="unary", grid_size=None):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
+ def intersects(
+ self, other: Union["GeoSeries", BaseGeometry], align: Union[bool,
None] = None
+ ) -> pspd.Series:
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ each aligned geometry that intersects `other`.
+
+ An object is said to intersect `other` if its `boundary` and `interior`
+ intersects in any way with those of the other.
+
+ The operation works on a 1-to-1 row-wise manner:
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to test if is
+ intersected.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
None defaults to True.
+ If False, the order of elements is preserved. (not supported in
Sedona Geopandas)
+
+ Returns
+ -------
+ Series (bool)
+
+ Examples
+ --------
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = geopandas.GeoSeries(
+ ... [
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(-100, -100),
+ ... ],
+ ... index=range(1, 5),
+ ... )
+
+ We can check two GeoSeries against each other, row by row.
+ The GeoSeries above have different indices. We align both GeoSeries
+ based on index values and compare elements with the same index:
+
+ >>> s.intersects(s2)
+ 0 True
+ 1 True
+ 2 True
+ 3 False
+ dtype: bool
+
+ We can also check if each geometry of GeoSeries intersects a single
+ geometry:
+
+ >>> line = LineString([(-1, 1), (3, 1)])
+ >>> s.intersects(line)
+ 0 True
+ 1 True
+ 2 True
+ 3 True
+ dtype: bool
+
+ Notes
+ -----
+ This method works in a row-wise manner. It does not check if an element
+ of one GeoSeries ``intersects`` *any* element of the other one.
+
+ See also
+ --------
+ GeoSeries.disjoint
+ GeoSeries.crosses
+ GeoSeries.touches
+ GeoSeries.intersection
+ """
+ return (
+ self._row_wise_operation(
+ "ST_Intersects(`L`, `R`)", other, align, rename="intersects"
+ )
+ .to_spark_pandas()
+ .astype("bool")
+ )
+
+ def intersection(
+ self, other: Union["GeoSeries", BaseGeometry], align: Union[bool,
None] = None
+ ) -> "GeoSeries":
+ """Returns a ``GeoSeries`` of the intersection of points in each
+ aligned geometry with `other`.
+
+ The operation works on a 1-to-1 row-wise manner:
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to find the
+ intersection with.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
None defaults to True.
+ If False, the order of elements is preserved. (not supported in
Sedona Geopandas)
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(-100, -100),
+ ... ],
+ ... )
+
+ We can do an intersection of each geometry and a single
+ shapely geometry:
+
+ >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5,
-0.5), (-0.5, -0.5)])
+ >>> s.intersection(geom)
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(0, 1),
+ dtype: geometry
+
+ >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5,
-0.5), (-0.5, -0.5)])
+ >>> s.intersection(Polygon([(0, 0), (1, 1), (0, 1)]))
+ 0 POLYGON ((0 0, 2 2, 0 2))
+ 1 POLYGON ((0 0, 2 2, 0 2))
+ 2 LINESTRING (0 0, 2 2)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (0 1)
+ dtype: geometry
+
+ We can also check two GeoSeries against each other, row by row.
+ Neither GeoSeries above was given an explicit index, so both use the
+ same default index; elements with the same index value are intersected
+ with each other.
+
+ >>> s.intersection(s2)
+ 0 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 1 LINESTRING (1 1, 1 2)
+ 2 POINT (1 1)
+ 3 POINT (1 1)
+ 4 POLYGON EMPTY
+ dtype: geometry
+
+ See Also
+ --------
+ GeoSeries.difference
+ GeoSeries.symmetric_difference
+ GeoSeries.union
+ """
+ return self._row_wise_operation(
+ "ST_Intersection(`L`, `R`)", other, align, rename="intersection"
+ )
+
+ def _row_wise_operation(
+ self,
+ select: str,
+ other: Union["GeoSeries", BaseGeometry],
+ align: Union[bool, None],
+ rename: str,
+ ):
+ """
+ Helper function to perform a row-wise operation on two GeoSeries.
+ The self column and other column are aliased to `L` and `R`,
respectively.
+ """
+ from pyspark.sql.functions import col
+
+ # Note: this is specifically False. None is valid since it defaults to
True similar to geopandas
+ if align is False:
+ raise NotImplementedError("Sedona Geopandas does not support
align=False")
+
+ if isinstance(other, BaseGeometry):
+ other = GeoSeries([other] * len(self))
+
+ assert isinstance(other, GeoSeries), f"Invalid type for other:
{type(other)}"
+
+ df = self._internal.spark_frame.select(
+ col(self.get_first_geometry_column()).alias("L"), col(PS_INDEX_COL)
+ )
+ other_df = other._internal.spark_frame.select(
+ col(other.get_first_geometry_column()).alias("R"),
col(PS_INDEX_COL)
+ )
+ joined_df = df.join(other_df, on=PS_INDEX_COL, how="outer")
Review Comment:
Using an outer join can introduce rows for indices that exist only in one
series, potentially yielding `NULL` geometries and unexpected results. To
better mirror pandas-style alignment for row-wise operations, consider using a
left join (`how="left"`) or explicitly handling missing values before applying
the spatial predicate.
```suggestion
joined_df = df.join(other_df, on=PS_INDEX_COL, how="left")
```
##########
python/sedona/geopandas/geoseries.py:
##########
@@ -778,6 +796,215 @@ def union_all(self, method="unary", grid_size=None):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
+ def intersects(
+ self, other: Union["GeoSeries", BaseGeometry], align: Union[bool,
None] = None
+ ) -> pspd.Series:
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ each aligned geometry that intersects `other`.
+
+ An object is said to intersect `other` if its `boundary` and `interior`
+ intersects in any way with those of the other.
+
+ The operation works on a 1-to-1 row-wise manner:
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to test if is
+ intersected.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
None defaults to True.
+ If False, the order of elements is preserved. (not supported in
Sedona Geopandas)
+
+ Returns
+ -------
+ Series (bool)
+
+ Examples
+ --------
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = geopandas.GeoSeries(
+ ... [
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(-100, -100),
+ ... ],
+ ... index=range(1, 5),
+ ... )
+
+ We can check two GeoSeries against each other, row by row.
+ The GeoSeries above have different indices. We align both GeoSeries
+ based on index values and compare elements with the same index:
+
+ >>> s.intersects(s2)
+ 0 True
+ 1 True
+ 2 True
+ 3 False
+ dtype: bool
+
+ We can also check if each geometry of GeoSeries intersects a single
+ geometry:
+
+ >>> line = LineString([(-1, 1), (3, 1)])
+ >>> s.intersects(line)
+ 0 True
+ 1 True
+ 2 True
+ 3 True
+ dtype: bool
+
+ Notes
+ -----
+ This method works in a row-wise manner. It does not check if an element
+ of one GeoSeries ``intersects`` *any* element of the other one.
+
+ See also
+ --------
+ GeoSeries.disjoint
+ GeoSeries.crosses
+ GeoSeries.touches
+ GeoSeries.intersection
+ """
+ return (
+ self._row_wise_operation(
+ "ST_Intersects(`L`, `R`)", other, align, rename="intersects"
+ )
+ .to_spark_pandas()
+ .astype("bool")
+ )
+
+ def intersection(
+ self, other: Union["GeoSeries", BaseGeometry], align: Union[bool,
None] = None
+ ) -> "GeoSeries":
+ """Returns a ``GeoSeries`` of the intersection of points in each
+ aligned geometry with `other`.
+
+ The operation works on a 1-to-1 row-wise manner:
+
+ Parameters
+ ----------
+ other : GeoSeries or geometric object
+ The GeoSeries (elementwise) or geometric object to find the
+ intersection with.
+ align : bool | None (default None)
+ If True, automatically aligns GeoSeries based on their indices.
None defaults to True.
+ If False, the order of elements is preserved. (not supported in
Sedona Geopandas)
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from shapely.geometry import Polygon, LineString, Point
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... Polygon([(0, 0), (2, 2), (0, 2)]),
+ ... LineString([(0, 0), (2, 2)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(0, 1),
+ ... ],
+ ... )
+ >>> s2 = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... LineString([(1, 0), (1, 3)]),
+ ... LineString([(2, 0), (0, 2)]),
+ ... Point(1, 1),
+ ... Point(-100, -100),
+ ... ],
+ ... )
+
+ We can do an intersection of each geometry and a single
+ shapely geometry:
+
+ >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5,
-0.5), (-0.5, -0.5)])
+ >>> s.intersection(geom)
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ Polygon([(0, 0), (2, 2), (0, 2)]),
+ LineString([(0, 0), (2, 2)]),
+ LineString([(2, 0), (0, 2)]),
+ Point(0, 1),
+ dtype: geometry
+
+ >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5,
-0.5), (-0.5, -0.5)])
+ >>> s.intersection(Polygon([(0, 0), (1, 1), (0, 1)]))
+ 0 POLYGON ((0 0, 2 2, 0 2))
+ 1 POLYGON ((0 0, 2 2, 0 2))
+ 2 LINESTRING (0 0, 2 2)
+ 3 LINESTRING (2 0, 0 2)
+ 4 POINT (0 1)
+ dtype: geometry
+
+ We can also check two GeoSeries against each other, row by row.
+ Neither GeoSeries above was given an explicit index, so both use the
+ same default index; elements with the same index value are intersected
+ with each other.
+
+ >>> s.intersection(s2)
+ 0 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 1 LINESTRING (1 1, 1 2)
+ 2 POINT (1 1)
+ 3 POINT (1 1)
+ 4 POLYGON EMPTY
+ dtype: geometry
+
+ See Also
+ --------
+ GeoSeries.difference
+ GeoSeries.symmetric_difference
+ GeoSeries.union
+ """
+ return self._row_wise_operation(
+ "ST_Intersection(`L`, `R`)", other, align, rename="intersection"
+ )
+
+ def _row_wise_operation(
+ self,
+ select: str,
+ other: Union["GeoSeries", BaseGeometry],
+ align: Union[bool, None],
+ rename: str,
+ ):
+ """
+ Helper function to perform a row-wise operation on two GeoSeries.
+ The self column and other column are aliased to `L` and `R`,
respectively.
+ """
+ from pyspark.sql.functions import col
+
+ # Note: this is specifically False. None is valid since it defaults to
True similar to geopandas
+ if align is False:
+ raise NotImplementedError("Sedona Geopandas does not support
align=False")
+
+ if isinstance(other, BaseGeometry):
+ other = GeoSeries([other] * len(self))
Review Comment:
When wrapping a single `BaseGeometry` into a `GeoSeries`, the original index
is not preserved. This can lead to incorrect alignment when the series has a
custom index. Consider passing `index=self.index` to the constructor, e.g.,
`GeoSeries([other] * len(self), index=self.index)`.
```suggestion
other = GeoSeries([other] * len(self), index=self.index)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]