Copilot commented on code in PR #799:
URL: https://github.com/apache/sedona-db/pull/799#discussion_r3158619608


##########
rust/sedona-spatial-join/src/utils/bounds.rs:
##########
@@ -0,0 +1,277 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use float_next_after::NextAfter;
+use sedona_geometry::{
+    bounding_box::BoundingBox,
+    interval::{Interval, IntervalTrait, WraparoundInterval},
+};
+
+/// A float32 bounding box with wraparound support
+///
+/// This struct is conceptually similar to the Rect<f32> but explicitly 
supports
+/// wraparound x intervals to ensure raw xmin and xmax values are not misused.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Bounds2D {
+    x: (f32, f32),
+    y: (f32, f32),
+}
+
+impl Bounds2D {
+    /// Create a new empty rectangle
+    pub fn empty() -> Self {
+        Self::new(WraparoundInterval::empty(), Interval::empty())
+    }
+
+    /// Returns true if this rectangle is empty
+    pub fn is_empty(&self) -> bool {
+        self.x().is_empty() && self.y().is_empty()

Review Comment:
   `Bounds2D::is_empty()` currently returns true only when *both* x and y 
intervals are empty. A 2D bounds should be considered empty if either dimension 
is empty; otherwise you can end up treating a bounds with an empty x (or y) 
interval as non-empty (e.g., after `split()`/intersection), which can lead to 
invalid index inserts/queries and partitioning decisions. Consider changing 
this to use `||` (or otherwise ensure `Bounds2D::new` normalizes any 
partially-empty inputs to fully-empty).
   ```suggestion
           self.x().is_empty() || self.y().is_empty()
   ```



##########
rust/sedona-spatial-join/src/index/default_spatial_index_builder.rs:
##########
@@ -142,25 +164,41 @@ impl DefaultSpatialIndexBuilder {
     fn build_rtree(&mut self) -> Result<RTreeBuildResult> {
         let build_timer = self.metrics.build_time.timer();
 
+        // Each item will add 0 (empty), 1 (regular) or 2 (wraparound)
+        // rectangles to the index.
         let num_rects = self
             .indexed_batches
             .iter()
-            .map(|batch| batch.geom_array.rects().iter().flatten().count())
-            .sum::<usize>();
+            .flat_map(|batch| batch.geom_array.rects().iter())
+            .map(|rect| {
+                if rect.is_empty() {
+                    0
+                } else if rect.is_wraparound() {
+                    2
+                } else {
+                    1
+                }
+            })
+            .sum();
 
         let mut rtree_builder = RTreeBuilder::<f32>::new(num_rects as u32);
         let mut batch_pos_vec = vec![(-1, -1); num_rects];
 
         for (batch_idx, batch) in self.indexed_batches.iter().enumerate() {
             let rects = batch.geom_array.rects();
-            for (idx, rect_opt) in rects.iter().enumerate() {
-                let Some(rect) = rect_opt else {
-                    continue;
-                };
-                let min = rect.min();
-                let max = rect.max();
-                let data_idx = rtree_builder.add(min.x, min.y, max.x, max.y);
-                batch_pos_vec[data_idx as usize] = (batch_idx as i32, idx as 
i32);
+            for (idx, rect) in rects.iter().enumerate() {
+                let (left, right) = 
rect.split(&self.wraparound.unwrap_or(Interval::empty()));
+                if !left.is_empty() {

Review Comment:
   `build_rtree()` uses 
`rect.split(&self.wraparound.unwrap_or(Interval::empty()))`. If a wraparound 
bounds is encountered while `self.wraparound` is `None`, this will silently 
split against an empty interval and drop the geometry from the index (false 
negatives). Since the docs say this configuration is invalid, it would be safer 
to detect `rect.is_wraparound() && self.wraparound.is_none()` and return an 
explicit error early.



##########
rust/sedona-spatial-join/src/partitioning/stream_repartitioner.rs:
##########
@@ -601,26 +600,27 @@ pub(crate) fn assign_rows(
         PartitionedSide::BuildSide => {
             let mut cnt = 0;
             let num_regular_partitions = partitioner.num_regular_partitions() 
as u32;
-            for rect_opt in batch.geom_array.rects() {
-                let partition = match rect_opt {
-                    Some(rect) => 
partitioner.partition_no_multi(&geo_rect_to_bbox(rect))?,
-                    None => {
-                        // Round-robin empty geometries through regular 
partitions to avoid
-                        // overloading a single slot when the build side is 
mostly empty.
-                        let p = SpatialPartition::Regular(cnt);
-                        cnt = (cnt + 1) % num_regular_partitions;
-                        p
-                    }
+            for rect in batch.geom_array.rects() {
+                let partition = if rect.is_empty() {
+                    // Round-robin empty geometries through regular partitions 
to avoid
+                    // overloading a single slot when the build side is mostly 
empty.
+                    let p = SpatialPartition::Regular(cnt);
+                    cnt = (cnt + 1) % num_regular_partitions;
+                    p
+                } else {
+                    partitioner.partition_no_multi(&BoundingBox::xy(rect.x(), 
rect.y()))?
                 };
                 assignments.push(partition);
             }
         }
         PartitionedSide::ProbeSide => {
-            for rect_opt in batch.geom_array.rects() {
-                let partition = match rect_opt {
-                    Some(rect) => 
partitioner.partition(&geo_rect_to_bbox(rect))?,
-                    None => SpatialPartition::None,
+            for rect in batch.geom_array.rects() {
+                let partition = if rect.is_empty() {
+                    SpatialPartition::None
+                } else {
+                    partitioner.partition(&BoundingBox::xy(rect.x(), 
rect.y()))?
                 };

Review Comment:
   `assign_rows()` now passes `BoundingBox::xy(rect.x(), rect.y())` directly 
into the partitioner. For wraparound x-intervals this will currently error in 
the partitioning utilities (e.g., `bbox_to_f32_rect` returns an Execution error 
when `bbox.x().is_wraparound()`). This means repartitioning/out-of-core paths 
can fail for geography antimeridian-crossing geometries. Consider making 
`assign_rows()` wraparound-aware (e.g., split bounds using the configured 
absolute wraparound interval and union the partition results / force `Multi`), 
or ensure geography joins avoid partitioners that require non-wraparound 
rectangles until wraparound partitioning is implemented.



##########
rust/sedona-spatial-join/src/index/default_spatial_index.rs:
##########
@@ -519,13 +532,26 @@ impl SpatialIndex for DefaultSpatialIndex {
         let mut current_row_idx = range.start;
         for row_idx in range {
             current_row_idx = row_idx;
-            let Some(probe_rect) = rects[row_idx] else {
+            let probe_rect = &rects[row_idx];
+            if probe_rect.is_empty() {
                 continue;
+            }
+
+            let mut candidates = if let Some(wraparound) = 
&self.inner.wraparound {
+                let (left, right) = probe_rect.split(wraparound);
+                let (x, y) = left.into_inner();
+                let mut candidates = self.inner.rtree.search(x.0, y.0, x.1, 
y.1);
+                if !right.is_empty() {
+                    let (x, y) = right.into_inner();
+                    candidates.extend(self.inner.rtree.search(x.0, y.0, x.1, 
y.1));
+                }

Review Comment:
   `query_batch()` always performs an R-tree search using the `left` bounds 
returned by `probe_rect.split(wraparound)` even if `left` is empty. If 
`wraparound` is misconfigured or the split produces an empty half, this can 
result in searches with nonsensical extents (e.g., INF/-INF) and potentially 
incorrect results or panics in the underlying index. Guard the left search with 
`if !left.is_empty()` (mirroring the builder’s insertion logic) and return an 
empty candidate set if both halves are empty.



##########
c/sedona-libgpuspatial/src/lib.rs:
##########
@@ -113,9 +112,10 @@ mod sys {
         /// Inserts a batch of bounding boxes into the index.
         /// Each rectangle is represented as a `Rect<f32>` with minimum and 
maximum x and y coordinates.
         /// This method accumulates these rectangles until `finish_building` 
is called to finalize the index.
-        /// The method can be called multiple times to insert data in batches 
before finalizing.
-        pub fn push_build(&mut self, rects: &[Rect<f32>]) -> Result<()> {
-            // Re-interpreting Rect<f32> as flat f32 array (xmin, ymin, xmax, 
ymax)
+        /// The method can be called multiple times to insert data in batches 
before finalizing. The values
+        /// in rects are ordered (xmin, ymin, xmax, ymax).
+        pub fn push_build(&mut self, rects: &[(f32, f32, f32, f32)]) -> 
Result<()> {
+            // Re-interpreting rects as a flat f32 array (xmin, ymin, xmax, 
ymax)
             let raw_ptr = rects.as_ptr() as *const f32;
             self.inner.push_build(raw_ptr, rects.len() as u32)
         }

Review Comment:
   `push_build()`/`probe()` cast `&[(f32, f32, f32, f32)]` to `*const f32` and 
pass it to the CUDA layer as a flat f32 array. Rust does not guarantee the 
memory layout of tuples for FFI / raw reinterpretation, so this cast can be 
undefined behavior. To make this sound, consider introducing a `#[repr(C)]` 
struct (e.g., `struct RawRect { xmin: f32, ymin: f32, xmax: f32, ymax: f32 }`) 
or using a proven-bytemuck `Pod` type for the buffer, and update the doc 
comment (it still mentions `Rect<f32>`).



##########
rust/sedona-spatial-join/src/index/default_spatial_index.rs:
##########
@@ -69,6 +70,15 @@ struct DefaultSpatialIndexInner {
     /// to get the prepared geometries array index.
     pub(crate) rtree: RTree<f32>,
 
+    /// Absolute bounds of the coordinate system
+    ///
+    /// If this value should be `None` if each input feature has exactly one 
item in the tree and
+    /// each probe input is guaranteed to have non-wraparound bounds. This is 
the case when using
+    /// non-wraparound bounds like those computed by the default evaluated 
geometry array
+    /// constructor. If this is some, it must contain the bounds of any 
rectangle in the tree. This
+    /// is needed to split probe rectangles into finite queries when querying 
the tree.

Review Comment:
   Doc comment grammar: “If this value should be `None` if …” reads like a 
duplicated conditional and is confusing. Consider rewording to “This value 
should be `None` if …” and clarifying the invariants for `Some(_)` vs `None`.
   ```suggestion
       /// This value should be `None` when each input feature has exactly one 
item in the tree and
       /// each probe input is guaranteed to have non-wraparound bounds. This 
is the case when using
       /// non-wraparound bounds like those computed by the default evaluated 
geometry array
       /// constructor. If this is `Some(bounds)`, `bounds` must contain the 
bounds of every
       /// rectangle in the tree. These bounds are needed to split probe 
rectangles into finite
       /// queries when querying the tree.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to