gene-bordegaray commented on code in PR #21931:
URL: https://github.com/apache/datafusion/pull/21931#discussion_r3250295194


##########
datafusion/physical-plan/src/joins/hash_join/shared_bounds.rs:
##########
@@ -202,6 +195,64 @@ fn combine_membership_and_bounds(
     }
 }
 
+/// Compute the global (envelope) min/max bounds across a set of partition 
bounds.
+///
+/// For each column index, returns the smallest min seen and the largest max 
seen.
+/// Columns where any partition is missing bounds, or where bounds are not 
totally
+/// ordered (e.g. mixed-type comparisons), are dropped from the global 
envelope.
+fn compute_global_bounds(per_partition: &[&PartitionBounds]) -> 
Option<PartitionBounds> {
+    let mut iter = per_partition.iter();
+    let first = iter.next()?;
+    let mut acc: Vec<Option<ColumnBounds>> = first
+        .column_bounds
+        .iter()
+        .map(|cb| Some(cb.clone()))
+        .collect();
+
+    for partition in iter {
+        if partition.column_bounds.len() != acc.len() {
+            return None;
+        }
+        for (slot, cb) in acc.iter_mut().zip(partition.column_bounds.iter()) {
+            let Some(existing) = slot.as_mut() else {
+                continue;
+            };
+            match cb.min.partial_cmp(&existing.min) {
+                Some(std::cmp::Ordering::Less) => existing.min = 
cb.min.clone(),
+                Some(_) => {}
+                None => {
+                    *slot = None;
+                    continue;
+                }
+            }
+            match cb.max.partial_cmp(&existing.max) {
+                Some(std::cmp::Ordering::Greater) => existing.max = 
cb.max.clone(),
+                Some(_) => {}
+                None => *slot = None,
+            }
+        }
+    }
+
+    let merged: Vec<ColumnBounds> = acc.into_iter().flatten().collect();

Review Comment:
   I noticed you pushed more commits, but this is still here. The regression 
test does not replicate the issue because it drops the second column.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to