asubiotto commented on code in PR #9621:
URL: https://github.com/apache/arrow-rs/pull/9621#discussion_r3030428008


##########
arrow-ord/src/cmp.rs:
##########
@@ -373,16 +394,93 @@ fn apply<T: ArrayOrd>(
             Op::GreaterEqual => apply_op(l, l_s, r, r_s, true, T::is_lt),
         };
 
-        // If a side had a dictionary, and was not scalar, we need to 
materialize this
-        Some(match (l_v, r_v) {
-            (Some(l_v), _) if l_s.is_none() => take_bits(l_v, buffer),
-            (_, Some(r_v)) if r_s.is_none() => take_bits(r_v, buffer),
-            _ => buffer,
+        // Expand the physical-length result back to logical length.
+        // Find the non-scalar side that needs expansion (at most one).
+        let (dict, ree) = if l_s.is_none() {
+            (l_v, l_ree)
+        } else if r_s.is_none() {
+            (r_v, r_ree)
+        } else {
+            (None, None)
+        };
+        let buffer = match dict {
+            Some(d) => take_bits(d, buffer),
+            None => buffer,
+        };
+        Some(match ree {
+            Some(info) => expand_from_runs(info, buffer),
+            None => buffer,
         })
     }
 }
 
-/// Perform a take operation on `buffer` with the given dictionary
+/// Build a logical→physical index vector for one side of a non-scalar 
comparison.
+fn logical_indices(
+    len: usize,
+    dict: Option<&dyn AnyDictionaryArray>,
+    ree: Option<&ReeInfo>,
+) -> Vec<usize> {
+    match (dict, ree) {
+        (Some(d), Some(info)) => {
+            let keys = d.normalized_keys();
+            ree_physical_indices(info)
+                .iter()
+                .map(|&i| keys[i])
+                .collect()
+        }
+        (Some(d), None) => d.normalized_keys(),
+        (None, Some(info)) => ree_physical_indices(info),
+        (None, None) => (0..len).collect(),
+    }
+}
+
+fn ree_physical_indices(info: &ReeInfo) -> Vec<usize> {
+    let run_ends = info.run_ends_as_usize();
+    let end = info.offset + info.len;
+    let mut indices = Vec::with_capacity(info.len);
+    let mut pos = info.offset;
+    for (physical, &run_end) in 
run_ends.iter().enumerate().skip(info.start_physical) {
+        let run_end = run_end.min(end);
+        indices.extend(std::iter::repeat_n(physical, run_end - pos));
+        pos = run_end;
+        if pos >= end {
+            break;
+        }
+    }
+    indices
+}
+
+fn scalar_index(dict: Option<&dyn AnyDictionaryArray>, ree: Option<&ReeInfo>) 
-> usize {
+    let idx = ree.map(|r| r.start_physical).unwrap_or(0);

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to