alamb commented on code in PR #21932:
URL: https://github.com/apache/datafusion/pull/21932#discussion_r3162724928


##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -216,126 +224,7 @@ primitive_static_filter!(UInt64StaticFilter, UInt64Type);
 // Floats require a wrapper type (OrderedFloat*) to implement Hash/Eq due to NaN semantics
 macro_rules! float_static_filter {
     ($Name:ident, $ArrowType:ty, $OrderedType:ty) => {
-        pub(super) struct $Name {
-            null_count: usize,
-            values: HashSet<$OrderedType>,
-        }
-
-        impl $Name {
-            pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
-                let in_array = in_array
-                    .as_primitive_opt::<$ArrowType>()
-                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
-
-                let mut values = HashSet::with_capacity(in_array.len());
-                let null_count = in_array.null_count();
-
-                for v in in_array.iter().flatten() {
-                    values.insert(<$OrderedType>::from(v));
-                }
-
-                Ok(Self { null_count, values })
-            }
-        }
-
-        impl StaticFilter for $Name {
-            fn null_count(&self) -> usize {
-                self.null_count
-            }
-
-            fn contains(&self, v: &dyn Array, negated: bool) -> Result<BooleanArray> {
-                // Handle dictionary arrays by recursing on the values
-                downcast_dictionary_array! {
-                    v => {
-                        let values_contains = 
self.contains(v.values().as_ref(), negated)?;
-                        let result = take(&values_contains, v.keys(), None)?;
-                        return Ok(downcast_array(result.as_ref()))
-                    }
-                    _ => {}
-                }
-
-                let v = v
-                    .as_primitive_opt::<$ArrowType>()
-                    .ok_or_else(|| exec_datafusion_err!("Failed to downcast an array to a '{}' array", stringify!($ArrowType)))?;
-
-                let haystack_has_nulls = self.null_count > 0;
-                let needle_values = v.values();
-                let needle_nulls = v.nulls();
-                let needle_has_nulls = v.null_count() > 0;
-
-                // Truth table for `value [NOT] IN (set)` with SQL three-valued logic:
-                // ("-" means the value doesn't affect the result)
-                //
-                // | needle_null | haystack_null | negated | in set? | result |
-                // |-------------|---------------|---------|---------|--------|
-                // | true        | -             | false   | -       | null   |
-                // | true        | -             | true    | -       | null   |
-                // | false       | true          | false   | yes     | true   |
-                // | false       | true          | false   | no      | null   |
-                // | false       | true          | true    | yes     | false  |
-                // | false       | true          | true    | no      | null   |
-                // | false       | false         | false   | yes     | true   |
-                // | false       | false         | false   | no      | false  |
-                // | false       | false         | true    | yes     | false  |
-                // | false       | false         | true    | no      | true   |
-
-                // Compute the "contains" result using collect_bool (fast 
batched approach)
-                // This ignores nulls - we handle them separately
-                let contains_buffer = if negated {
-                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
-                        !self.values.contains(&<$OrderedType>::from(needle_values[i]))
-                    })
-                } else {
-                    BooleanBuffer::collect_bool(needle_values.len(), |i| {
-                        self.values.contains(&<$OrderedType>::from(needle_values[i]))
-                    })
-                };
-
-                // Compute the null mask
-                // Output is null when:
-                // 1. needle value is null, OR
-                // 2. needle value is not in set AND haystack has nulls
-                let result_nulls = match (needle_has_nulls, 
haystack_has_nulls) {
-                    (false, false) => {
-                        // No nulls anywhere
-                        None
-                    }
-                    (true, false) => {
-                        // Only needle has nulls - just use needle's null mask
-                        needle_nulls.cloned()
-                    }
-                    (false, true) => {
-                        // Only haystack has nulls - result is null when value 
not in set
-                        // Valid (not null) when original "in set" is true
-                        // For NOT IN: contains_buffer = !original, so 
validity = !contains_buffer
-                        let validity = if negated {
-                            !&contains_buffer
-                        } else {
-                            contains_buffer.clone()
-                        };
-                        Some(NullBuffer::new(validity))
-                    }
-                    (true, true) => {
-                        // Both have nulls - combine needle nulls with 
haystack-induced nulls
-                        let needle_validity = needle_nulls.map(|n| 
n.inner().clone())
-                            .unwrap_or_else(|| 
BooleanBuffer::new_set(needle_values.len()));
-
-                        // Valid when original "in set" is true (see above)
-                        let haystack_validity = if negated {
-                            !&contains_buffer
-                        } else {
-                            contains_buffer.clone()
-                        };
-
-                        // Combined validity: valid only where both are valid
-                        let combined_validity = &needle_validity & 
&haystack_validity;
-                        Some(NullBuffer::new(combined_validity))
-                    }
-                };
-
-                Ok(BooleanArray::new(contains_buffer, result_nulls))
-            }
-        }
+        primitive_static_filter!($Name, $ArrowType, $OrderedType, <$OrderedType>::from);

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to