sdd commented on code in PR #404: URL: https://github.com/apache/iceberg-rust/pull/404#discussion_r1640925156
########## crates/iceberg/src/arrow/reader.rs: ########## @@ -741,42 +742,98 @@ impl<'a> BoundPredicateVisitor for PredicateConverter<'a> { fn starts_with( &mut self, - _reference: &BoundReference, - _literal: &Datum, + reference: &BoundReference, + literal: &Datum, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement starts_with - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literal = get_arrow_datum(literal)?; + + Ok(Box::new(move |batch| { + let left = project_column(&batch, idx)?; + starts_with(&left, literal.as_ref()) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() + } } fn not_starts_with( &mut self, - _reference: &BoundReference, - _literal: &Datum, + reference: &BoundReference, + literal: &Datum, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement not_starts_with - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literal = get_arrow_datum(literal)?; + + Ok(Box::new(move |batch| { + let left = project_column(&batch, idx)?; + + // update here if arrow ever adds a native not_starts_with + not(&starts_with(&left, literal.as_ref())?) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() Review Comment: Aah, I'd not spotted that these differ depending on if the op is negative or not. Fixed. Good spot! ########## crates/iceberg/src/arrow/reader.rs: ########## @@ -741,42 +742,98 @@ impl<'a> BoundPredicateVisitor for PredicateConverter<'a> { fn starts_with( &mut self, - _reference: &BoundReference, - _literal: &Datum, + reference: &BoundReference, + literal: &Datum, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement starts_with - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literal = get_arrow_datum(literal)?; + + Ok(Box::new(move |batch| { + let left = project_column(&batch, idx)?; + starts_with(&left, literal.as_ref()) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() + } } fn not_starts_with( &mut self, - _reference: &BoundReference, - _literal: &Datum, + reference: &BoundReference, + literal: &Datum, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement not_starts_with - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literal = get_arrow_datum(literal)?; + + Ok(Box::new(move |batch| { + let left = project_column(&batch, idx)?; + + // update here if arrow ever adds a native not_starts_with + not(&starts_with(&left, literal.as_ref())?) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() + } } fn r#in( &mut self, - _reference: &BoundReference, - _literals: &FnvHashSet<Datum>, + reference: &BoundReference, + literals: &FnvHashSet<Datum>, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement in - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literals: Vec<_> = literals + .iter() + .map(|lit| get_arrow_datum(lit).unwrap()) + .collect(); + + Ok(Box::new(move |batch| { + // update this if arrow ever adds a native is_in kernel + let left = project_column(&batch, idx)?; + let mut acc = BooleanArray::from(vec![false; batch.num_rows()]); + for literal in &literals { + acc = or(&acc, &eq(&left, literal.as_ref())?)? + } + + Ok(acc) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() + } } fn not_in( &mut self, - _reference: &BoundReference, - _literals: &FnvHashSet<Datum>, + reference: &BoundReference, + literals: &FnvHashSet<Datum>, _predicate: &BoundPredicate, ) -> Result<Box<PredicateResult>> { - // TODO: Implement not_in - self.build_always_true() + if let Some(idx) = self.bound_reference(reference)? { + let literals: Vec<_> = literals + .iter() + .map(|lit| get_arrow_datum(lit).unwrap()) + .collect(); + + Ok(Box::new(move |batch| { + // update this if arrow ever adds a native not_in kernel + let left = project_column(&batch, idx)?; + let mut acc = BooleanArray::from(vec![true; batch.num_rows()]); + for literal in &literals { + acc = and(&acc, &neq(&left, literal.as_ref())?)? + } + + Ok(acc) + })) + } else { + // A missing column, treating it as null. + self.build_always_false() Review Comment: Also fixed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org