ZENOTME commented on code in PR #946: URL: https://github.com/apache/iceberg-rust/pull/946#discussion_r1947484109
########## crates/iceberg/src/spec/transform.rs: ########## @@ -296,6 +297,165 @@ impl Transform { } } + /// Strictly projects a given predicate according to the transformation + /// specified by the `Transform` instance. + /// + /// This method ensures that the projected predicate is strictly aligned + /// with the transformation logic, providing a more precise filtering + /// mechanism for transformed data. + /// + /// # Example + /// Suppose, we have row filter `a = 10`, and a partition spec + /// `bucket(a, 37) as bs`, if one row matches `a = 10`, then its partition + /// value should match `bucket(10, 37) as bs`, and we project `a = 10` to + /// `bs = bucket(10, 37)` + pub fn strict_project( + &self, + name: &str, + predicate: &BoundPredicate, + ) -> Result<Option<Predicate>> { + let func = create_transform_function(self)?; + + match self { + Transform::Identity => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => Ok(Some(Predicate::Binary(BinaryExpression::new( + expr.op(), + Reference::new(name), + expr.literal().to_owned(), + )))), + BoundPredicate::Set(expr) => Ok(Some(Predicate::Set(SetExpression::new( + expr.op(), + Reference::new(name), + expr.literals().to_owned(), + )))), + _ => Ok(None), + }, + Transform::Bucket(_) => match predicate { + BoundPredicate::Unary(expr) => Self::project_unary(expr.op(), name), + BoundPredicate::Binary(expr) => { + self.project_binary_expr(name, PredicateOperator::NotEq, expr, &func) + } + BoundPredicate::Set(expr) => { + self.project_set_expr(expr, PredicateOperator::NotIn, name, &func) + } + _ => Ok(None), + }, + Transform::Truncate(width) => { + match predicate { + BoundPredicate::Unary(expr) => { + if matches!( + expr.term().field().field_type.as_primitive_type(), + Some(&PrimitiveType::Int) + | Some(&PrimitiveType::Long) + | Some(&PrimitiveType::Decimal { .. 
}) + ) { + Self::project_unary(expr.op(), name) + } else { + // #TODO: Why other type is not supported? Review Comment: It's a question, actually. I referred to the implementation at: https://github.com/apache/iceberg-python/blob/0aecc0365b08617deee97d8a88c16d5e3aa0724e/pyiceberg/transforms.py#L822. But I'm not sure why it can't apply to other types, like string or bytes. cc @Fokko -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org