HonahX commented on code in PR #539: URL: https://github.com/apache/iceberg-python/pull/539#discussion_r1537148759
########## pyiceberg/transforms.py: ########## @@ -521,6 +556,16 @@ def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredica else: raise ValueError(f"Could not project: {pred}") + def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]: + if isinstance(pred, BoundUnaryPredicate): + return pred.as_unbound(Reference(name)) + elif isinstance(pred, BoundLiteralPredicate): + return pred.as_unbound(Reference(name), pred.literal) + elif isinstance(pred, BoundSetPredicate): + return pred.as_unbound(Reference(name), pred.literals) + else: + raise ValueError(f"Could not project: {pred}") Review Comment: I am a little bit confused here: why do we only raise an error in IdentityTransform? ########## pyiceberg/expressions/visitors.py: ########## @@ -1433,6 +1433,30 @@ def visit_not_starts_with(self, term: BoundTerm[L], literal: Literal[L]) -> bool return ROWS_MIGHT_MATCH +def strict_projection( + schema: Schema, spec: PartitionSpec, case_sensitive: bool = True +) -> Callable[[BooleanExpression], BooleanExpression]: + return StrictProjection(schema, spec, case_sensitive).project + + +class StrictProjection(ProjectionEvaluator): + def visit_bound_predicate(self, predicate: BoundPredicate[Any]) -> BooleanExpression: + parts = self.spec.fields_by_source_id(predicate.term.ref().field.field_id) + + result: BooleanExpression = AlwaysFalse() + for part in parts: + # consider (ts > 2019-01-01T01:00:00) with day(ts) and hour(ts) + # projections: d >= 2019-01-02 and h >= 2019-01-01-02 (note the inclusive bounds). + # any timestamp where either projection predicate is true must match the original + # predicate. For example, ts = 2019-01-01T03:00:00 matches the hour projection but not + # the day, but does match the original predicate. 
+ incl_projection = part.transform.strict_project(name=part.name, pred=predicate) Review Comment: ```suggestion strict_projection = part.transform.strict_project(name=part.name, pred=predicate) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org