Fokko commented on code in PR #6437: URL: https://github.com/apache/iceberg/pull/6437#discussion_r1056875859
##########
python/pyiceberg/expressions/visitors.py:
##########
@@ -753,3 +756,89 @@ def inclusive_projection(
     schema: Schema, spec: PartitionSpec, case_sensitive: bool = True
 ) -> Callable[[BooleanExpression], BooleanExpression]:
     return InclusiveProjection(schema, spec, case_sensitive).project
+
+
+class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
+    """Converts the column names with the ones in the actual file
+
+    Args:
+        file_schema (Schema): The schema of the file
+        case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True
+
+    Raises:
+        TypeError: In the case of an UnboundPredicate
+        ValueError: When a column name cannot be found
+    """
+
+    file_schema: Schema
+    case_sensitive: bool
+
+    def __init__(self, file_schema: Schema, case_sensitive: bool) -> None:
+        self.file_schema = file_schema
+        self.case_sensitive = case_sensitive
+
+    def visit_true(self) -> BooleanExpression:
+        return AlwaysTrue()
+
+    def visit_false(self) -> BooleanExpression:
+        return AlwaysFalse()
+
+    def visit_not(self, child_result: BooleanExpression) -> BooleanExpression:
+        return Not(child=child_result)
+
+    def visit_and(self, left_result: BooleanExpression, right_result: BooleanExpression) -> BooleanExpression:
+        return And(left=left_result, right=right_result)
+
+    def visit_or(self, left_result: BooleanExpression, right_result: BooleanExpression) -> BooleanExpression:
+        return Or(left=left_result, right=right_result)
+
+    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> BooleanExpression:
+        raise TypeError(f"Expected Bound Predicate, got: {predicate.term}")
+
+    def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> BooleanExpression:
+        file_column_name = self.file_schema.find_column_name(predicate.term.ref().field.field_id)
+
+        if not file_column_name:
+            raise ValueError(f"Not found in schema: {file_column_name}")
+
+        if isinstance(predicate, BoundUnaryPredicate):
+            return predicate.as_unbound(file_column_name)
+        elif isinstance(predicate, BoundLiteralPredicate):
+            return predicate.as_unbound(file_column_name, predicate.literal)
+        elif isinstance(predicate, BoundSetPredicate):
+            return predicate.as_unbound(file_column_name, predicate.literals)
+        else:
+            raise ValueError(f"Unknown predicate: {predicate}")

Review Comment:
Good call

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org