rdblue commented on code in PR #6437:
URL: https://github.com/apache/iceberg/pull/6437#discussion_r1056510924


##########
python/pyiceberg/expressions/visitors.py:
##########
@@ -753,3 +756,89 @@ def inclusive_projection(
     schema: Schema, spec: PartitionSpec, case_sensitive: bool = True
 ) -> Callable[[BooleanExpression], BooleanExpression]:
     return InclusiveProjection(schema, spec, case_sensitive).project
+
+
+class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
+    """Converts the column names with the ones in the actual file
+
+    Args:
+      file_schema (Schema): The schema of the file
+      case_sensitive (bool): Whether to consider case when binding a reference 
to a field in a schema, defaults to True
+
+    Raises:
+        TypeError: In the case of an UnboundPredicate
+        ValueError: When a column name cannot be found
+    """
+
+    file_schema: Schema
+    case_sensitive: bool
+
+    def __init__(self, file_schema: Schema, case_sensitive: bool) -> None:
+        self.file_schema = file_schema
+        self.case_sensitive = case_sensitive
+
+    def visit_true(self) -> BooleanExpression:
+        return AlwaysTrue()
+
+    def visit_false(self) -> BooleanExpression:
+        return AlwaysFalse()
+
+    def visit_not(self, child_result: BooleanExpression) -> BooleanExpression:
+        return Not(child=child_result)
+
+    def visit_and(self, left_result: BooleanExpression, right_result: 
BooleanExpression) -> BooleanExpression:
+        return And(left=left_result, right=right_result)
+
+    def visit_or(self, left_result: BooleanExpression, right_result: 
BooleanExpression) -> BooleanExpression:
+        return Or(left=left_result, right=right_result)
+
+    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> 
BooleanExpression:
+        raise TypeError(f"Expected Bound Predicate, got: {predicate.term}")
+
+    def visit_bound_predicate(self, predicate: BoundPredicate[L]) -> 
BooleanExpression:
+        file_column_name = 
self.file_schema.find_column_name(predicate.term.ref().field.field_id)
+
+        if not file_column_name:
+            raise ValueError(f"Not found in schema: {file_column_name}")
+
+        if isinstance(predicate, BoundUnaryPredicate):
+            return predicate.as_unbound(file_column_name)
+        elif isinstance(predicate, BoundLiteralPredicate):
+            return predicate.as_unbound(file_column_name, predicate.literal)
+        elif isinstance(predicate, BoundSetPredicate):
+            return predicate.as_unbound(file_column_name, predicate.literals)
+        else:
+            raise ValueError(f"Unknown predicate: {predicate}")
+
+
+def translate_column_names(expr: BooleanExpression, file_schema: Schema, 
case_sensitive: bool) -> BooleanExpression:
+    return visit(expr, _ColumnNameTranslator(file_schema, case_sensitive))
+
+
+class ExpressionFieldIDs(BooleanExpressionVisitor[Set[int]]):

Review Comment:
   In some cases, we prefix the visitor classes with `_`, but I think it's 
really easy to forget to do it so we're inconsistent. Is there a good way to 
signal that people shouldn't rely on the API or behavior of these classes? I'm 
also fine with just not worrying about it and using normal names.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to