rdblue commented on code in PR #6437:
URL: https://github.com/apache/iceberg/pull/6437#discussion_r1050284725


##########
python/pyiceberg/expressions/visitors.py:
##########
@@ -753,3 +757,68 @@ def inclusive_projection(
     schema: Schema, spec: PartitionSpec, case_sensitive: bool = True
 ) -> Callable[[BooleanExpression], BooleanExpression]:
     return InclusiveProjection(schema, spec, case_sensitive).project
+
+
+class _ExpressionProjector(BooleanExpressionVisitor[BooleanExpression]):
+    """Rewrites a boolean expression by replacing unbound references with 
references to fields in a struct schema
+
+    Args:
+      table_schema (Schema): The schema of the table
+      file_schema (Schema): The schema of the file
+      case_sensitive (bool): Whether to consider case when binding a reference 
to a field in a schema, defaults to True
+
+    Raises:
+        TypeError: In the case a predicate is already bound
+    """
+
+    table_schema: Schema
+    file_schema: Schema
+    case_sensitive: bool
+
+    def __init__(self, table_schema: Schema, file_schema: Schema, 
case_sensitive: bool) -> None:
+        self.table_schema = table_schema
+        self.file_schema = file_schema
+        self.case_sensitive = case_sensitive
+
+    def visit_true(self) -> BooleanExpression:
+        return AlwaysTrue()
+
+    def visit_false(self) -> BooleanExpression:
+        return AlwaysFalse()
+
+    def visit_not(self, child_result: BooleanExpression) -> BooleanExpression:
+        return Not(child=child_result)
+
+    def visit_and(self, left_result: BooleanExpression, right_result: 
BooleanExpression) -> BooleanExpression:
+        return And(left=left_result, right=right_result)
+
+    def visit_or(self, left_result: BooleanExpression, right_result: 
BooleanExpression) -> BooleanExpression:
+        return Or(left=left_result, right=right_result)
+
+    def visit_unbound_predicate(self, predicate: UnboundPredicate[L]) -> 
BooleanExpression:
+        if not isinstance(predicate.term, Reference):
+            raise ValueError(f"Exprected reference: {predicate.term}")
+
+        field = self.table_schema.find_field(predicate.term.name, 
case_sensitive=self.case_sensitive)
+        file_column_name = self.file_schema.find_column_name(field.field_id)
+
+        if not file_column_name:
+            raise ValueError(f"Not found in schema: {file_column_name}")
+
+        if isinstance(predicate, UnaryPredicate):
+            return predicate.__class__(Reference(file_column_name))

Review Comment:
   Oh, nevermind. I see that the input expression is unbound. I'm not sure 
whether it would be better to do this to a bound expression or not. That seems 
like good separation of responsibilities and a good way to have consistent 
error messages when an expression isn't valid for a particular schema.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to