ZENOTME commented on code in PR #241:
URL: https://github.com/apache/iceberg-rust/pull/241#discussion_r1528775973


##########
crates/iceberg/src/scan.rs:
##########
@@ -214,11 +273,236 @@ impl FileScanTask {
     }
 }
 
+/// Evaluates manifest files to see if their partition values comply with a 
filter predicate
+pub struct PartitionEvaluator {
+    manifest_eval_visitor: ManifestEvalVisitor,
+}
+
+impl PartitionEvaluator {
+    pub(crate) fn new(
+        partition_spec: PartitionSpecRef,
+        partition_filter: BoundPredicate,
+        table_schema: SchemaRef,
+    ) -> crate::Result<Self> {
+        let manifest_eval_visitor = ManifestEvalVisitor::manifest_evaluator(
+            partition_spec,
+            table_schema,
+            partition_filter,
+            true,
+        )?;
+
+        Ok(PartitionEvaluator {
+            manifest_eval_visitor,
+        })
+    }
+
+    pub(crate) fn filter_manifest_file(&self, _manifest_file: &ManifestFile) 
-> bool {
+        self.manifest_eval_visitor.eval(_manifest_file)
+    }
+}
+
+struct ManifestEvalVisitor {
+    partition_schema: SchemaRef,
+    partition_filter: BoundPredicate,
+    case_sensitive: bool,
+}
+
+impl ManifestEvalVisitor {
+    fn new(
+        partition_schema: SchemaRef,
+        partition_filter: Predicate,
+        case_sensitive: bool,
+    ) -> crate::Result<Self> {
+        let partition_filter = partition_filter.bind(partition_schema.clone(), 
case_sensitive)?;
+
+        Ok(Self {
+            partition_schema,
+            partition_filter,
+            case_sensitive,
+        })
+    }
+
+    pub(crate) fn manifest_evaluator(
+        partition_spec: PartitionSpecRef,
+        table_schema: SchemaRef,
+        partition_filter: BoundPredicate,
+        case_sensitive: bool,
+    ) -> crate::Result<Self> {
+        let partition_type = partition_spec.partition_type(&table_schema)?;
+
+        // this is needed as SchemaBuilder.with_fields expects an iterator over
+        // Arc<NestedField> rather than &Arc<NestedField>
+        let cloned_partition_fields: Vec<_> = 
partition_type.fields().iter().map(Arc::clone).collect();
+
+        let partition_schema = Schema::builder()
+            .with_fields(cloned_partition_fields)
+            .build()?;
+
+        let partition_schema_ref = Arc::new(partition_schema);
+
+        let inclusive_projection =
+            InclusiveProjection::new(table_schema.clone(), 
partition_spec.clone());
+        let unbound_partition_filter = 
inclusive_projection.project(&partition_filter);
+
+        Ok(Self::new(
+            partition_schema_ref.clone(),
+            unbound_partition_filter,
+            case_sensitive,
+        )?)
+    }
+
+    pub(crate) fn eval(&self, manifest_file: &ManifestFile) -> bool {
+        if manifest_file.partitions.is_empty() {
+            return true;
+        }
+
+        self.visit(&self.partition_filter, &manifest_file.partitions)
+    }
+
+    // see 
https://github.com/apache/iceberg-python/blob/ea9da8856a686eaeda0d5c2be78d5e3102b67c44/pyiceberg/expressions/visitors.py#L548
+    fn visit(&self, predicate: &BoundPredicate, partitions: 
&Vec<FieldSummary>) -> bool {
+        match predicate {
+            AlwaysTrue => true,
+            BoundPredicate::AlwaysFalse => false,
+            BoundPredicate::And(expr) => {
+                self.visit(expr.inputs()[0], partitions) && 
self.visit(expr.inputs()[1], partitions)

Review Comment:
   Maybe we can have something like `fn eval(&self, Vec<Literal>)->bool` for 
the `Predicate`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to