ZENOTME commented on code in PR #241: URL: https://github.com/apache/iceberg-rust/pull/241#discussion_r1528775973
########## crates/iceberg/src/scan.rs: ########## @@ -214,11 +273,236 @@ impl FileScanTask { } } +/// Evaluates manifest files to see if their partition values comply with a filter predicate +pub struct PartitionEvaluator { + manifest_eval_visitor: ManifestEvalVisitor, +} + +impl PartitionEvaluator { + pub(crate) fn new( + partition_spec: PartitionSpecRef, + partition_filter: BoundPredicate, + table_schema: SchemaRef, + ) -> crate::Result<Self> { + let manifest_eval_visitor = ManifestEvalVisitor::manifest_evaluator( + partition_spec, + table_schema, + partition_filter, + true, + )?; + + Ok(PartitionEvaluator { + manifest_eval_visitor, + }) + } + + pub(crate) fn filter_manifest_file(&self, _manifest_file: &ManifestFile) -> bool { + self.manifest_eval_visitor.eval(_manifest_file) + } +} + +struct ManifestEvalVisitor { + partition_schema: SchemaRef, + partition_filter: BoundPredicate, + case_sensitive: bool, +} + +impl ManifestEvalVisitor { + fn new( + partition_schema: SchemaRef, + partition_filter: Predicate, + case_sensitive: bool, + ) -> crate::Result<Self> { + let partition_filter = partition_filter.bind(partition_schema.clone(), case_sensitive)?; + + Ok(Self { + partition_schema, + partition_filter, + case_sensitive, + }) + } + + pub(crate) fn manifest_evaluator( + partition_spec: PartitionSpecRef, + table_schema: SchemaRef, + partition_filter: BoundPredicate, + case_sensitive: bool, + ) -> crate::Result<Self> { + let partition_type = partition_spec.partition_type(&table_schema)?; + + // this is needed as SchemaBuilder.with_fields expects an iterator over + // Arc<NestedField> rather than &Arc<NestedField> + let cloned_partition_fields: Vec<_> = partition_type.fields().iter().map(Arc::clone).collect(); + + let partition_schema = Schema::builder() + .with_fields(cloned_partition_fields) + .build()?; + + let partition_schema_ref = Arc::new(partition_schema); + + let inclusive_projection = + InclusiveProjection::new(table_schema.clone(), partition_spec.clone()); + let unbound_partition_filter = inclusive_projection.project(&partition_filter); + + Ok(Self::new( + partition_schema_ref.clone(), + unbound_partition_filter, + case_sensitive, + )?) + } + + pub(crate) fn eval(&self, manifest_file: &ManifestFile) -> bool { + if manifest_file.partitions.is_empty() { + return true; + } + + self.visit(&self.partition_filter, &manifest_file.partitions) + } + + // see https://github.com/apache/iceberg-python/blob/ea9da8856a686eaeda0d5c2be78d5e3102b67c44/pyiceberg/expressions/visitors.py#L548 + fn visit(&self, predicate: &BoundPredicate, partitions: &Vec<FieldSummary>) -> bool { + match predicate { + AlwaysTrue => true, + BoundPredicate::AlwaysFalse => false, + BoundPredicate::And(expr) => { + self.visit(expr.inputs()[0], partitions) && self.visit(expr.inputs()[1], partitions) Review Comment: Maybe we can have something like `fn eval(&self, Vec<Literal>)->bool` for the `Predicate`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org