Re: [PR] feat: Convert predicate to arrow filter and push down to parquet reader [iceberg-rust]

via GitHub Sat, 27 Apr 2024 18:46:59 -0700


viirya commented on code in PR #295:
URL: https://github.com/apache/iceberg-rust/pull/295#discussion_r1581994781



##########
crates/iceberg/src/arrow/reader.rs:
##########
@@ -186,4 +216,399 @@ impl ArrowReader {
             Ok(ProjectionMask::leaves(parquet_schema, indices))
         }
     }
+
+    fn get_row_filter(&self, parquet_schema: &SchemaDescriptor) -> Result<Option<RowFilter>> {
+        if let Some(predicates) = &self.predicates {
+            let field_id_map = self.build_field_id_map(parquet_schema)?;
+
+            // Collect Parquet column indices from field ids
+            let mut collector = CollectFieldIdVisitor { field_ids: vec![] };
+            visit_predicate(&mut collector, predicates).unwrap();
+            let column_indices = collector
+                .field_ids
+                .iter()
+                .map(|field_id| {
+                    field_id_map.get(field_id).cloned().ok_or_else(|| {
+                        Error::new(ErrorKind::DataInvalid, "Field id not found in schema")
+                    })
+                })
+                .collect::<Result<Vec<_>>>()?;
+
+            // Convert BoundPredicates to ArrowPredicates
+            let mut converter = PredicateConverter {
+                columns: &column_indices,
+                projection_mask: ProjectionMask::leaves(parquet_schema, column_indices.clone()),
+                parquet_schema,
+                column_map: &field_id_map,
+            };
+            let arrow_predicate = visit_predicate(&mut converter, predicates)?;
+            Ok(Some(RowFilter::new(vec![arrow_predicate])))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Build the map of field id to Parquet column index in the schema.
+    fn build_field_id_map(&self, parquet_schema: &SchemaDescriptor) -> Result<HashMap<i32, usize>> {

Review Comment:
   Yea, removed it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat: Convert predicate to arrow filter and push down to parquet reader [iceberg-rust]

Reply via email to