Xuanwo commented on code in PR #861: URL: https://github.com/apache/iceberg-rust/pull/861#discussion_r1900169821
########## crates/iceberg/src/metadata_scan.rs: ########## @@ -50,6 +52,13 @@ impl MetadataTable { } } + /// Get the manifests table. + pub fn manifests(&self) -> ManifestsTable { + ManifestsTable { + metadata_table: self, Review Comment: Hi, I think we can simply use `Table` here, which suggests that `MetadataTable` is merely a wrapper and doesn't implement any additional API. ########## crates/iceberg/src/metadata_scan.rs: ########## @@ -128,6 +137,135 @@ impl<'a> SnapshotsTable<'a> { } } +/// Manifests table. +pub struct ManifestsTable<'a> { + metadata_table: &'a MetadataTable, +} + +impl<'a> ManifestsTable<'a> { + fn partition_summary_fields(&self) -> Vec<Field> { + vec![ + Field::new("contains_null", DataType::Boolean, false), + Field::new("contains_nan", DataType::Boolean, true), + Field::new("lower_bound", DataType::Utf8, true), + Field::new("upper_bound", DataType::Utf8, true), + ] + } + + fn schema(&self) -> Schema { + Schema::new(vec![ + Field::new("content", DataType::Int8, false), + Field::new("path", DataType::Utf8, false), + Field::new("length", DataType::Int64, false), + Field::new("partition_spec_id", DataType::Int32, false), + Field::new("added_snapshot_id", DataType::Int64, false), + Field::new("added_data_files_count", DataType::Int32, false), + Field::new("existing_data_files_count", DataType::Int32, false), + Field::new("deleted_data_files_count", DataType::Int32, false), + Field::new("added_delete_files_count", DataType::Int32, false), + Field::new("existing_delete_files_count", DataType::Int32, false), + Field::new("deleted_delete_files_count", DataType::Int32, false), + Field::new( + "partition_summaries", + DataType::List(Arc::new(Field::new_struct( + "item", + self.partition_summary_fields(), + false, + ))), + false, + ), + ]) + } + + /// Scans the manifests table. 
+ pub async fn scan(&self) -> Result<RecordBatch> { + let mut content = PrimitiveBuilder::<Int8Type>::new(); + let mut path = StringBuilder::new(); + let mut length = PrimitiveBuilder::<Int64Type>::new(); + let mut partition_spec_id = PrimitiveBuilder::<Int32Type>::new(); + let mut added_snapshot_id = PrimitiveBuilder::<Int64Type>::new(); + let mut added_data_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut existing_data_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut deleted_data_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut added_delete_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut existing_delete_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut deleted_delete_files_count = PrimitiveBuilder::<Int32Type>::new(); + let mut partition_summaries = ListBuilder::new(StructBuilder::from_fields( + Fields::from(self.partition_summary_fields()), + 0, + )) + .with_field(Arc::new(Field::new_struct( + "item", + self.partition_summary_fields(), + false, + ))); + + if let Some(snapshot) = self.metadata_table.metadata().current_snapshot() { + let manifest_list = snapshot + .load_manifest_list( + self.metadata_table.0.file_io(), + &self.metadata_table.0.metadata_ref(), + ) + .await?; + for manifest in manifest_list.entries() { + content.append_value(manifest.content.clone() as i8); Review Comment: It's a bit unusual to see something that can use `as i8` but still requires `clone`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org