viirya commented on code in PR #243: URL: https://github.com/apache/iceberg-rust/pull/243#discussion_r1518675699
########## crates/iceberg/src/scan.rs: ########## @@ -143,37 +144,43 @@ pub type FileScanTaskStream = BoxStream<'static, crate::Result<FileScanTask>>; impl TableScan { /// Returns a stream of file scan tasks. pub async fn plan_files(&self) -> crate::Result<FileScanTaskStream> { - let manifest_list = self - .snapshot - .load_manifest_list(&self.file_io, &self.table_metadata) + let snapshot = self.snapshot.clone(); + let table_metadata = self.table_metadata.clone(); + let file_io = self.file_io.clone(); + + Ok(try_stream! { + let manifest_list = snapshot + .clone() + .load_manifest_list(&file_io, &table_metadata) .await?; - // Generate data file stream - let mut file_scan_tasks = Vec::with_capacity(manifest_list.entries().len()); - for manifest_list_entry in manifest_list.entries().iter() { - // Data file - let manifest = manifest_list_entry.load_manifest(&self.file_io).await?; - - for manifest_entry in manifest.entries().iter().filter(|e| e.is_alive()) { - match manifest_entry.content_type() { - DataContentType::EqualityDeletes | DataContentType::PositionDeletes => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Delete files are not supported yet.", - )); - } Review Comment: Besides, since the return type is `FileScanTaskStream`, which is a stream of `crate::Result<FileScanTask>`, I feel it would be more appropriate to yield an `Err` as an item of the stream instead of returning an `Err` for the whole stream. ########## crates/iceberg/src/scan.rs: ########## @@ -143,37 +144,43 @@ pub type FileScanTaskStream = BoxStream<'static, crate::Result<FileScanTask>>; impl TableScan { /// Returns a stream of file scan tasks. pub async fn plan_files(&self) -> crate::Result<FileScanTaskStream> { - let manifest_list = self - .snapshot - .load_manifest_list(&self.file_io, &self.table_metadata) + let snapshot = self.snapshot.clone(); + let table_metadata = self.table_metadata.clone(); + let file_io = self.file_io.clone(); + + Ok(try_stream! 
{ + let manifest_list = snapshot + .clone() + .load_manifest_list(&file_io, &table_metadata) .await?; - // Generate data file stream - let mut file_scan_tasks = Vec::with_capacity(manifest_list.entries().len()); - for manifest_list_entry in manifest_list.entries().iter() { - // Data file - let manifest = manifest_list_entry.load_manifest(&self.file_io).await?; - - for manifest_entry in manifest.entries().iter().filter(|e| e.is_alive()) { - match manifest_entry.content_type() { - DataContentType::EqualityDeletes | DataContentType::PositionDeletes => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - "Delete files are not supported yet.", - )); - } Review Comment: Besides, since the return type is `FileScanTaskStream`, which is a stream with one `crate::Result<FileScanTask>` per entry, I feel it would be more appropriate to yield an `Err` for the failing entry as an item of the stream instead of returning an `Err` for the whole stream. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org