jshmchenxi commented on code in PR #960:
URL: https://github.com/apache/iceberg-rust/pull/960#discussion_r1976556165
##########
crates/iceberg/src/transaction.rs:
##########
@@ -205,8 +208,86 @@ impl<'a> FastAppendAction<'a> {
         Ok(self)
     }
 
+    /// Adds existing parquet files
+    pub async fn add_parquet_files(mut self, file_path: Vec<String>) -> Result<Transaction<'a>> {
+        if !self
+            .snapshot_produce_action
+            .tx
+            .table
+            .metadata()
+            .default_spec
+            .is_unpartitioned()
+        {
+            return Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Appending to partitioned tables is not supported",
+            ));
+        }
+
+        let table_metadata = self.snapshot_produce_action.tx.table.metadata();
+
+        let data_files = ParquetWriter::parquet_files_to_data_files(
+            self.snapshot_produce_action.tx.table.file_io(),
+            file_path,
+            table_metadata,
+        )
+        .await?;
+
+        self.add_data_files(data_files)?;
+
+        self.apply().await
+    }
+
     /// Finished building the action and apply it to the transaction.
     pub async fn apply(self) -> Result<Transaction<'a>> {
+        // Checks duplicate files

Review Comment:
Given the potential performance impact of iterating manifests, should we add a flag `check_duplicate_files` to control the behaviour, similar to Spark [AddFilesProcedure](https://github.com/apache/iceberg/blob/main/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/procedures/AddFilesProcedure.java)?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org