Sl1mb0 commented on code in PR #1987:
URL: https://github.com/apache/iceberg-rust/pull/1987#discussion_r2662211763
##########
crates/iceberg/src/transaction/snapshot.rs:
##########
@@ -163,14 +166,129 @@ impl<'a> SnapshotProducer<'a> {
Ok(())
}
+ /// Validates added delete files.
+ ///
+ /// Checks that:
+ /// - Delete files are not used with format version 1
+ /// - Delete files have valid content types (PositionDeletes or
EqualityDeletes)
+ /// - Equality delete files have equality_ids set
+ /// - Delete files reference valid partition specs
+ /// - Partition values are compatible with partition types
+ pub(crate) fn validate_added_delete_files(&self) -> Result<()> {
+ let format_version = self.table.metadata().format_version();
+ if format_version == FormatVersion::V1 &&
!self.added_delete_files.is_empty() {
+ return Err(Error::new(
+ ErrorKind::FeatureUnsupported,
+ "Delete files are not supported in format version 1. Upgrade
the table to format version 2 or later.",
+ ));
+ }
+
+ for delete_file in &self.added_delete_files {
+ match delete_file.content_type() {
+ DataContentType::PositionDeletes => {
+ if delete_file.equality_ids().is_some() {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Position delete files must not have equality_ids
set",
+ ));
+ }
+ }
+ DataContentType::EqualityDeletes => {
+ let ids = delete_file.equality_ids().ok_or_else(|| {
+ Error::new(
+ ErrorKind::DataInvalid,
+ "Equality delete files must have equality_ids set",
+ )
+ })?;
+ if ids.is_empty() {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Equality delete files must have equality_ids set",
+ ));
+ }
+ }
+ DataContentType::Data => {
+ return Err(Error::new(
+ ErrorKind::DataInvalid,
+ "Data content type is not allowed for delete files",
+ ));
+ }
+ }
+
+ // TODO: This validation is too strict for partition evolution
scenarios where delete
+ // files may reference older partition specs. Once
manifest-per-spec is implemented,
+ // relax this to check that the spec_id exists rather than
matching the default.
+ if self.table.metadata().default_partition_spec_id() !=
delete_file.partition_spec_id {
Review Comment:
Should a ticket be filed for this `TODO`? Not sure if one already exists.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]