liurenjie1024 commented on code in PR #62: URL: https://github.com/apache/iceberg-rust/pull/62#discussion_r1363528032
########## crates/iceberg/src/spec/table_metadata.rs: ########## @@ -93,22 +116,278 @@ pub struct TableMetadata { /// previous metadata file location should be added to the list. /// Tables can be configured to remove oldest metadata log entries and /// keep a fixed-size log of the most recent entries after a commit. + #[builder(default, setter(custom))] metadata_log: Vec<MetadataLog>, /// A list of sort orders, stored as full sort order objects. + #[builder(default, setter(custom))] sort_orders: HashMap<i64, SortOrder>, /// Default sort order id of the table. Note that this could be used by /// writers, but is not used when reading because reads use the specs /// stored in manifest files. + #[builder(default = "DEFAULT_SORT_ORDER_ID", setter(custom))] default_sort_order_id: i64, ///A map of snapshot references. The map keys are the unique snapshot reference /// names in the table, and the map values are snapshot reference objects. /// There is always a main branch reference pointing to the current-snapshot-id /// even if the refs map is null. + #[builder(default = "Self::default_ref()", setter(custom))] refs: HashMap<String, SnapshotReference>, } +// We define a from implementation from builder Error to Iceberg Error +impl From<UninitializedFieldError> for Error { + fn from(ufe: UninitializedFieldError) -> Error { + Error::new(ErrorKind::DataInvalid, ufe.to_string()) + } +} + +impl TableMetadataBuilder { + /// Get current time in ms + fn current_time_ms() -> i64 { + UNIX_EPOCH + .elapsed() + .unwrap() + .as_millis() + .try_into() + .unwrap() + } + + fn default_ref() -> HashMap<String, SnapshotReference> { + HashMap::from([( + "main".to_string(), + SnapshotReference { + snapshot_id: -1, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: None, + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }, + )]) + } + + /// Add or replace a snapshot_reference + /// branch : branch id of the snapshot + /// snapshot_ref : SnapshotReference to add or update + fn with_ref(&mut self, branch: String, snapshot_ref: SnapshotReference) -> &mut Self { + if branch == "main" { + self.current_snapshot_id = Some(Some(snapshot_ref.snapshot_id)); + if let Some(vec) = self.snapshot_log.as_mut() { + vec.push(SnapshotLog { + snapshot_id: snapshot_ref.snapshot_id, + timestamp_ms: self.last_updated_ms.unwrap_or(Self::current_time_ms()), + }) + } else { + self.snapshot_log = Some(vec![SnapshotLog { + snapshot_id: snapshot_ref.snapshot_id, + timestamp_ms: self.last_updated_ms.unwrap_or(Self::current_time_ms()), + }]) + } + } + if let Some(map) = self.refs.as_mut() { + map.insert(branch, snapshot_ref); + } else { + self.refs = Some(HashMap::from([(branch, snapshot_ref)])); + } + self + } + + /// Initialize a TableMetadata with a TableCreation struct + /// the Schema, sortOrder and PartitionSpec will be set as current + pub fn from_table_creation(&mut self, tc: TableCreation) -> &mut Self { + self.with_location(tc.location) + .with_properties(tc.properties) + .with_default_sort_order(tc.sort_order) + .with_current_schema(tc.schema); + + if let Some(partition_spec) = tc.partition_spec { + self.with_default_partition_spec(partition_spec); + } + self + } + + /// Add or replace a schema + /// schema : Schema to be added or replaced + pub fn with_schema(&mut self, schema: Schema) -> &mut Self { Review Comment: > the problematic with failing in the builder method is that we will not respect the builder pattern that should return "Self". Instead we will have a Result struct. Why builder pattern should return `Self` rather `Result`? In fact, I don't want to make a normal builder, since it's quite easy to construct an invalid table metadata. > I think it's better to reject schema creation if no id is set if we want to avoid this corner case. This doesn't work. Thinking about the transaction api, the user wants to change the schema of the table, why he needs to know the scheme id in advance which should be allocated by catalog? > For adding Schema the method should be on the TableMetadata struct itself as it may fails in case of duplicate id. I prefer to make `TableMetadata` immutable and leave the states for validity checking in `TableMetadataBuilder`. Please be aware that the main use case of `TableMetadataBuilder` is transaction api, so we should ensure that the result is valid. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org