kevinjqliu commented on code in PR #977:
URL: https://github.com/apache/iceberg-python/pull/977#discussion_r1700690161


##########
pyiceberg/table/__init__.py:
##########
@@ -1189,15 +1197,48 @@ def update_table_metadata(
         new_metadata = _apply_table_update(update, new_metadata, context)
 
     # Update last_updated_ms if it was not updated by update operations
-    if context.has_changes() and base_metadata.last_updated_ms == 
new_metadata.last_updated_ms:
-        new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
+    if context.has_changes():
+        if metadata_location:
+            new_metadata = _update_table_metadata_log(new_metadata, 
metadata_location, base_metadata.last_updated_ms)
+        if base_metadata.last_updated_ms == new_metadata.last_updated_ms:
+            new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
 
     if enforce_validation:
         return TableMetadataUtil.parse_obj(new_metadata.model_dump())
     else:
         return new_metadata.model_copy(deep=True)
 
 
+def _update_table_metadata_log(base_metadata: TableMetadata, 
metadata_location: str, last_updated_ms: int) -> TableMetadata:
+    """
+    Update the metadata log of the table.
+
+    Args:
+        base_metadata: The base metadata to be updated.
+        metadata_location: Current metadata location of the table
+        last_updated_ms: The timestamp of the last update of table metadata
+
+    Returns:
+        The metadata with the updates applied to metadata-log.
+    """
+    max_metadata_log_entries = max(
+        1,
+        PropertyUtil.property_as_int(
+            base_metadata.properties,
+            TableProperties.METADATA_PREVIOUS_VERSIONS_MAX,
+            TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT,
+        ),  # type: ignore
+    )
+    previous_metadata_log = base_metadata.metadata_log
+    if len(base_metadata.metadata_log) >= max_metadata_log_entries:  # type: 
ignore
+        remove_index = len(base_metadata.metadata_log) - 
max_metadata_log_entries + 1  # type: ignore
+        previous_metadata_log = base_metadata.metadata_log[remove_index:]
+    metadata_updates: Dict[str, Any] = {
+        "metadata_log": previous_metadata_log + 
[MetadataLogEntry(metadata_file=metadata_location, 
timestamp_ms=last_updated_ms)]

Review Comment:
   is there a specify order? append to front vs append to back



##########
pyiceberg/table/__init__.py:
##########
@@ -1189,15 +1197,48 @@ def update_table_metadata(
         new_metadata = _apply_table_update(update, new_metadata, context)
 
     # Update last_updated_ms if it was not updated by update operations
-    if context.has_changes() and base_metadata.last_updated_ms == 
new_metadata.last_updated_ms:
-        new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
+    if context.has_changes():
+        if metadata_location:
+            new_metadata = _update_table_metadata_log(new_metadata, 
metadata_location, base_metadata.last_updated_ms)
+        if base_metadata.last_updated_ms == new_metadata.last_updated_ms:
+            new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
 
     if enforce_validation:
         return TableMetadataUtil.parse_obj(new_metadata.model_dump())
     else:
         return new_metadata.model_copy(deep=True)
 
 
+def _update_table_metadata_log(base_metadata: TableMetadata, 
metadata_location: str, last_updated_ms: int) -> TableMetadata:
+    """
+    Update the metadata log of the table.
+
+    Args:
+        base_metadata: The base metadata to be updated.
+        metadata_location: Current metadata location of the table
+        last_updated_ms: The timestamp of the last update of table metadata
+
+    Returns:
+        The metadata with the updates applied to metadata-log.
+    """
+    max_metadata_log_entries = max(
+        1,

Review Comment:
   whats the reasoning behind the `max` here? Can we just use the default value?



##########
pyiceberg/table/__init__.py:
##########
@@ -1170,14 +1174,18 @@ def _(
 
 
 def update_table_metadata(
-    base_metadata: TableMetadata, updates: Tuple[TableUpdate, ...], 
enforce_validation: bool = False
+    base_metadata: TableMetadata,
+    updates: Tuple[TableUpdate, ...],
+    enforce_validation: bool = False,
+    metadata_location: Optional[str] = None,

Review Comment:
   Is `base_metadata.location` the same as `metadata_location`? 



##########
pyiceberg/table/__init__.py:
##########
@@ -1189,15 +1197,48 @@ def update_table_metadata(
         new_metadata = _apply_table_update(update, new_metadata, context)
 
     # Update last_updated_ms if it was not updated by update operations
-    if context.has_changes() and base_metadata.last_updated_ms == 
new_metadata.last_updated_ms:
-        new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
+    if context.has_changes():
+        if metadata_location:
+            new_metadata = _update_table_metadata_log(new_metadata, 
metadata_location, base_metadata.last_updated_ms)
+        if base_metadata.last_updated_ms == new_metadata.last_updated_ms:
+            new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
 
     if enforce_validation:
         return TableMetadataUtil.parse_obj(new_metadata.model_dump())
     else:
         return new_metadata.model_copy(deep=True)
 
 
+def _update_table_metadata_log(base_metadata: TableMetadata, 
metadata_location: str, last_updated_ms: int) -> TableMetadata:
+    """
+    Update the metadata log of the table.
+
+    Args:
+        base_metadata: The base metadata to be updated.
+        metadata_location: Current metadata location of the table
+        last_updated_ms: The timestamp of the last update of table metadata
+
+    Returns:
+        The metadata with the updates applied to metadata-log.
+    """
+    max_metadata_log_entries = max(
+        1,
+        PropertyUtil.property_as_int(
+            base_metadata.properties,
+            TableProperties.METADATA_PREVIOUS_VERSIONS_MAX,
+            TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT,
+        ),  # type: ignore
+    )
+    previous_metadata_log = base_metadata.metadata_log
+    if len(base_metadata.metadata_log) >= max_metadata_log_entries:  # type: 
ignore
+        remove_index = len(base_metadata.metadata_log) - 
max_metadata_log_entries + 1  # type: ignore
+        previous_metadata_log = base_metadata.metadata_log[remove_index:]
+    metadata_updates: Dict[str, Any] = {

Review Comment:
   maybe append first then truncate the log to preserve max size



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to