pdames commented on code in PR #140:
URL: https://github.com/apache/iceberg-python/pull/140#discussion_r1436639053
##########
pyiceberg/catalog/__init__.py:
##########
@@ -587,8 +590,34 @@ def _write_metadata(metadata: TableMetadata, io: FileIO,
metadata_path: str) ->
ToOutputFile.table_metadata(metadata, io.new_output(metadata_path))
@staticmethod
- def _get_metadata_location(location: str) -> str:
- return f"{location}/metadata/00000-{uuid.uuid4()}.metadata.json"
+ def _get_metadata_location(location: str, new_version: int = 0) -> str:
+ if new_version < 0:
+ raise ValueError(f"Table metadata version: {new_version} cannot be
negative")
+ version_str = f"{new_version:05d}"
+ return
f"{location}/metadata/{version_str}-{uuid.uuid4()}.metadata.json"
+
+ @staticmethod
+ def _parse_metadata_version(metadata_location: str) -> int:
+ """Parse the version from the metadata location.
+
+ The version is the first part of the file name, before the first dash.
+ For example, the version of the metadata file
+
`s3://bucket/db/tb/metadata/00001-6c97e413-d51b-4538-ac70-12fe2a85cb83.metadata.json`
+ is 1.
+ If the path does not comply with the pattern, the version is defaulted
to be -1, ensuring
+ that the next metadata file is treated as having version 0.
+
+ Args:
+ metadata_location (str): The location of the metadata file.
+
+ Returns:
+ int: The version of the metadata file. -1 if the file name does
not have valid version string
+ """
+ file_name = metadata_location.split("/")[-1]
+ if file_name_match :=
TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name):
+ return int(file_name_match.group(1))
+ else:
+ return -1
Review Comment:
If we take the above change to add a capturing group around the UUID, we can
assert its validity here with something like:
```suggestion
file_name = metadata_location.split("/")[-1] if metadata_location
else ""
if file_name_match :=
TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name):
try:
UUID(file_name_match.group(2))
except ValueError:
return -1
return int(file_name_match.group(1))
return -1
```
(Again, assuming this type of stricter assertion against table metadata file
names is actually desirable - not sure if @Fokko or @jackye1995 have any
opinions or additional context here).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]