Fokko commented on code in PR #498: URL: https://github.com/apache/iceberg-python/pull/498#discussion_r1549588018
########## pyiceberg/table/__init__.py: ########## @@ -760,7 +852,10 @@ def update_table_metadata(base_metadata: TableMetadata, updates: Tuple[TableUpda for update in updates: new_metadata = _apply_table_update(update, new_metadata, context) - return new_metadata.model_copy(deep=True) + if enforce_validation: + return TableMetadataUtil.parse_obj(new_metadata.model_dump()) Review Comment: This is probably quite expensive, but I think it is worth paying. ########## mkdocs/docs/api.md: ########## @@ -165,6 +165,25 @@ catalog.create_table( ) ``` +To create a table with some subsequent changes in a transaction: Review Comment: ```suggestion To create a table with some subsequent changes atomically in a transaction: ``` ########## pyiceberg/catalog/__init__.py: ########## @@ -288,6 +291,78 @@ def __init__(self, name: str, **properties: str): def _load_file_io(self, properties: Properties = EMPTY_DICT, location: Optional[str] = None) -> FileIO: return load_file_io({**self.properties, **properties}, location) + def _create_staged_table( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + location: Optional[str] = None, + partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC, + sort_order: SortOrder = UNSORTED_SORT_ORDER, + properties: Properties = EMPTY_DICT, + ) -> StagedTable: + """Create a table and return the table instance without committing the changes. + + Args: + identifier (str | Identifier): Table identifier. + schema (Schema): Table's schema. + location (str | None): Location for the table. Optional Argument. + partition_spec (PartitionSpec): PartitionSpec for the table. + sort_order (SortOrder): SortOrder for the table. + properties (Properties): Table properties that can be a string based dictionary. + + Returns: + Table: the created table instance. + + Raises: + TableAlreadyExistsError: If a table with the name already exists. 
+ """ + schema: Schema = self._convert_schema_if_needed(schema) # type: ignore + + database_name, table_name = self.identifier_to_database_and_table(identifier) + + location = self._resolve_table_location(location, database_name, table_name) + metadata_location = self._get_metadata_location(location=location) + metadata = new_table_metadata( + location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties + ) + io = load_file_io(properties=self.properties, location=metadata_location) + return StagedTable( + identifier=(self.name, database_name, table_name), + metadata=metadata, + metadata_location=metadata_location, + io=io, + catalog=self, + ) + + def create_table_transaction( Review Comment: Thanks @syun64 for breaking the tie here, let me mark this conversation as resolved :) ########## pyiceberg/catalog/__init__.py: ########## @@ -717,6 +791,10 @@ def _get_updated_props_and_update_summary( return properties_update_summary, updated_properties + @staticmethod + def empty_table_metadata() -> TableMetadata: + return TableMetadataV1(location="", last_column_id=-1, schema=Schema()) Review Comment: Thanks for the detailed explanation, that makes sense to me 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org