kevinjqliu commented on code in PR #569: URL: https://github.com/apache/iceberg-python/pull/569#discussion_r1581278513
########## pyiceberg/table/__init__.py: ########## @@ -434,6 +456,9 @@ def overwrite( if table_arrow_schema != df.schema: df = df.cast(table_arrow_schema) + with self.update_snapshot(snapshot_properties=snapshot_properties).delete() as delete_snapshot: + delete_snapshot.delete_by_predicate(overwrite_filter) + with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as update_snapshot: # skip writing data files if the dataframe is empty Review Comment: does the order of operation matter here? Should update be first or delete? ########## tests/integration/test_writes/test_writes.py: ########## @@ -119,52 +119,55 @@ def test_query_count(spark: SparkSession, format_version: int) -> None: @pytest.mark.integration -@pytest.mark.parametrize("col", TEST_DATA_WITH_NULL.keys()) @pytest.mark.parametrize("format_version", [1, 2]) -def test_query_filter_null(spark: SparkSession, col: str, format_version: int) -> None: +def test_query_filter_null(spark: SparkSession, arrow_table_with_null: pa.Table, format_version: int) -> None: Review Comment: great! this should cut down the number of parameterized tests ########## pyiceberg/table/__init__.py: ########## @@ -292,7 +303,13 @@ def _apply(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequ requirement.validate(self.table_metadata) self._updates += updates - self._requirements += requirements + + # For the requirements, it does not make sense to add a requirement more than once + # For example, you cannot assert that the current schema has two different IDs + existing_requirements = {type(requirement) for requirement in self._requirements} + for new_requirement in requirements: + if type(new_requirement) not in existing_requirements: + self._requirements = self._requirements + requirements Review Comment: nit: should we add a warning about dropped requirements here? 
Alternatively, instead of dropping requirements, it might make sense to raise an error if there are multiple requirements of the same type. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org