kevinjqliu commented on code in PR #392: URL: https://github.com/apache/iceberg-python/pull/392#discussion_r1482123800
########## tests/integration/test_rest_schema.py: ########## @@ -2497,3 +2500,32 @@ def test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None: assert "Updates in a single commit need to be unique, duplicate: <class 'pyiceberg.table.AddSchemaUpdate'>" in str( exc_info.value ) + + +@pytest.mark.integration +def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) -> None: + schema = Schema( + NestedField(field_id=5, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=4, name="col_fixed", field_type=FixedType(25), required=False), + ) + partition_spec = PartitionSpec( + PartitionField(source_id=5, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + sort_order = SortOrder(SortField(source_id=4, transform=IdentityTransform())) + tbl_name = "default.test_create_integrity" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + tbl = catalog.create_table(identifier=tbl_name, schema=schema, partition_spec=partition_spec, sort_order=sort_order) + expected_schema = Schema( + NestedField(field_id=1, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=2, name="col_fixed", field_type=FixedType(25), required=False), + ) + expected_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + expected_sort_order = SortOrder(SortField(source_id=2, transform=IdentityTransform())) + assert tbl.schema() == expected_schema + assert tbl.spec() == expected_spec + assert tbl.sort_order() == expected_sort_order Review Comment: `SortOrder` doesn't seem to have a `__eq__` function defined. https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/table/sorting.py#L127-L164 What is the behavior here? 
########## tests/integration/test_rest_schema.py: ########## @@ -2497,3 +2500,32 @@ def test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None: assert "Updates in a single commit need to be unique, duplicate: <class 'pyiceberg.table.AddSchemaUpdate'>" in str( exc_info.value ) + + +@pytest.mark.integration +def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) -> None: + schema = Schema( + NestedField(field_id=5, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=4, name="col_fixed", field_type=FixedType(25), required=False), + ) + partition_spec = PartitionSpec( + PartitionField(source_id=5, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + sort_order = SortOrder(SortField(source_id=4, transform=IdentityTransform())) + tbl_name = "default.test_create_integrity" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + tbl = catalog.create_table(identifier=tbl_name, schema=schema, partition_spec=partition_spec, sort_order=sort_order) + expected_schema = Schema( + NestedField(field_id=1, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=2, name="col_fixed", field_type=FixedType(25), required=False), + ) + expected_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + expected_sort_order = SortOrder(SortField(source_id=2, transform=IdentityTransform())) + assert tbl.schema() == expected_schema Review Comment: nit: should we also check `tbl.schema().schema_id`, since `Schema`'s `__eq__` doesn't check for that? https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/schema.py#L104-L118 ########## tests/integration/test_rest_schema.py: ########## @@ -2497,3 +2500,32 @@ def test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None: assert "Updates in a single commit need to be unique, duplicate: <class 
'pyiceberg.table.AddSchemaUpdate'>" in str( exc_info.value ) + + +@pytest.mark.integration +def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) -> None: + schema = Schema( + NestedField(field_id=5, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=4, name="col_fixed", field_type=FixedType(25), required=False), + ) + partition_spec = PartitionSpec( + PartitionField(source_id=5, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + sort_order = SortOrder(SortField(source_id=4, transform=IdentityTransform())) + tbl_name = "default.test_create_integrity" + try: + catalog.drop_table(tbl_name) + except NoSuchTableError: + pass + tbl = catalog.create_table(identifier=tbl_name, schema=schema, partition_spec=partition_spec, sort_order=sort_order) + expected_schema = Schema( + NestedField(field_id=1, name="col_uuid", field_type=UUIDType(), required=False), + NestedField(field_id=2, name="col_fixed", field_type=FixedType(25), required=False), + ) + expected_spec = PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="col_uuid"), spec_id=0 + ) + expected_sort_order = SortOrder(SortField(source_id=2, transform=IdentityTransform())) + assert tbl.schema() == expected_schema + assert tbl.spec() == expected_spec Review Comment: `PartitionSpec`'s `__eq__` checks for the `spec_id` https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/partitioning.py#L110-L119 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org