kevinjqliu commented on code in PR #1615: URL: https://github.com/apache/iceberg-python/pull/1615#discussion_r1948161771
########## pyiceberg/manifest.py: ########## @@ -320,6 +320,34 @@ def data_file_with_partition(partition_type: StructType, format_version: TableVe ) +class PositionDelete(Record): Review Comment: 👍 same as https://github.com/apache/iceberg/blob/1d9fefeb9680d782dc128f242604903e71c32f97/core/src/main/java/org/apache/iceberg/deletes/PositionDelete.java#L28-L30 ########## pyiceberg/table/metadata.py: ########## @@ -279,6 +279,37 @@ def specs_struct(self) -> StructType: return StructType(*nested_fields) + def spec_struct(self, spec_id: Optional[int] = None) -> StructType: + """Produce for a spec_id a struct of PartitionSpecs. Review Comment: ```suggestion """Produce for a spec_id a struct of PartitionSpecs. ``` ########## pyiceberg/io/pyarrow.py: ########## @@ -889,6 +890,19 @@ def _construct_fragment(fs: FileSystem, data_file: DataFile, file_format_kwargs: return _get_file_format(data_file.file_format, **file_format_kwargs).make_fragment(path, fs) +def _read_delete_file(fs: FileSystem, data_file: DataFile) -> Iterator[PositionDelete]: Review Comment: this is very similar to `_read_deletes`, do you think there's a way we can reimplement `_read_deletes` to return `PositionDelete` and refactor its usage? ########## pyiceberg/table/inspect.py: ########## @@ -384,6 +384,26 @@ def _get_all_manifests_schema(self) -> "pa.Schema": all_manifests_schema = all_manifests_schema.append(pa.field("reference_snapshot_id", pa.int64(), nullable=False)) return all_manifests_schema + def _get_positional_deletes_schema(self) -> "pa.Schema": + import pyarrow as pa + + from pyiceberg.io.pyarrow import schema_to_pyarrow + + partition_struct = self.tbl.metadata.spec_struct() + pa_partition_struct = schema_to_pyarrow(partition_struct) + pa_row_struct = schema_to_pyarrow(self.tbl.schema().as_struct()) Review Comment: this is using the table's current metadata, perhaps we want to pass it these based on which snapshot we're using -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org