kevinjqliu commented on code in PR #1533: URL: https://github.com/apache/iceberg-python/pull/1533#discussion_r1920533638
########## pyiceberg/table/inspect.py: ########## @@ -637,17 +778,48 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: ) def files(self, snapshot_id: Optional[int] = None) -> "pa.Table": + """ + Retrieves a table of files from the current snapshot or the specified snapshot ID. + + Args: + snapshot_id (Optional[int]): The snapshot ID to retrieve files for. + + Returns: + pa.Table: The table of files. + """ return self._files(snapshot_id) def data_files(self, snapshot_id: Optional[int] = None) -> "pa.Table": + """ + Retrieves a table of data files from the current snapshot or specified snapshot ID. + + Args: + snapshot_id (Optional[int]): The snapshot ID to filter the data files. + + Returns: + pa.Table: The table of data files. + """ return self._files(snapshot_id, {DataFileContent.DATA}) def delete_files(self, snapshot_id: Optional[int] = None) -> "pa.Table": + """ + Retrieves a table of delete files from the current snapshot or specified snapshot ID. + + Args: + snapshot_id (Optional[int]): The snapshot ID to filter the delete files. + + Returns: + pa.Table: The table of delete files. + """ return self._files(snapshot_id, {DataFileContent.POSITION_DELETES, DataFileContent.EQUALITY_DELETES}) def all_manifests(self) -> "pa.Table": - import pyarrow as pa Review Comment: thanks! ########## pyiceberg/table/inspect.py: ########## @@ -225,7 +267,14 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: schema=entries_schema, ) - def refs(self) -> "pa.Table": + def refs(self) -> "pa.Table": + """shot ID, + and optional configuration parameters. + Retrieve references from the Iceberg table metadata as a PyArrow Table. Review Comment: this is cut off ########## pyiceberg/table/inspect.py: ########## @@ -347,9 +413,27 @@ def update_partitions_map( schema=table_schema, ) - def _get_manifests_schema(self) -> "pa.Schema": - import pyarrow as pa + import pyarrow as pa +from typing import Optional, List, Dict, Any, Set +from datetime import datetime, timezone +from pyiceberg.table.snapshots import MetadataLogEntry +from pyiceberg.io.pyarrow import schema_to_pyarrow +from pyiceberg.table import Snapshot, ManifestContent, DataFileContent, PartitionSpec +from pyiceberg.utils import from_bytes +from executor_factory import ExecutorFactory + +class IcebergTableUtils: Review Comment: ```suggestion ``` i dont think we need a new class here ########## pyiceberg/table/inspect.py: ########## @@ -347,9 +413,27 @@ def update_partitions_map( schema=table_schema, ) - def _get_manifests_schema(self) -> "pa.Schema": - import pyarrow as pa + import pyarrow as pa +from typing import Optional, List, Dict, Any, Set +from datetime import datetime, timezone +from pyiceberg.table.snapshots import MetadataLogEntry +from pyiceberg.io.pyarrow import schema_to_pyarrow +from pyiceberg.table import Snapshot, ManifestContent, DataFileContent, PartitionSpec +from pyiceberg.utils import from_bytes +from executor_factory import ExecutorFactory Review Comment: imports should be on top of the file ########## pyiceberg/table/inspect.py: ########## @@ -256,8 +305,16 @@ def refs(self) -> "pa.Table": return pa.Table.from_pylist(ref_results, schema=ref_schema) def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": - import pyarrow as pa + """ + Retrieve partition information from the Iceberg table as a PyArrow Table. + + Args: + snapshot_id (Optional[int]): The snapshot ID to filter partitions. If not provided, all partitions are included. Review Comment: > If not provided, all partitions are included. I dont think this is right, the `snapshot_id` for all of these functions are used to time travel to a specific snapshot. otherwise the current snapshot will be used -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org