Fokko commented on code in PR #1043: URL: https://github.com/apache/iceberg-python/pull/1043#discussion_r1722881230
########## pyiceberg/io/pyarrow.py: ########## @@ -1304,6 +1305,195 @@ def _read_all_delete_files(fs: FileSystem, tasks: Iterable[FileScanTask]) -> Dic return deletes_per_file +def _fs_from_file_path(file_path: str, io: FileIO) -> FileSystem: + scheme, netloc, _ = _parse_location(file_path) + if isinstance(io, PyArrowFileIO): + return io.fs_by_scheme(scheme, netloc) + else: + try: + from pyiceberg.io.fsspec import FsspecFileIO + + if isinstance(io, FsspecFileIO): + from pyarrow.fs import PyFileSystem + + return PyFileSystem(FSSpecHandler(io.get_fs(scheme))) + else: + raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") + except ModuleNotFoundError as e: + # When FsSpec is not installed + raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {io}") from e + + +class ArrowScan: + _table_metadata: TableMetadata + _io: FileIO + _fs: FileSystem + _projected_schema: Schema + _bound_row_filter: BooleanExpression + _case_sensitive: bool + _limit: Optional[int] + """Scan the Iceberg Table and create an Arrow construct. + + Attributes: Review Comment: nit: More of a style thing. I think it is more valuable to have a docstring at the `__init__` method since that's what will probably show up in people's IDE. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org