sfc-gh-tbenroeck commented on issue #1146: URL: https://github.com/apache/iceberg-python/issues/1146#issuecomment-2450669506
"""Custom fsspec FileIO: a temporary workaround for Azure vended credentials.

I created a custom FileIO fix as a temporary workaround and I've submitted
Polaris #418 (https://github.com/apache/polaris/issues/418).

Usage -- point the catalog's ``py-io-impl`` property at the class below (the
value used here implies this module is importable as ``custom_fsspec``)::

    catalog = load_catalog(
        **{
            "type": "rest",
            "header.X-Iceberg-Access-Delegation": "vended-credentials",
            "uri": f"https://{account}.snowflakecomputing.com/polaris/api/catalog",
            "credential": f"{principal_client_id}:{principal_secret}",
            "warehouse": catalog_name,
            "scope": role,
            "token-refresh-enabled": "true",
            "py-io-impl": "custom_fsspec.CustomFsspecFileIO",
        }
    )
"""

from fsspec import AbstractFileSystem
from pyiceberg.io import (
    ADLFS_ACCOUNT_NAME,
    ADLFS_SAS_TOKEN,
    ADLS_ACCOUNT_NAME,
    ADLS_SAS_TOKEN,
)
from pyiceberg.io.fsspec import FsspecFileIO
from pyiceberg.typedef import Properties
from pyiceberg.utils.properties import get_first_property_value

# Property under which the derived storage-account host name is stored.
_ACCOUNT_HOST_KEY = "adls.account-host"

# URI schemes that are all served by the Azure filesystem built in ``_adls``.
_AZURE_SCHEMES = frozenset({"abfs", "abfss", "wasb", "wasbs"})

# wasb(s) locations are rewritten to the equivalent abfs(s) scheme.
_WASB_TO_ABFS = {"wasb": "abfs", "wasbs": "abfss"}


def _wasb_to_abfs(location: str) -> str:
    """Return *location* with a leading ``wasb(s)://`` rewritten to ``abfs(s)://``.

    Only the scheme prefix is touched; any later occurrence of ``wasb://``
    inside the path or query is left alone. Locations with any other scheme
    (or with no ``://`` at all) are returned unchanged.
    """
    scheme, separator, remainder = location.partition("://")
    replacement = _WASB_TO_ABFS.get(scheme.lower())
    if not separator or replacement is None:
        return location
    return f"{replacement}://{remainder}"


def _promote_account_scoped_sas_token(properties: Properties) -> None:
    """Copy an account-scoped SAS token onto the plain ADLS property keys.

    Does nothing when a SAS token is already present under ``ADLS_SAS_TOKEN``
    or ``ADLFS_SAS_TOKEN``. Otherwise the first property whose key (after
    normalising an ``adlfs.`` prefix to ``adls.``) starts with
    ``ADLS_SAS_TOKEN`` supplies the token. If that key also ends in
    ``.windows.net`` and no account name is configured, the text after
    ``"<ADLS_SAS_TOKEN>."`` is taken as the account host and its first
    dot-separated label as the account name.

    *properties* is modified in place.
    """
    if get_first_property_value(properties, ADLS_SAS_TOKEN, ADLFS_SAS_TOKEN) is not None:
        return

    account_name = get_first_property_value(
        properties, ADLS_ACCOUNT_NAME, ADLFS_ACCOUNT_NAME
    )

    # Iterate over a snapshot: the loop body adds keys to ``properties``.
    for raw_key, value in list(properties.items()):
        key = raw_key.replace("adlfs.", "adls.")
        if not key.startswith(ADLS_SAS_TOKEN):
            continue

        properties[ADLS_SAS_TOKEN] = value
        if key.endswith(".windows.net") and account_name is None:
            account_host = key.removeprefix(f"{ADLS_SAS_TOKEN}.")
            properties[ADLS_ACCOUNT_NAME] = account_host.split(".")[0]
            properties[_ACCOUNT_HOST_KEY] = account_host
        return  # Only the first matching property is used.


class CustomFsspecFileIO(FsspecFileIO):
    """``FsspecFileIO`` variant that copes with account-scoped Azure SAS tokens.

    Short term fix for https://github.com/apache/iceberg-python/issues/961
    and https://github.com/apache/iceberg-python/issues/1146
    """

    def __init__(self, properties: Properties) -> None:
        """Normalise the Azure properties, then defer to ``FsspecFileIO``.

        The normalisation only runs when ``default-base-location`` is set and
        starts with ``abfs``.
        """
        # Work on a copy so the caller's mapping is never mutated.
        properties = dict(properties)

        base_location = properties.get("default-base-location")
        if base_location and base_location.startswith("abfs"):
            _promote_account_scoped_sas_token(properties)

        super().__init__(properties)

    def _get_fs(self, scheme: str) -> AbstractFileSystem:
        """Return the filesystem for *scheme*.

        All of abfs, abfss, wasb and wasbs get the filesystem built by this
        module's ``_adls``; every other scheme keeps the parent's behaviour.
        """
        if scheme in _AZURE_SCHEMES:
            return _adls(self.properties)
        # If not adls proceed with the original behavior
        return super()._get_fs(scheme)

    def new_input(self, location: str):
        """Open *location* for reading, treating wasb(s):// as abfs(s)://."""
        return super().new_input(_wasb_to_abfs(location))

    def new_output(self, location: str):
        """Open *location* for writing, treating wasb(s):// as abfs(s)://."""
        return super().new_output(_wasb_to_abfs(location))


def _adls(properties: Properties) -> AbstractFileSystem:
    """Build an ``AzureBlobFileSystem`` from host, account name and SAS token.

    Missing properties are passed through as ``None`` instead of raising
    ``KeyError`` here, so a configuration problem is reported by the
    filesystem constructor itself.
    """
    from adlfs import AzureBlobFileSystem

    return AzureBlobFileSystem(
        account_host=properties.get(_ACCOUNT_HOST_KEY),
        account_name=get_first_property_value(
            properties, ADLS_ACCOUNT_NAME, ADLFS_ACCOUNT_NAME
        ),
        sas_token=get_first_property_value(
            properties, ADLS_SAS_TOKEN, ADLFS_SAS_TOKEN
        ),
    )


# --
# This is an automated message from the Apache Git Service.
# To respond to the message, please log on to GitHub and use the
# URL above to go to the specific comment.
#
# To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
#
# For queries about this service, please contact Infrastructure at:
# us...@infra.apache.org
#
# ---------------------------------------------------------------------
# To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
# For additional commands, e-mail: issues-h...@iceberg.apache.org