kevinjqliu commented on code in PR #2332:
URL: https://github.com/apache/iceberg-python/pull/2332#discussion_r2281015227


##########
pyiceberg/io/fsspec.py:
##########
@@ -383,14 +389,17 @@ def delete(self, location: Union[str, InputFile, 
OutputFile]) -> None:
             str_location = location
 
         uri = urlparse(str_location)
-        fs = self.get_fs(uri.scheme)
+        fs = self.get_fs(uri.scheme, uri.netloc)
         fs.rm(str_location)
 
-    def _get_fs(self, scheme: str) -> AbstractFileSystem:
-        """Get a filesystem for a specific scheme."""
+    def _get_fs(self, scheme: str, netloc: Optional[str] = None) -> 
AbstractFileSystem:
+        """Get a filesystem for a specific scheme and netloc."""
         if scheme not in self._scheme_to_fs:
             raise ValueError(f"No registered filesystem for scheme: {scheme}")
-        return self._scheme_to_fs[scheme](self.properties)
+        properties = self.properties.copy()

Review Comment:
   is it necessary to make another copy of properties? 



##########
pyiceberg/io/fsspec.py:
##########
@@ -194,13 +195,18 @@ def _gs(properties: Properties) -> AbstractFileSystem:
 def _adls(properties: Properties) -> AbstractFileSystem:
     from adlfs import AzureBlobFileSystem
 
-    for key, sas_token in {
-        key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in 
properties.items() if key.startswith(ADLS_SAS_TOKEN)
-    }.items():
-        if ADLS_ACCOUNT_NAME not in properties:
-            properties[ADLS_ACCOUNT_NAME] = key.split(".")[0]
-        if ADLS_SAS_TOKEN not in properties:
-            properties[ADLS_SAS_TOKEN] = sas_token
+    # 
https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri#uri-syntax
+    if netloc := properties.get("netloc"):
+        account_uri = netloc.split("@")[-1]
+    else:
+        account_uri = None
+
+    if not properties.get(ADLS_ACCOUNT_NAME) and account_uri:
+        properties[ADLS_ACCOUNT_NAME] = account_uri.split(".")[0]
+
+    # Fixes https://github.com/apache/iceberg-python/issues/1146
+    if not properties.get(ADLS_SAS_TOKEN) and account_uri:
+        properties[ADLS_SAS_TOKEN] = 
properties.get(f"{ADLS_SAS_TOKEN}.{account_uri}")

Review Comment:
   I'm a bit confused by this part. 
   
   Given the uri 
`abfss://<file_system>@<account_name>.dfs.core.windows.net/<path>/<file_name>`, 
the `netloc` would be `<file_system>@<account_name>.dfs.core.windows.net`, and 
`account_uri` would be `<account_name>.dfs.core.windows.net` 
   
   I presume we need to create a new `AzureBlobFileSystem` object for each 
unique netloc. We can continue to follow the pyarrow implementation here by 
providing both the `scheme` and `netloc`  to the FS initialization function
   
https://github.com/apache/iceberg-python/blob/80135451d030569259d83674ef147e0d6f62fd51/pyiceberg/io/pyarrow.py#L570



##########
pyiceberg/io/fsspec.py:
##########
@@ -383,14 +389,17 @@ def delete(self, location: Union[str, InputFile, 
OutputFile]) -> None:
             str_location = location
 
         uri = urlparse(str_location)
-        fs = self.get_fs(uri.scheme)
+        fs = self.get_fs(uri.scheme, uri.netloc)
         fs.rm(str_location)
 
-    def _get_fs(self, scheme: str) -> AbstractFileSystem:
-        """Get a filesystem for a specific scheme."""
+    def _get_fs(self, scheme: str, netloc: Optional[str] = None) -> 
AbstractFileSystem:
+        """Get a filesystem for a specific scheme and netloc."""
         if scheme not in self._scheme_to_fs:
             raise ValueError(f"No registered filesystem for scheme: {scheme}")
-        return self._scheme_to_fs[scheme](self.properties)
+        properties = self.properties.copy()
+        if netloc:
+            properties["netloc"] = netloc

Review Comment:
   i feel that passing the netloc this way, via properties, make the codebase 
harder to maintain. maybe we can find another way to pass `netloc` to `_adls`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to