amogh-jahagirdar commented on code in PR #13241:
URL: https://github.com/apache/iceberg/pull/13241#discussion_r2157937322


##########
build.gradle:
##########
@@ -558,6 +558,12 @@ project(':iceberg-azure') {
     testImplementation libs.testcontainers
     testImplementation libs.mockserver.netty
     testImplementation libs.mockserver.client.java
+    testImplementation(libs.hadoop3.common) {

Review Comment:
   Why did this need to change? What are we pulling from hadoop for the new 
tests?



##########
azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java:
##########
@@ -190,4 +200,188 @@ public void 
testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS
 
     
assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties());
   }
+
+  @Test
+  public void noStorageCredentialConfigured() {
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of());
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", 
"/path/to/file")));
+
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("my-container1", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromProperties1")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+  }
+
+  @Test
+  public void singleStorageCredentialConfigured() {
+    StorageCredential adlsCredential =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", ""),
+            ImmutableMap.of(sasTokenForAccount("account1"), 
"sasTokenFromCredential"));
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of(adlsCredential));
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    // verify that the generic ADLS client is used if the storage prefix 
doesn't match the prefixes
+    // in the storage credentials
+    assertThat(
+            fileIO.clientForStoragePath(abfsPath("custom-container", 
"account1", "/path/to/file")))
+        .isNotSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container", "account1", 
"/path/to/file")));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")));
+
+    // Custom credentials from storage credentials.
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("custom-container", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromCredential")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+
+    // Generic credentials from properties
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("my-container1", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromProperties1")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+  }
+
+  @Test
+  public void multipleStorageCredentialsConfigured() {
+    StorageCredential adlsCredential1 =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", "/table1"),

Review Comment:
   Can we move some of these abfsPath values into separate variables; it'll 
reduce duplication and make it easier to verify these tests are doing what they 
should be



##########
azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java:
##########
@@ -103,37 +102,32 @@ public Map<String, String> properties() {
   }
 
   public DataLakeFileSystemClient client(String path) {
-    ADLSLocation location = new ADLSLocation(path);
-    return client(location);
+    return clientForStoragePath(path).client(path);
   }
 
   @VisibleForTesting
-  DataLakeFileSystemClient client(ADLSLocation location) {
-    DataLakeFileSystemClientBuilder clientBuilder =
-        new DataLakeFileSystemClientBuilder().httpClient(HTTP);
-
-    location.container().ifPresent(clientBuilder::fileSystemName);
-    Optional.ofNullable(vendedAdlsCredentialProvider)
-        .map(p -> new VendedAzureSasCredentialPolicy(location.host(), p))
-        .ifPresent(clientBuilder::addPolicy);
-    azureProperties.applyClientConfiguration(location.host(), clientBuilder);
+  PrefixedADLSClient clientForStoragePath(String path) {
+    PrefixedADLSClient prefixedADLSClient;
+    String matchingPrefix = ABFS_PREFIX;
 
-    return clientBuilder.buildClient();
-  }
+    for (String storagePrefix : clientByPrefix().keySet()) {
+      if (path.startsWith(storagePrefix) && storagePrefix.length() > 
matchingPrefix.length()) {
+        matchingPrefix = storagePrefix;
+      }
+    }
+    prefixedADLSClient = clientByPrefix().getOrDefault(matchingPrefix, null);
 
-  private DataLakeFileClient fileClient(String path) {
-    ADLSLocation location = new ADLSLocation(path);
-    return client(location).getFileClient(location.path());
+    Preconditions.checkState(
+        null != prefixedADLSClient,
+        "[BUG] ADLS client-builder for storage path not available: %s",

Review Comment:
   the "[BUG]" callout doesn't really fit in the pattern of error messages in 
the project, can we just say "Cannot build ADLS client for path: %s" 



##########
azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java:
##########
@@ -190,4 +200,188 @@ public void 
testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS
 
     
assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties());
   }
+
+  @Test
+  public void noStorageCredentialConfigured() {
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of());
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", 
"/path/to/file")));
+
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("my-container1", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromProperties1")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+  }
+
+  @Test
+  public void singleStorageCredentialConfigured() {
+    StorageCredential adlsCredential =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", ""),
+            ImmutableMap.of(sasTokenForAccount("account1"), 
"sasTokenFromCredential"));
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of(adlsCredential));
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    // verify that the generic ADLS client is used if the storage prefix 
doesn't match the prefixes
+    // in the storage credentials
+    assertThat(
+            fileIO.clientForStoragePath(abfsPath("custom-container", 
"account1", "/path/to/file")))
+        .isNotSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container", "account1", 
"/path/to/file")));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")));
+

Review Comment:
   Same as what I mentioned somewhere else in this test, could we make sure 
some of these `abfsPath(...)` are moved to separate variables, and can we make 
these tests as strong as possible beyond just comparing whether a client is the 
same as the one for another path.



##########
azure/src/main/java/org/apache/iceberg/azure/adlsv2/PrefixedADLSClient.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.azure.adlsv2;
+
+import com.azure.core.http.HttpClient;
+import com.azure.storage.file.datalake.DataLakeFileClient;
+import com.azure.storage.file.datalake.DataLakeFileSystemClient;
+import com.azure.storage.file.datalake.DataLakeFileSystemClientBuilder;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.iceberg.azure.AzureProperties;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.base.Strings;
+
+class PrefixedADLSClient implements AutoCloseable {
+
+  private static final HttpClient HTTP = HttpClient.createDefault();
+  private final String storagePrefix;
+  private final AzureProperties azureProperties;
+
+  private VendedAdlsCredentialProvider vendedAdlsCredentialProvider;
+
+  PrefixedADLSClient(String storagePrefix, Map<String, String> properties) {
+    Preconditions.checkArgument(
+        !Strings.isNullOrEmpty(storagePrefix), "Invalid storage prefix: null 
or empty");
+    Preconditions.checkArgument(null != properties, "Invalid properties: 
null");
+    this.storagePrefix = storagePrefix;
+    this.azureProperties = new AzureProperties(properties);
+    this.azureProperties
+        .vendedAdlsCredentialProvider()
+        .ifPresent(provider -> this.vendedAdlsCredentialProvider = provider);
+  }
+
+  DataLakeFileClient fileClient(String storagePath) {
+    ADLSLocation location = new ADLSLocation(storagePath);
+    return client(storagePath).getFileClient(location.path());
+  }
+
+  DataLakeFileSystemClient client(String storagePath) {
+    ADLSLocation location = new ADLSLocation(storagePath);
+    DataLakeFileSystemClientBuilder clientBuilder =
+        new DataLakeFileSystemClientBuilder().httpClient(HTTP);
+
+    location.container().ifPresent(clientBuilder::fileSystemName);
+    Optional.ofNullable(vendedAdlsCredentialProvider)
+        .map(p -> new VendedAzureSasCredentialPolicy(location.host(), p))
+        .ifPresent(clientBuilder::addPolicy);

Review Comment:
   Nit: I know this was just copied over from what already existed but imo this 
logic would just be a bit clearer with plain old null check and assignment:
   
   ```
   if (credentialProvider != null) {
         clientBuilder.addPolicy(new 
VendedAzureSasCredentialPolicy(location.host(), credentialProvider)))
   }
   
   ```



##########
azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java:
##########
@@ -190,4 +200,188 @@ public void 
testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS
 
     
assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties());
   }
+
+  @Test
+  public void noStorageCredentialConfigured() {
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of());
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", 
"/path/to/file")))
+        .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", 
"/path/to/file")));
+
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("my-container1", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromProperties1")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+  }
+
+  @Test
+  public void singleStorageCredentialConfigured() {
+    StorageCredential adlsCredential =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", ""),
+            ImmutableMap.of(sasTokenForAccount("account1"), 
"sasTokenFromCredential"));
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of(adlsCredential));
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    // verify that the generic ADLS client is used if the storage prefix 
doesn't match the prefixes
+    // in the storage credentials
+    assertThat(
+            fileIO.clientForStoragePath(abfsPath("custom-container", 
"account1", "/path/to/file")))
+        .isNotSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container", "account1", 
"/path/to/file")));
+
+    assertThat(fileIO.clientForStoragePath(abfsPath("my-container", 
"account1", "/path/to/file")))
+        .isSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container2", "account2", 
"/path/to/file")));
+
+    // Custom credentials from storage credentials.
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("custom-container", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromCredential")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+
+    // Generic credentials from properties
+    assertThat(
+            fileIO
+                .clientForStoragePath(abfsPath("my-container1", "account1", 
"/path/to/file"))
+                .azureProperties())
+        .extracting("adlsSasTokens")
+        .asInstanceOf(InstanceOfAssertFactories.map(String.class, 
String.class))
+        .containsEntry(accountHost("account1"), "sasTokenFromProperties1")
+        .containsEntry(accountHost("account2"), "sasTokenFromProperties2");
+  }
+
+  @Test
+  public void multipleStorageCredentialsConfigured() {
+    StorageCredential adlsCredential1 =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", "/table1"),
+            ImmutableMap.of(sasTokenForAccount("account1"), 
"sasTokenFromCredential1"));
+    StorageCredential adlsCredential2 =
+        StorageCredential.create(
+            abfsPath("custom-container", "account1", "/table2"),
+            ImmutableMap.of(sasTokenForAccount("account1"), 
"sasTokenFromCredential2"));
+    ADLSFileIO fileIO = new ADLSFileIO();
+    fileIO.setCredentials(ImmutableList.of(adlsCredential1, adlsCredential2));
+    fileIO.initialize(
+        ImmutableMap.of(
+            sasTokenForAccount("account1"), "sasTokenFromProperties1",
+            sasTokenForAccount("account2"), "sasTokenFromProperties2"));
+
+    // verify that the generic ADLS client is used if the storage prefix 
doesn't match the prefixes
+    // in the storage credentials
+    assertThat(fileIO.clientForStoragePath(abfsPath("custom-container", 
"account1", "/table1")))
+        .isNotSameAs(
+            fileIO.clientForStoragePath(abfsPath("custom-container", 
"account1", "/table2")))
+        .isNotSameAs(
+            fileIO.clientForStoragePath(abfsPath("my-container", "account1", 
"/path/to/file")));

Review Comment:
   I feel like this test isn't as strong as it could be; we're asserting that 
the clients for these paths are all different, which is true, but we're not 
asserting the actual client, right?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to