amogh-jahagirdar commented on code in PR #13241: URL: https://github.com/apache/iceberg/pull/13241#discussion_r2157937322
########## build.gradle: ########## @@ -558,6 +558,12 @@ project(':iceberg-azure') { testImplementation libs.testcontainers testImplementation libs.mockserver.netty testImplementation libs.mockserver.client.java + testImplementation(libs.hadoop3.common) { Review Comment: Why did this need to change? what are we pulling from hadoop for the new tests? ########## azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java: ########## @@ -190,4 +200,188 @@ public void testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties()); } + + @Test + public void noStorageCredentialConfigured() { + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of()); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", "/path/to/file"))); + + assertThat( + fileIO + .clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromProperties1") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + } + + @Test + public void singleStorageCredentialConfigured() { + StorageCredential adlsCredential = + StorageCredential.create( + abfsPath("custom-container", "account1", ""), + ImmutableMap.of(sasTokenForAccount("account1"), "sasTokenFromCredential")); + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of(adlsCredential)); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + // verify that the generic ADLS client is used if the storage prefix doesn't match the prefixes + // in the storage credentials + assertThat( + fileIO.clientForStoragePath(abfsPath("custom-container", "account1", "/path/to/file"))) + .isNotSameAs( + fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))); + + // Custom credentials from storage credentials. + assertThat( + fileIO + .clientForStoragePath(abfsPath("custom-container", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromCredential") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + + // Generic credentials from properties + assertThat( + fileIO + .clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromProperties1") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + } + + @Test + public void multipleStorageCredentialsConfigured() { + StorageCredential adlsCredential1 = + StorageCredential.create( + abfsPath("custom-container", "account1", "/table1"), Review Comment: Can we move some of these abfsPath values into separate variables; it'll reduce duplication and make it easier to verify these tests are doing what they should be ########## azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java: ########## @@ -103,37 +102,32 @@ public Map<String, String> properties() { } public DataLakeFileSystemClient client(String path) { - ADLSLocation location = new ADLSLocation(path); - return client(location); + return clientForStoragePath(path).client(path); } @VisibleForTesting - DataLakeFileSystemClient client(ADLSLocation location) { - DataLakeFileSystemClientBuilder clientBuilder = - new DataLakeFileSystemClientBuilder().httpClient(HTTP); - - location.container().ifPresent(clientBuilder::fileSystemName); - Optional.ofNullable(vendedAdlsCredentialProvider) - .map(p -> new VendedAzureSasCredentialPolicy(location.host(), p)) - .ifPresent(clientBuilder::addPolicy); - azureProperties.applyClientConfiguration(location.host(), clientBuilder); + PrefixedADLSClient clientForStoragePath(String path) { + PrefixedADLSClient prefixedADLSClient; + String matchingPrefix = ABFS_PREFIX; - return clientBuilder.buildClient(); - } + for (String storagePrefix : clientByPrefix().keySet()) { + if (path.startsWith(storagePrefix) && storagePrefix.length() > matchingPrefix.length()) { + matchingPrefix = storagePrefix; + } + } + prefixedADLSClient = clientByPrefix().getOrDefault(matchingPrefix, null); - private DataLakeFileClient fileClient(String path) { - ADLSLocation location = new ADLSLocation(path); - return client(location).getFileClient(location.path()); + Preconditions.checkState( + null != prefixedADLSClient, + "[BUG] ADLS client-builder for storage path not available: %s", Review Comment: the "[BUG]" callout doesn't really fit in the pattern of error messages in the project, can we just say "Cannot build ADLS cllient for path: %s" ########## azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java: ########## @@ -190,4 +200,188 @@ public void testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties()); } + + @Test + public void noStorageCredentialConfigured() { + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of()); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", "/path/to/file"))); + + assertThat( + fileIO + .clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromProperties1") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + } + + @Test + public void singleStorageCredentialConfigured() { + StorageCredential adlsCredential = + StorageCredential.create( + abfsPath("custom-container", "account1", ""), + ImmutableMap.of(sasTokenForAccount("account1"), "sasTokenFromCredential")); + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of(adlsCredential)); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + // verify that the generic ADLS client is used if the storage prefix doesn't match the prefixes + // in the storage credentials + assertThat( + fileIO.clientForStoragePath(abfsPath("custom-container", "account1", "/path/to/file"))) + .isNotSameAs( + fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))); + Review Comment: Same as what I mentioned somewhere else in this test, could we make sure some of these `abfsPath(...)` are moved to separate values and can we make sure these tests are strong as possible beyond just comparing if it's the same as another path. ########## azure/src/main/java/org/apache/iceberg/azure/adlsv2/PrefixedADLSClient.java: ########## @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.azure.adlsv2; + +import com.azure.core.http.HttpClient; +import com.azure.storage.file.datalake.DataLakeFileClient; +import com.azure.storage.file.datalake.DataLakeFileSystemClient; +import com.azure.storage.file.datalake.DataLakeFileSystemClientBuilder; +import java.util.Map; +import java.util.Optional; +import org.apache.iceberg.azure.AzureProperties; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Strings; + +class PrefixedADLSClient implements AutoCloseable { + + private static final HttpClient HTTP = HttpClient.createDefault(); + private final String storagePrefix; + private final AzureProperties azureProperties; + + private VendedAdlsCredentialProvider vendedAdlsCredentialProvider; + + PrefixedADLSClient(String storagePrefix, Map<String, String> properties) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(storagePrefix), "Invalid storage prefix: null or empty"); + Preconditions.checkArgument(null != properties, "Invalid properties: null"); + this.storagePrefix = storagePrefix; + this.azureProperties = new AzureProperties(properties); + this.azureProperties + .vendedAdlsCredentialProvider() + .ifPresent(provider -> this.vendedAdlsCredentialProvider = provider); + } + + DataLakeFileClient fileClient(String storagePath) { + ADLSLocation location = new ADLSLocation(storagePath); + return client(storagePath).getFileClient(location.path()); + } + + DataLakeFileSystemClient client(String storagePath) { + ADLSLocation location = new ADLSLocation(storagePath); + DataLakeFileSystemClientBuilder clientBuilder = + new DataLakeFileSystemClientBuilder().httpClient(HTTP); + + location.container().ifPresent(clientBuilder::fileSystemName); + Optional.ofNullable(vendedAdlsCredentialProvider) + .map(p -> new VendedAzureSasCredentialPolicy(location.host(), p)) + .ifPresent(clientBuilder::addPolicy); Review Comment: Nit: I know this was just copied over from what already existed but imo this logic would just be a bit clearer with plain old null check and assignment: ``` if (credentialProvider != null) { clientBuilder.addPolicy(new VendedAzureSasCredentialPolicy(location.host(), credentialProvider))) } ``` ########## azure/src/integration/java/org/apache/iceberg/azure/adlsv2/ADLSFileIOTest.java: ########## @@ -190,4 +200,188 @@ public void testSerialization(TestHelpers.RoundTripSerializer<FileIO> roundTripS assertThat(testFileIO.properties()).isEqualTo(roundTripSerializedFileIO.properties()); } + + @Test + public void noStorageCredentialConfigured() { + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of()); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(wasbPath("container", "account", "/path/to/file"))) + .isSameAs(fileIO.clientForStoragePath(abfsPath("random", "account", "/path/to/file"))); + + assertThat( + fileIO + .clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromProperties1") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + } + + @Test + public void singleStorageCredentialConfigured() { + StorageCredential adlsCredential = + StorageCredential.create( + abfsPath("custom-container", "account1", ""), + ImmutableMap.of(sasTokenForAccount("account1"), "sasTokenFromCredential")); + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of(adlsCredential)); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + // verify that the generic ADLS client is used if the storage prefix doesn't match the prefixes + // in the storage credentials + assertThat( + fileIO.clientForStoragePath(abfsPath("custom-container", "account1", "/path/to/file"))) + .isNotSameAs( + fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))); + + assertThat(fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))) + .isSameAs( + fileIO.clientForStoragePath(abfsPath("my-container2", "account2", "/path/to/file"))); + + // Custom credentials from storage credentials. + assertThat( + fileIO + .clientForStoragePath(abfsPath("custom-container", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromCredential") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + + // Generic credentials from properties + assertThat( + fileIO + .clientForStoragePath(abfsPath("my-container1", "account1", "/path/to/file")) + .azureProperties()) + .extracting("adlsSasTokens") + .asInstanceOf(InstanceOfAssertFactories.map(String.class, String.class)) + .containsEntry(accountHost("account1"), "sasTokenFromProperties1") + .containsEntry(accountHost("account2"), "sasTokenFromProperties2"); + } + + @Test + public void multipleStorageCredentialsConfigured() { + StorageCredential adlsCredential1 = + StorageCredential.create( + abfsPath("custom-container", "account1", "/table1"), + ImmutableMap.of(sasTokenForAccount("account1"), "sasTokenFromCredential1")); + StorageCredential adlsCredential2 = + StorageCredential.create( + abfsPath("custom-container", "account1", "/table2"), + ImmutableMap.of(sasTokenForAccount("account1"), "sasTokenFromCredential2")); + ADLSFileIO fileIO = new ADLSFileIO(); + fileIO.setCredentials(ImmutableList.of(adlsCredential1, adlsCredential2)); + fileIO.initialize( + ImmutableMap.of( + sasTokenForAccount("account1"), "sasTokenFromProperties1", + sasTokenForAccount("account2"), "sasTokenFromProperties2")); + + // verify that the generic ADLS client is used if the storage prefix doesn't match the prefixes + // in the storage credentials + assertThat(fileIO.clientForStoragePath(abfsPath("custom-container", "account1", "/table1"))) + .isNotSameAs( + fileIO.clientForStoragePath(abfsPath("custom-container", "account1", "/table2"))) + .isNotSameAs( + fileIO.clientForStoragePath(abfsPath("my-container", "account1", "/path/to/file"))); Review Comment: I feel like this test isn't as strong as it could be; we're asserting that the clients for these paths are all different which is true but we're not asserting the actual client right? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org