snleee commented on a change in pull request #6531:
URL: https://github.com/apache/incubator-pinot/pull/6531#discussion_r574148874



##########
File path: 
pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
##########
@@ -106,24 +118,75 @@ public void init(PinotConfiguration config) {
     // TODO: consider to add the encryption of the following config
     String accessKey = config.getProperty(ACCESS_KEY);
     String fileSystemName = config.getProperty(FILE_SYSTEM_NAME);
+    String clientId = config.getProperty(CLIENT_ID);
+    String clientSecret = config.getProperty(CLIENT_SECRET);
+    String tenantId = config.getProperty(TENANT_ID);
 
     String dfsServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_STORAGE_DNS_SUFFIX;
     String blobServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_BLOB_DNS_SUFFIX;
 
-    StorageSharedKeyCredential sharedKeyCredential = new 
StorageSharedKeyCredential(accountName, accessKey);
+    DataLakeServiceClientBuilder dataLakeServiceClientBuilder = new 
DataLakeServiceClientBuilder().endpoint(dfsServiceEndpointUrl);
+    BlobServiceClientBuilder blobServiceClientBuilder = new 
BlobServiceClientBuilder().endpoint(blobServiceEndpointUrl);
+
+    if (accountName!= null && accessKey != null) {
+      LOGGER.info("Authenticating using the access key to the account.");
+      StorageSharedKeyCredential sharedKeyCredential = new 
StorageSharedKeyCredential(accountName, accessKey);
+      dataLakeServiceClientBuilder.credential(sharedKeyCredential);
+      blobServiceClientBuilder.credential(sharedKeyCredential);
+    } else if (clientId != null && clientSecret != null && tenantId != null) {
+      LOGGER.info("Authenticating using Azure Active Directory");
+      ClientSecretCredential clientSecretCredential = new 
ClientSecretCredentialBuilder()
+          .clientId(clientId)
+          .clientSecret(clientSecret)
+          .tenantId(tenantId)
+          .build();
+      dataLakeServiceClientBuilder.credential(clientSecretCredential);
+      blobServiceClientBuilder.credential(clientSecretCredential);
+    } else {
+      // Error out as at least one mode of auth info needed
+      throw new IllegalArgumentException("Expecting either (accountName, 
accessKey) or (clientId, clientSecret, tenantId)");
+    }
 
-    DataLakeServiceClient serviceClient = new 
DataLakeServiceClientBuilder().credential(sharedKeyCredential)
-        .endpoint(dfsServiceEndpointUrl)
-        .buildClient();
+    _blobServiceClient = blobServiceClientBuilder.buildClient();
+    DataLakeServiceClient serviceClient = 
dataLakeServiceClientBuilder.buildClient();
+    _fileSystemClient = getOrCreateClientWithFileSystem(serviceClient, 
fileSystemName);
 
-    _blobServiceClient =
-        new 
BlobServiceClientBuilder().credential(sharedKeyCredential).endpoint(blobServiceEndpointUrl).buildClient();
-    _fileSystemClient = serviceClient.getFileSystemClient(fileSystemName);
     LOGGER.info("ADLSGen2PinotFS is initialized (accountName={}, 
fileSystemName={}, dfsServiceEndpointUrl={}, "
             + "blobServiceEndpointUrl={}, enableChecksum={})", accountName, 
fileSystemName, dfsServiceEndpointUrl,
         blobServiceEndpointUrl, _enableChecksum);
   }
 
+  /**
+   * Returns the DataLakeFileSystemClient to the specified file system 
creating if it doesn't exist.
+   *
+   * @param serviceClient authenticated data lake service client to an account
+   * @param fileSystemName name of the file system (blob container)
+   * @return DataLakeFileSystemClient with the specified fileSystemName.
+   */
+  @VisibleForTesting
+  public DataLakeFileSystemClient 
getOrCreateClientWithFileSystem(DataLakeServiceClient serviceClient,

Review comment:
       @rkanumul I went over the Azure portal. I had actually confused the 
`storage account` and `container` concepts. A `storage account` looks to be the 
top-level storage resource in Azure, while a `bucket` is the top-level concept 
in AWS S3. I had thought that a `container` in Azure was the equivalent of a 
`bucket` in AWS. 
   
   So yes, I actually agree with you that we can create the container here in 
the code. It's equivalent to creating a directory within the root path.
   
   Sorry for the misunderstanding.

##########
File path: 
pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/ADLSGen2PinotFS.java
##########
@@ -106,24 +118,75 @@ public void init(PinotConfiguration config) {
     // TODO: consider to add the encryption of the following config
     String accessKey = config.getProperty(ACCESS_KEY);
     String fileSystemName = config.getProperty(FILE_SYSTEM_NAME);
+    String clientId = config.getProperty(CLIENT_ID);
+    String clientSecret = config.getProperty(CLIENT_SECRET);
+    String tenantId = config.getProperty(TENANT_ID);
 
     String dfsServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_STORAGE_DNS_SUFFIX;
     String blobServiceEndpointUrl = HTTPS_URL_PREFIX + accountName + 
AZURE_BLOB_DNS_SUFFIX;
 
-    StorageSharedKeyCredential sharedKeyCredential = new 
StorageSharedKeyCredential(accountName, accessKey);
+    DataLakeServiceClientBuilder dataLakeServiceClientBuilder = new 
DataLakeServiceClientBuilder().endpoint(dfsServiceEndpointUrl);
+    BlobServiceClientBuilder blobServiceClientBuilder = new 
BlobServiceClientBuilder().endpoint(blobServiceEndpointUrl);
+
+    if (accountName!= null && accessKey != null) {
+      LOGGER.info("Authenticating using the access key to the account.");
+      StorageSharedKeyCredential sharedKeyCredential = new 
StorageSharedKeyCredential(accountName, accessKey);
+      dataLakeServiceClientBuilder.credential(sharedKeyCredential);
+      blobServiceClientBuilder.credential(sharedKeyCredential);
+    } else if (clientId != null && clientSecret != null && tenantId != null) {
+      LOGGER.info("Authenticating using Azure Active Directory");
+      ClientSecretCredential clientSecretCredential = new 
ClientSecretCredentialBuilder()
+          .clientId(clientId)
+          .clientSecret(clientSecret)
+          .tenantId(tenantId)
+          .build();
+      dataLakeServiceClientBuilder.credential(clientSecretCredential);
+      blobServiceClientBuilder.credential(clientSecretCredential);
+    } else {
+      // Error out as at least one mode of auth info needed
+      throw new IllegalArgumentException("Expecting either (accountName, 
accessKey) or (clientId, clientSecret, tenantId)");
+    }
 
-    DataLakeServiceClient serviceClient = new 
DataLakeServiceClientBuilder().credential(sharedKeyCredential)
-        .endpoint(dfsServiceEndpointUrl)
-        .buildClient();
+    _blobServiceClient = blobServiceClientBuilder.buildClient();
+    DataLakeServiceClient serviceClient = 
dataLakeServiceClientBuilder.buildClient();
+    _fileSystemClient = getOrCreateClientWithFileSystem(serviceClient, 
fileSystemName);
 
-    _blobServiceClient =
-        new 
BlobServiceClientBuilder().credential(sharedKeyCredential).endpoint(blobServiceEndpointUrl).buildClient();
-    _fileSystemClient = serviceClient.getFileSystemClient(fileSystemName);
     LOGGER.info("ADLSGen2PinotFS is initialized (accountName={}, 
fileSystemName={}, dfsServiceEndpointUrl={}, "
             + "blobServiceEndpointUrl={}, enableChecksum={})", accountName, 
fileSystemName, dfsServiceEndpointUrl,
         blobServiceEndpointUrl, _enableChecksum);
   }
 
+  /**
+   * Returns the DataLakeFileSystemClient to the specified file system 
creating if it doesn't exist.
+   *
+   * @param serviceClient authenticated data lake service client to an account
+   * @param fileSystemName name of the file system (blob container)
+   * @return DataLakeFileSystemClient with the specified fileSystemName.
+   */
+  @VisibleForTesting
+  public DataLakeFileSystemClient 
getOrCreateClientWithFileSystem(DataLakeServiceClient serviceClient,

Review comment:
       @rkanumul I went over the Azure portal. I had actually confused the 
`storage account` and `container` concepts in Azure. A `storage account` looks 
to be the top-level storage resource in Azure, while a `bucket` is the 
top-level concept in AWS S3. I had thought that a `container` in Azure was the 
equivalent of a `bucket` in AWS. 
   
   So yes, I actually agree with you that we can create the container here in 
the code. It's equivalent to creating a directory within the root path.
   
   Sorry for the misunderstanding.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to