This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new ebe22bdf51d [fix](aws iam role) Fix some bug about iam role feature 
(#50682) (#50945)
ebe22bdf51d is described below

commit ebe22bdf51d0be5e49ad6892d9bcc170d973f7f2
Author: Lei Zhang <zhang...@selectdb.com>
AuthorDate: Fri May 16 17:36:40 2025 +0800

    [fix](aws iam role) Fix some bug about iam role feature (#50682) (#50945)
    
    * fix `S3FileSystem.globList` not support aws iam role
---
 fe/fe-common/pom.xml                               |   4 +
 fe/fe-core/pom.xml                                 |   4 +
 .../java/org/apache/doris/common/util/S3Util.java  |  32 +++-
 .../datasource/property/PropertyConverter.java     |  28 ++++
 .../property/constants/S3Properties.java           |   2 +-
 .../org/apache/doris/fs/obj/AzureObjStorage.java   |  19 +--
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java | 101 +++++++++++
 .../org/apache/doris/fs/remote/RemoteFile.java     |  13 +-
 .../org/apache/doris/fs/remote/S3FileSystem.java   |  27 ++-
 .../doris/fs/obj/S3ObjStorageGlobListTest.java     | 184 +++++++++++++++++++++
 fe/pom.xml                                         |   5 +
 .../vault_p0/create/test_create_vault.groovy       |  15 --
 12 files changed, 393 insertions(+), 41 deletions(-)

diff --git a/fe/fe-common/pom.xml b/fe/fe-common/pom.xml
index 55934fd4e24..8203b482410 100644
--- a/fe/fe-common/pom.xml
+++ b/fe/fe-common/pom.xml
@@ -120,6 +120,10 @@ under the License.
             <groupId>com.amazonaws</groupId>
             <artifactId>aws-java-sdk-s3</artifactId>
         </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-sts</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.apache.logging.log4j</groupId>
             <artifactId>log4j-web</artifactId>
diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml
index 52bb3ef7329..2938d032765 100644
--- a/fe/fe-core/pom.xml
+++ b/fe/fe-core/pom.xml
@@ -412,6 +412,10 @@ under the License.
             <groupId>com.amazonaws</groupId>
             <artifactId>aws-java-sdk-logs</artifactId>
         </dependency>
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-sts</artifactId>
+        </dependency>
         <dependency>
             <groupId>com.huaweicloud</groupId>
             <artifactId>hadoop-huaweicloud</artifactId>
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index d0764385201..e204fab2e22 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -93,7 +93,8 @@ public class S3Util {
                 .putAdvancedOption(SdkAdvancedClientOption.SIGNER, 
AwsS3V4Signer.create())
                 .build();
         return S3Client.builder()
-                .httpClient(UrlConnectionHttpClient.create())
+                
.httpClient(UrlConnectionHttpClient.builder().socketTimeout(Duration.ofSeconds(30))
+                        .connectionTimeout(Duration.ofSeconds(30)).build())
                 .endpointOverride(endpoint)
                 .credentialsProvider(getAwsCredencialsProvider(credential))
                 .region(Region.of(region))
@@ -133,11 +134,15 @@ public class S3Util {
             StsClient stsClient = StsClient.builder()
                     
.credentialsProvider(InstanceProfileCredentialsProvider.create())
                     .build();
+
             return StsAssumeRoleCredentialsProvider.builder()
                     .stsClient(stsClient)
-                    .refreshRequest(r -> 
r.roleArn(roleArn).externalId(externalId)
-                            .roleSessionName("aws-sdk-java-v2-fe"))
-                    .build();
+                    .refreshRequest(builder -> {
+                        
builder.roleArn(roleArn).roleSessionName("aws-sdk-java-v2-fe");
+                        if (!Strings.isNullOrEmpty(externalId)) {
+                            builder.externalId(externalId);
+                        }
+                    }).build();
         }
         return 
AwsCredentialsProviderChain.of(SystemPropertyCredentialsProvider.create(),
                     EnvironmentVariableCredentialsProvider.create(),
@@ -167,7 +172,8 @@ public class S3Util {
                 .putAdvancedOption(SdkAdvancedClientOption.SIGNER, 
AwsS3V4Signer.create())
                 .build();
         return S3Client.builder()
-                .httpClient(UrlConnectionHttpClient.create())
+                
.httpClient(UrlConnectionHttpClient.builder().socketTimeout(Duration.ofSeconds(30))
+                        .connectionTimeout(Duration.ofSeconds(30)).build())
                 .endpointOverride(endpoint)
                 .credentialsProvider(getAwsCredencialsProvider(endpoint, 
region, accessKey, secretKey,
                         sessionToken, roleArn, externalId))
@@ -180,4 +186,20 @@ public class S3Util {
                         .build())
                 .build();
     }
+
+    public static String getLongestPrefix(String globPattern) {
+        int length = globPattern.length();
+        int earliestSpecialCharIndex = length;
+
+        char[] specialChars = {'*', '?', '[', '{', '\\'};
+
+        for (char specialChar : specialChars) {
+            int index = globPattern.indexOf(specialChar);
+            if (index != -1 && index < earliestSpecialCharIndex) {
+                earliestSpecialCharIndex = index;
+            }
+        }
+
+        return globPattern.substring(0, earliestSpecialCharIndex);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index 47ae235e3e1..aa22a83e1b0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -36,6 +36,7 @@ import 
org.apache.doris.datasource.property.constants.PaimonProperties;
 import org.apache.doris.datasource.property.constants.S3Properties;
 
 import com.aliyun.datalake.metastore.common.DataLakeConfig;
+import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.glue.catalog.util.AWSGlueConfig;
 import com.google.common.base.Strings;
 import com.google.common.collect.Maps;
@@ -48,6 +49,7 @@ import org.apache.hadoop.fs.obs.OBSFileSystem;
 import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider;
+import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -225,6 +227,15 @@ public class PropertyConverter {
         if (properties.containsKey(S3Properties.Env.CONNECTION_TIMEOUT_MS)) {
             properties.put(S3Properties.REQUEST_TIMEOUT_MS, 
properties.get(S3Properties.Env.CONNECTION_TIMEOUT_MS));
         }
+
+        if (properties.containsKey(S3Properties.Env.ROLE_ARN)) {
+            properties.put(S3Properties.ROLE_ARN, 
properties.get(S3Properties.Env.ROLE_ARN));
+        }
+
+        if (properties.containsKey(S3Properties.Env.EXTERNAL_ID)) {
+            properties.put(S3Properties.EXTERNAL_ID, 
properties.get(S3Properties.Env.EXTERNAL_ID));
+        }
+
         if (isMeta) {
             return properties;
         }
@@ -252,10 +263,12 @@ public class PropertyConverter {
         if (properties.containsKey(S3Properties.CONNECTION_TIMEOUT_MS)) {
             s3Properties.put(Constants.SOCKET_TIMEOUT, 
properties.get(S3Properties.CONNECTION_TIMEOUT_MS));
         }
+
         setS3FsAccess(s3Properties, properties, credential);
         s3Properties.putAll(properties);
         // remove extra meta properties
         S3Properties.FS_KEYS.forEach(s3Properties::remove);
+
         return s3Properties;
     }
 
@@ -291,6 +304,21 @@ public class PropertyConverter {
                 s3Properties.put(entry.getKey(), entry.getValue());
             }
         }
+
+        if (properties.containsKey(S3Properties.ROLE_ARN)
+                && 
!Strings.isNullOrEmpty(properties.get(S3Properties.ROLE_ARN))) {
+            // refer to 
https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/assumed_roles.html
+            //          https://issues.apache.org/jira/browse/HADOOP-19201
+            s3Properties.put(Constants.AWS_CREDENTIALS_PROVIDER, 
AssumedRoleCredentialProvider.class.getName());
+            s3Properties.put(Constants.ASSUMED_ROLE_ARN, 
properties.get(S3Properties.ROLE_ARN));
+            s3Properties.put(Constants.ASSUMED_ROLE_CREDENTIALS_PROVIDER,
+                    InstanceProfileCredentialsProvider.class.getName());
+
+            if (properties.containsKey(S3Properties.EXTERNAL_ID)
+                    && 
!Strings.isNullOrEmpty(properties.get(S3Properties.EXTERNAL_ID))) {
+                LOG.warn("External ID is not supported for assumed role 
credential provider");
+            }
+        }
     }
 
     public static String getAWSCredentialsProviders(Map<String, String> 
properties) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
index 41f4de716e9..566546d0e5c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
@@ -86,8 +86,8 @@ public class S3Properties extends BaseProperties {
             SimpleAWSCredentialsProvider.class.getName(),
             EnvironmentVariableCredentialsProvider.class.getName(),
             SystemPropertiesCredentialsProvider.class.getName(),
-            ProfileCredentialsProvider.class.getName(),
             InstanceProfileCredentialsProvider.class.getName(),
+            ProfileCredentialsProvider.class.getName(),
             WebIdentityTokenCredentialsProvider.class.getName(),
             IAMInstanceCredentialsProvider.class.getName());
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 7e4e0e5fd02..70c4e754eeb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -21,6 +21,7 @@ import org.apache.doris.backup.Status;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.S3URI;
+import org.apache.doris.common.util.S3Util;
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.datasource.property.constants.AzureProperties;
 import org.apache.doris.datasource.property.constants.S3Properties;
@@ -314,22 +315,6 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         return String.format("s3://%s/%s", bucket, fileName);
     }
 
-    public static String getLongestPrefix(String globPattern) {
-        int length = globPattern.length();
-        int earliestSpecialCharIndex = length;
-
-        char[] specialChars = {'*', '?', '[', '{', '\\'};
-
-        for (char specialChar : specialChars) {
-            int index = globPattern.indexOf(specialChar);
-            if (index != -1 && index < earliestSpecialCharIndex) {
-                earliestSpecialCharIndex = index;
-            }
-        }
-
-        return globPattern.substring(0, earliestSpecialCharIndex);
-    }
-
     public Status globList(String remotePath, List<RemoteFile> result, boolean 
fileNameOnly) {
         long roundCnt = 0;
         long elementCnt = 0;
@@ -347,7 +332,7 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
             PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pathPattern.toString());
 
             HashSet<String> directorySet = new HashSet<>();
-            String listPrefix = getLongestPrefix(globPath);
+            String listPrefix = S3Util.getLongestPrefix(globPath);
             LOG.info("azure glob list prefix is {}", listPrefix);
             ListBlobsOptions options = new 
ListBlobsOptions().setPrefix(listPrefix);
             String newContinuationToken = null;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 6adb98f1d7f..1a3679171f4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -24,6 +24,7 @@ import org.apache.doris.common.util.S3URI;
 import org.apache.doris.common.util.S3Util;
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.datasource.property.constants.S3Properties;
+import org.apache.doris.fs.remote.RemoteFile;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.tuple.Triple;
@@ -64,8 +65,12 @@ import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.InputStream;
 import java.net.URI;
+import java.nio.file.FileSystems;
+import java.nio.file.PathMatcher;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
@@ -393,4 +398,100 @@ public class S3ObjStorage implements ObjStorage<S3Client> 
{
         }
         return st;
     }
+
+    ListObjectsV2Response listObjectsV2(ListObjectsV2Request request) throws 
UserException {
+        return getClient().listObjectsV2(request);
+    }
+
+    /**
+     * List all files under the given path with glob pattern.
+     * For example, if the path is "s3://bucket/path/to/*.csv",
+     * it will list all files under "s3://bucket/path/to/" with ".csv" suffix.
+     *
+     * Copied from `AzureObjStorage.globList`
+     */
+    public Status globList(String remotePath, List<RemoteFile> result, boolean 
fileNameOnly) {
+        long roundCnt = 0;
+        long elementCnt = 0;
+        long matchCnt = 0;
+        long startTime = System.nanoTime();
+        try {
+            S3URI uri = S3URI.create(remotePath, isUsePathStyle, 
forceParsingByStandardUri);
+            String bucket = uri.getBucket();
+            String globPath = uri.getKey(); // eg: path/to/*.csv
+
+            LOG.info("globList globPath:{}, remotePath:{}", globPath, 
remotePath);
+
+            java.nio.file.Path pathPattern = Paths.get(globPath);
+            PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pathPattern);
+            HashSet<String> directorySet = new HashSet<>();
+
+            String listPrefix = S3Util.getLongestPrefix(globPath); // similar 
to Azure
+            LOG.info("globList listPrefix: {}", listPrefix);
+
+            ListObjectsV2Request request = ListObjectsV2Request.builder()
+                    .bucket(bucket)
+                    .prefix(listPrefix)
+                    .build();
+
+            boolean isTruncated = false;
+            do {
+                roundCnt++;
+                ListObjectsV2Response response = listObjectsV2(request);
+                for (S3Object obj : response.contents()) {
+                    elementCnt++;
+                    java.nio.file.Path objPath = Paths.get(obj.key());
+
+                    boolean isPrefix = false;
+                    while (objPath != null && 
objPath.normalize().toString().startsWith(listPrefix)) {
+                        if (!matcher.matches(objPath)) {
+                            isPrefix = true;
+                            objPath = objPath.getParent();
+                            continue;
+                        }
+                        if 
(directorySet.contains(objPath.normalize().toString())) {
+                            break;
+                        }
+                        if (isPrefix) {
+                            directorySet.add(objPath.normalize().toString());
+                        }
+
+                        matchCnt++;
+                        RemoteFile remoteFile = new RemoteFile(
+                                fileNameOnly ? 
objPath.getFileName().toString() :
+                                        "s3://" + bucket + "/" + 
objPath.toString(),
+                                !isPrefix,
+                                isPrefix ? -1 : obj.size(),
+                                isPrefix ? -1 : obj.size(),
+                                isPrefix ? 0 : 
obj.lastModified().toEpochMilli()
+                        );
+                        result.add(remoteFile);
+                        objPath = objPath.getParent();
+                        isPrefix = true;
+                    }
+                }
+
+                isTruncated = response.isTruncated();
+                if (isTruncated) {
+                    request = request.toBuilder()
+                        .continuationToken(response.nextContinuationToken())
+                        .build();
+                }
+            } while (isTruncated);
+
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("remotePath:{}, result:{}", remotePath, result);
+            }
+            return Status.OK;
+        } catch (Exception e) {
+            LOG.warn("Errors while getting file status", e);
+            return new Status(Status.ErrCode.COMMON_ERROR, "Errors while 
getting file status " + e.getMessage());
+        } finally {
+            long endTime = System.nanoTime();
+            long duration = endTime - startTime;
+            LOG.info("process {} elements under prefix {} for {} round, match 
{} elements, take {} ms",
+                    elementCnt, remotePath, roundCnt, matchCnt,
+                    duration / 1000 / 1000);
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFile.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFile.java
index 5904033dec7..1f6f0225278 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFile.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/RemoteFile.java
@@ -101,6 +101,17 @@ public class RemoteFile {
 
     @Override
     public String toString() {
-        return "[name: " + name + ", is file: " + isFile + "]";
+        StringBuilder sb = new StringBuilder();
+        sb.append("RemoteFile [");
+        sb.append("name: ").append(name);
+        sb.append(", isFile: ").append(isFile);
+        sb.append(", isDirectory: ").append(isDirectory);
+        sb.append(", size: ").append(size);
+        sb.append(", blockSize: ").append(blockSize);
+        sb.append(", modificationTime: ").append(modificationTime);
+        sb.append(", path: ").append(path);
+        sb.append("]");
+
+        return sb.toString();
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
index be53ffde2e0..7a7a426c470 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
@@ -23,11 +23,13 @@ import org.apache.doris.common.UserException;
 import org.apache.doris.common.security.authentication.AuthenticationConfig;
 import org.apache.doris.common.security.authentication.HadoopAuthenticator;
 import org.apache.doris.datasource.property.PropertyConverter;
+import org.apache.doris.datasource.property.constants.S3Properties;
 import org.apache.doris.fs.obj.S3ObjStorage;
 import org.apache.doris.fs.remote.dfs.DFSFileSystem;
 
 import com.amazonaws.services.s3.model.AmazonS3Exception;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Strings;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -102,8 +104,7 @@ public class S3FileSystem extends ObjFileSystem {
     }
 
     // broker file pattern glob is too complex, so we use hadoop directly
-    @Override
-    public Status globList(String remotePath, List<RemoteFile> result, boolean 
fileNameOnly) {
+    private Status globListImplV1(String remotePath, List<RemoteFile> result, 
boolean fileNameOnly) {
         try {
             FileSystem s3AFileSystem = nativeFileSystem(remotePath);
             Path pathPattern = new Path(remotePath);
@@ -118,6 +119,10 @@ public class S3FileSystem extends ObjFileSystem {
                         fileStatus.getBlockSize(), 
fileStatus.getModificationTime());
                 result.add(remoteFile);
             }
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("remotePath:{}, result:{}", remotePath, result);
+            }
+
         } catch (FileNotFoundException e) {
             LOG.info("file not found: " + e.getMessage());
             return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + 
e.getMessage());
@@ -139,6 +144,24 @@ public class S3FileSystem extends ObjFileSystem {
         return Status.OK;
     }
 
+    private Status globListImplV2(String remotePath, List<RemoteFile> result, 
boolean fileNameOnly) {
+        return ((S3ObjStorage) objStorage).globList(remotePath, result, 
fileNameOnly);
+    }
+
+    @Override
+    public Status globList(String remotePath, List<RemoteFile> result, boolean 
fileNameOnly) {
+        if (!Strings.isNullOrEmpty(properties.get(S3Properties.ROLE_ARN))
+                || 
!Strings.isNullOrEmpty(properties.get(S3Properties.Env.ROLE_ARN))) {
+            // https://issues.apache.org/jira/browse/HADOOP-19201
+            // The hadoop 3.3.6 we use now does not support aws assumed role with external id,
+            // so we wrote globListImplV2 to support it
+            LOG.info("aws role arn mode, use globListImplV2");
+            return globListImplV2(remotePath, result, fileNameOnly);
+        }
+
+        return globListImplV1(remotePath, result, fileNameOnly);
+    }
+
     @VisibleForTesting
     public HadoopAuthenticator getAuthenticator() {
         return authenticator;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java
new file mode 100644
index 00000000000..06fa47c361c
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/fs/obj/S3ObjStorageGlobListTest.java
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.fs.obj;
+
+import org.apache.doris.backup.Status;
+import org.apache.doris.datasource.property.constants.S3Properties;
+import org.apache.doris.fs.remote.RemoteFile;
+
+import mockit.Mock;
+import mockit.MockUp;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+import java.nio.file.FileSystems;
+import java.nio.file.PathMatcher;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+public class S3ObjStorageGlobListTest {
+    private static final Logger LOG = 
LogManager.getLogger(S3ObjStorageGlobListTest.class);
+
+    public static class I {
+        public String pattern;
+        public Long expectedMatchSize;
+
+        public I(String pattern, long expectedMatchSize) {
+            this.pattern = pattern;
+            this.expectedMatchSize = expectedMatchSize;
+        }
+    }
+
+    public static List<I> genInputs() {
+        // refer genObjectKeys
+        List<I> inputs = new ArrayList<I>();
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/*/tmp*", 8196L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/tmp*", 4098L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/*tmp*", 4098L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/**/tmp*", 20490L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/**/tmp*", 32784L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/*", 3L)); // no files at 
1st level
+        inputs.add(new I("s3://gavin-test-jp/s3-test/2/*", 4098L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/2*/*", 4098L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/2/*I*", 591L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1", 1L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/2", 1L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/3", 1L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/tmp.k*", 61L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/tmp.[a-z]*", 1722L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/[12]/tmp.[a-z]*", 3444L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/2/tmp.ehJi0Y5QKt", 1L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/non-existed-dir", 0L));
+        inputs.add(new I("s3://gavin-test-jp/s3-test/1/2/non-existed-file", 
0L));
+        inputs.add(new 
I("s3://gavin-test-jp/s3_backup/__palo_repository_s3_repo1/__ss_*/*", 1L));
+
+        return inputs;
+    }
+
+    private static List<String> genObjKeys() {
+        String fileSuffixes = "zMkg8GtbSL MaSolJd8sL gtujCOzywm XBSnMwdoa9 
FTxFgeihCa k9mp5K23pY dxjWuBC0dz osaFdxo2mz h4PEGleain aGxFstkIZz lGZbBe35uE 
gRILmhSPVm Ta3S5IUQiC 730eZqyhum XUhMhqk0DG cL7VKXMQuN DOb5J5MUFV i2dg8BSuTE 
FIzNOcoekf N24tUXQ8ws c31UcFGP1S S7bkoinC5u CCOKe8YL1N b5qGztFktP C7M1G8mYIP 
0JA5yp6VAt lfJpMUYDe5 vXVOGgbZD3 UtM44M7c6F gPcli9bsQ4 cKP1TAjXfC v5k8Wksy9k 
UNkJtnIWwV 4UB0gbsI9g U4yKzgRbkG XcYaYUubBH bNVYZKq422 AvUoK2bcwS 5t6QDuG5ox 
Hj8lPHhsZL 08KZbcvC8P 5nYfmpu0Xj [...]
+        List<String> ret = new ArrayList<>();
+        for (String i : fileSuffixes.split(" ")) {
+            ret.add("s3-test/1/tmp." + i);
+            ret.add("s3-test/1/2/tmp." + i);
+            ret.add("s3-test/1/2/1/tmp." + i);
+            ret.add("s3-test/1/2/2/tmp." + i);
+            ret.add("s3-test/1/2/3/tmp." + i);
+            ret.add("s3-test/1/3/tmp." + i);
+            ret.add("s3-test/2/tmp." + i);
+            ret.add("s3-test/3/tmp." + i);
+        }
+        
ret.add("s3_backup/__palo_repository_s3_repo1/__ss_yyq/__meta.ff9b669c1505f51993d5fb448a345811");
+        ret.sort(String::compareTo);
+        return ret;
+    }
+
+    @Test
+    public void testFsGlob() {
+        for (I i : genInputs()) {
+            String pattern = i.pattern.substring(19); // remove prefix 
s3://gavin-test-jp/
+            PathMatcher matcher = 
FileSystems.getDefault().getPathMatcher("glob:" + pattern);
+            List<String> matchedPaths = new ArrayList<>();
+            HashSet<String> directories = new HashSet<>();
+            for (String p : genObjKeys()) {
+                java.nio.file.Path objPath = Paths.get(p);
+
+                while (objPath != null) {
+                    if (matcher.matches(objPath) && 
!directories.contains(objPath.toString())) {
+                        matchedPaths.add(objPath.toString());
+                        directories.add(objPath.toString());
+                    }
+                    objPath = objPath.getParent();
+                }
+            }
+            LOG.info("pattern: {}, matched:{}", i.pattern, 
matchedPaths.size());
+            Assertions.assertEquals(i.expectedMatchSize, matchedPaths.size());
+        }
+    }
+
+    public static S3ObjStorage genMockedS3ObjStorage(int numBatch) {
+        Map<String, String> props = new HashMap<String, String>();
+        props.put(S3Properties.ROLE_ARN, "test_mocked_arn");
+        props.put(S3Properties.ENDPOINT, "https://s3.us-east-1.amazonaws.com");
+        props.put(S3Properties.BUCKET, "test_mocked_bucket");
+        List<String> allObjKeys = genObjKeys();
+
+        new MockUp<S3ObjStorage>(S3ObjStorage.class) {
+            @Mock
+            ListObjectsV2Response listObjectsV2(ListObjectsV2Request 
listObjectsV2Request) {
+                ListObjectsV2Response.Builder builder = 
ListObjectsV2Response.builder();
+                List<S3Object> s3Objects = new ArrayList<>();
+                String prefix = listObjectsV2Request.prefix();
+                String continuationToken = 
listObjectsV2Request.continuationToken();
+                int startIndex = 0;
+                if (continuationToken != null) {
+                    startIndex = Integer.parseInt(continuationToken);
+                }
+                int endIndex = Math.min(startIndex + numBatch, 
allObjKeys.size());
+                for (int i = startIndex; i < endIndex; ++i) {
+                    String objKey = allObjKeys.get(i);
+                    if (prefix != null && !objKey.startsWith(prefix)) {
+                        continue;
+                    }
+                    s3Objects.add(S3Object.builder().key(objKey).size(1024L)
+                            
.lastModified(Instant.ofEpochMilli(1596476903000L)).build());
+                }
+                builder.contents(s3Objects);
+                if (endIndex < allObjKeys.size()) {
+                    builder.isTruncated(true);
+                    builder.nextContinuationToken(String.valueOf(endIndex));
+                } else {
+                    builder.isTruncated(false);
+                    builder.nextContinuationToken(null);
+                }
+                return builder.build();
+            }
+        };
+
+        S3ObjStorage s3ObjStorage = new S3ObjStorage(props);
+        return s3ObjStorage;
+    }
+
+    @Test
+    public void testGlobListWithMockedS3Storage() {
+        List<I> inputs = genInputs();
+        inputs.stream().forEach(i -> {
+            S3ObjStorage s3ObjStorage = genMockedS3ObjStorage(4);
+            List<RemoteFile> result = new ArrayList<RemoteFile>();
+            boolean fileNameOnly = false;
+            Status st = s3ObjStorage.globList(i.pattern, result, fileNameOnly);
+            System.out.println("testGlobListWithMockedS3Storage pattern: " + 
i.pattern + " matched " + result.size());
+            Assertions.assertTrue(st.ok());
+            Assertions.assertEquals(i.expectedMatchSize, result.size());
+            for (int j = 0; j < result.size() && j < 10; ++j) {
+                System.out.println(result.get(j).getName());
+            }
+
+        });
+    }
+}
diff --git a/fe/pom.xml b/fe/pom.xml
index 7340dd7fd20..8343d5d138c 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -1499,6 +1499,11 @@ under the License.
                 <artifactId>aws-java-sdk-logs</artifactId>
                 <version>${aws-java-sdk.version}</version>
             </dependency>
+            <dependency>
+                <groupId>com.amazonaws</groupId>
+                <artifactId>aws-java-sdk-sts</artifactId>
+                <version>${aws-java-sdk.version}</version>
+            </dependency>
             <dependency>
                 <groupId>org.mariadb.jdbc</groupId>
                 <artifactId>mariadb-java-client</artifactId>
diff --git a/regression-test/suites/vault_p0/create/test_create_vault.groovy 
b/regression-test/suites/vault_p0/create/test_create_vault.groovy
index c0128291f82..502782a6cc1 100644
--- a/regression-test/suites/vault_p0/create/test_create_vault.groovy
+++ b/regression-test/suites/vault_p0/create/test_create_vault.groovy
@@ -233,21 +233,6 @@ suite("test_create_vault", "nonConcurrent") {
         );
     """
 
-    sql """
-        CREATE STORAGE VAULT IF NOT EXISTS ${s3VaultName}
-        PROPERTIES (
-            "type"="S3",
-            "s3.endpoint"="${getS3Endpoint()}",
-            "s3.region" = "${getS3Region()}",
-            "s3.access_key" = "${getS3AK()}",
-            "s3.secret_key" = "${getS3SK()}",
-            "s3.root.path" = "${s3VaultName}",
-            "s3.bucket" = "${getS3BucketName()}",
-            "s3.external_endpoint" = "",
-            "provider" = "${getS3Provider()}"
-        );
-    """
-
     sql """
         CREATE TABLE ${s3VaultName} (
             C_CUSTKEY     INTEGER NOT NULL,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to