This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 61a3d3b2a87 [opt](s3) Skip S3 listing for deterministic file paths 
using HEAD requests (#60414)
61a3d3b2a87 is described below

commit 61a3d3b2a87c2fc87407e7d0aac011f12809c885
Author: Yongqiang YANG <[email protected]>
AuthorDate: Sat Feb 28 00:02:00 2026 -0800

    [opt](s3) Skip S3 listing for deterministic file paths using HEAD requests 
(#60414)
    
    ## Summary
    
    - For S3 paths without true wildcards (`*`, `?`, negated `[!...]` /
    `[^...]`), use HEAD requests instead of ListObjectsV2 to avoid requiring
    `s3:ListBucket` permission
    - Brace patterns like `{1..10}` and non-negated bracket patterns like
    `[abc]` are expanded to concrete file paths and verified individually
    with HEAD requests
    - This enables loading data from S3 when only `s3:GetObject` permission
    is granted
    
    ## Motivation
    
    S3 `ListBucket` permission is often more restricted than `GetObject` in
    enterprise environments. When users specify exact file paths or
    deterministic patterns like `file{1..3}.csv`, listing is unnecessary
    since the file names can be determined from the input.
    
    ## Changes
    
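    | File | Description |
    |------|-------------|
    | `Config.java` | Added `s3_skip_list_for_deterministic_path` and `s3_head_request_max_paths` |
    | `S3Util.java` | Added `isDeterministicPattern()` to detect paths without wildcards, and `expandBracketPatterns()` / `expandBracePatterns()` to expand bracket and brace patterns to concrete paths |
    | `S3ObjStorage.java` | Modified `globListInternal()` to use HEAD requests for deterministic paths |
    | `AzureObjStorage.java` | Uses `getProperties` requests instead of listing for deterministic paths |
    | `S3UtilTest.java` | Added unit tests for new utility methods |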
    
    ## Examples
    
    | Path | Deterministic? | Behavior |
    |------|----------------|----------|
    | `s3://bucket/data/file.csv` | ✅ Yes | Single HEAD request |
    | `s3://bucket/data/file{1..3}.csv` | ✅ Yes | 3 HEAD requests |
    | `s3://bucket/data/*.csv` | ❌ No | Falls back to LIST |
    
    ## Test Plan
    
    - [x] Added unit tests for `isDeterministicPattern()`
    - [x] Added unit tests for `expandBracePatterns()`
    - [ ] Manual testing with S3 TVF and Broker Load
    
    🤖 Generated with [Claude Code](https://claude.ai/code)
---
 .../main/java/org/apache/doris/common/Config.java  |  20 ++
 .../java/org/apache/doris/common/util/S3Util.java  | 232 +++++++++++++++++++++
 .../org/apache/doris/fs/obj/AzureObjStorage.java   |  99 ++++++++-
 .../java/org/apache/doris/fs/obj/S3ObjStorage.java | 108 ++++++++++
 .../org/apache/doris/common/util/S3UtilTest.java   | 208 ++++++++++++++++++
 5 files changed, 666 insertions(+), 1 deletion(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 2b06fcc5a80..45735468dac 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3508,6 +3508,26 @@ public class Config extends ConfigBase {
             + "for example: s3_load_endpoint_white_list=a,b,c"})
     public static String[] s3_load_endpoint_white_list = {};
 
+    // Switch for the HEAD-instead-of-LIST optimization on deterministic S3 paths (declared mutable).
+    @ConfField(mutable = true, description = {
+            "对于确定性的 S3 路径(无通配符如 *, ?),使用 HEAD 请求代替 ListObjects 来避免需要 ListBucket 权限。"
+            + "花括号模式 {1,2,3} 和非否定方括号模式 [abc] 会展开为具体路径。"
+            + "这对于只有 GetObject 权限的场景很有用。如果遇到问题可以设置为 false 回退到原有行为。",
+            "For deterministic S3 paths (without wildcards like *, ?), use HEAD requests instead of "
+            + "ListObjects to avoid requiring ListBucket permission. Brace patterns {1,2,3} and "
+            + "non-negated bracket patterns [abc] are expanded to concrete paths. This is useful when only "
+            + "GetObject permission is granted. Set to false to fall back to the original listing behavior."
+    })
+    public static boolean s3_skip_list_for_deterministic_path = true;
+
+    // Upper bound on the number of expanded paths probed one by one before falling back to listing.
+    @ConfField(mutable = true, description = {
+            "当使用 HEAD 请求代替 ListObjects 时,展开路径的最大数量。如果展开的路径数量超过此限制,"
+            + "将回退到使用 ListObjects。这可以防止类似 {1..100}/{1..100} 的模式触发过多的 HEAD 请求。",
+            "Maximum number of expanded paths when using HEAD requests instead of ListObjects. "
+            + "If the expanded path count exceeds this limit, falls back to ListObjects. "
+            + "This prevents patterns like {1..100}/{1..100} from triggering too many HEAD requests."
+    })
+    public static int s3_head_request_max_paths = 100;
+
     @ConfField(mutable = true, description = {
             "此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。"
             + "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。",
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index e537d1f47b0..3e4f4e7a62f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -433,4 +433,236 @@ public class S3Util {
             SecurityChecker.getInstance().stopSSRFChecking();
         }
     }
+
+    /**
+     * Tells whether every object key matched by {@code pathPattern} can be enumerated
+     * from the pattern text alone, i.e. without listing the bucket.
+     *
+     * <p>The pattern is rejected (returns {@code false}) when it contains:
+     * <ul>
+     *   <li>a {@code *} or {@code ?} wildcard;</li>
+     *   <li>a backslash, since escaped characters are not interpreted by this check;</li>
+     *   <li>a bracket expression that is negated ({@code [!abc]}, {@code [^abc]}),
+     *       empty ({@code []}) or unterminated.</li>
+     * </ul>
+     * Brace groups and plain bracket sets ({@code [abc]}, {@code [0-9]}) are accepted
+     * because they describe a finite set of names.
+     *
+     * @param pathPattern path that may contain glob syntax; must not be {@code null}
+     * @return {@code true} if the pattern can be expanded without listing
+     */
+    public static boolean isDeterministicPattern(String pathPattern) {
+        boolean hasWildcard = pathPattern.indexOf('*') >= 0 || pathPattern.indexOf('?') >= 0;
+        boolean hasEscape = pathPattern.indexOf('\\') >= 0;
+        if (hasWildcard || hasEscape) {
+            return false;
+        }
+        // Bracket expressions are the only remaining construct that may require a listing.
+        return areBracketPatternsDeterministic(pathPattern);
+    }
+
+    /**
+     * Checks that every bracket expression in {@code pattern} can be expanded to a finite,
+     * brace-safe set of characters.
+     *
+     * <p>A bracket expression makes the pattern non-deterministic when it is:
+     * <ul>
+     *   <li>unterminated (no closing {@code ]}) or empty ({@code []});</li>
+     *   <li>negated ({@code [!abc]} or {@code [^abc]});</li>
+     *   <li>a set whose members include {@code ','}, {@code '{'} or {@code '}'}. The
+     *       bracket-to-brace rewrite in this class would turn such a set into a brace
+     *       group with spurious separators or nesting, so the caller must list instead.</li>
+     * </ul>
+     *
+     * @param pattern the path pattern to inspect
+     * @return {@code true} if all bracket expressions are safely expandable
+     */
+    private static boolean areBracketPatternsDeterministic(String pattern) {
+        int open = pattern.indexOf('[');
+        while (open != -1) {
+            int close = pattern.indexOf(']', open + 1);
+            if (close == -1 || close == open + 1) {
+                // Unterminated or empty bracket expression.
+                return false;
+            }
+            char lead = pattern.charAt(open + 1);
+            if (lead == '!' || lead == '^') {
+                return false;
+            }
+            if (bracketContentHasBraceSyntax(pattern.substring(open + 1, close))) {
+                return false;
+            }
+            open = pattern.indexOf('[', close + 1);
+        }
+        return true;
+    }
+
+    /**
+     * Returns true when the character set described by a bracket body contains a character
+     * that is meaningful to brace expansion (comma or curly brace), either literally or
+     * inside an {@code x-y} range. Ranges are recognised with the same rule as
+     * {@code expandBracketContent}: three characters with {@code '-'} in the middle.
+     */
+    private static boolean bracketContentHasBraceSyntax(String content) {
+        final char[] braceMeta = {',', '{', '}'};
+        int i = 0;
+        while (i < content.length()) {
+            int lo = content.charAt(i);
+            int hi = lo;
+            int consumed = 1;
+            if (i + 2 < content.length() && content.charAt(i + 1) == '-') {
+                hi = content.charAt(i + 2);
+                consumed = 3;
+            }
+            int min = Math.min(lo, hi);
+            int max = Math.max(lo, hi);
+            for (char meta : braceMeta) {
+                if (min <= meta && meta <= max) {
+                    return true;
+                }
+            }
+            i += consumed;
+        }
+        return false;
+    }
+
+    /**
+     * Rewrites each bracket character class in {@code pathPattern} as an equivalent brace
+     * group, so that brace expansion can enumerate it afterwards.
+     *
+     * <p>Examples:
+     * <pre>
+     *   file[abc].csv   becomes  file{a,b,c}.csv
+     *   file[0-9].csv   becomes  file{0,1,2,3,4,5,6,7,8,9}.csv
+     *   file[a-cX].csv  becomes  file{a,b,c,X}.csv
+     *   file.csv        becomes  file.csv
+     * </pre>
+     *
+     * <p>A {@code '['} with no closing {@code ']'} is copied through unchanged, and an empty
+     * {@code []} becomes {@code {}}. Negation is not interpreted here, so callers are expected
+     * to have accepted the pattern with {@code isDeterministicPattern()} first.
+     *
+     * @param pathPattern path with optional bracket classes (must not contain negated ones)
+     * @return the path with every terminated bracket class replaced by a brace group
+     */
+    public static String expandBracketPatterns(String pathPattern) {
+        StringBuilder out = new StringBuilder();
+        int cursor = 0;
+        while (true) {
+            int open = pathPattern.indexOf('[', cursor);
+            int close = open == -1 ? -1 : pathPattern.indexOf(']', open + 1);
+            if (close == -1) {
+                // No further terminated bracket class: the remainder is literal text.
+                out.append(pathPattern, cursor, pathPattern.length());
+                return out.toString();
+            }
+            out.append(pathPattern, cursor, open).append('{');
+            String separator = "";
+            for (Character member : expandBracketContent(pathPattern.substring(open + 1, close))) {
+                out.append(separator).append(member);
+                separator = ",";
+            }
+            out.append('}');
+            cursor = close + 1;
+        }
+    }
+
+    /**
+     * Expands the body of a bracket class (the text between {@code [} and {@code ]}) into the
+     * distinct characters it denotes, in first-occurrence order.
+     *
+     * <p>Three characters with {@code '-'} in the middle form an inclusive range, walked
+     * upwards or downwards depending on the endpoints; anything else is a literal character.
+     *
+     * @param content bracket body without the surrounding brackets
+     * @return the distinct member characters, in the order they were first produced
+     */
+    private static List<Character> expandBracketContent(String content) {
+        // An insertion-ordered set keeps the original ordering while de-duplicating in O(1).
+        java.util.Set<Character> members = new java.util.LinkedHashSet<>();
+        int i = 0;
+        while (i < content.length()) {
+            if (i + 2 < content.length() && content.charAt(i + 1) == '-') {
+                // Iterate with an int: a char counter wraps at '\uffff' / '\u0000' and would
+                // never terminate for ranges that touch either end of the char domain.
+                int from = content.charAt(i);
+                int to = content.charAt(i + 2);
+                int step = from <= to ? 1 : -1;
+                for (int code = from; code != to + step; code += step) {
+                    members.add((char) code);
+                }
+                i += 3;
+            } else {
+                members.add(content.charAt(i));
+                i++;
+            }
+        }
+        return new ArrayList<>(members);
+    }
+
+    /**
+     * Expands every brace group in {@code pathPattern} into the concrete paths it stands for.
+     * Several groups in one path and groups nested inside each other are both supported.
+     *
+     * <p>Examples:
+     * <pre>
+     *   file{1,2,3}.csv           gives  file1.csv, file2.csv, file3.csv
+     *   data/part{1,2}/file.csv   gives  data/part1/file.csv, data/part2/file.csv
+     *   file.csv                  gives  file.csv
+     * </pre>
+     *
+     * <p>Only comma-separated alternatives are split here. A numeric range such as
+     * {@code {1..3}} is presumably rewritten to {@code {1,2,3}} by {@code extendGlobs}, which
+     * the callers in this change invoke first; confirm before calling this on raw input.
+     *
+     * @param pathPattern path with optional brace groups (already processed by extendGlobs)
+     * @return the expanded paths, in left-to-right alternative order
+     */
+    public static List<String> expandBracePatterns(String pathPattern) {
+        List<String> expanded = new ArrayList<>();
+        expandBracePatternsRecursive(pathPattern, expanded);
+        return expanded;
+    }
+
+    /**
+     * Expands the left-most brace group of {@code pattern} and recurses on each resulting
+     * string until no group is left, appending finished paths to {@code result}.
+     * A pattern with no {@code '{'}, or whose first {@code '{'} has no matching {@code '}'},
+     * is appended unchanged.
+     */
+    private static void expandBracePatternsRecursive(String pattern, List<String> result) {
+        int open = pattern.indexOf('{');
+        int close = open == -1 ? -1 : findMatchingBrace(pattern, open);
+        if (close == -1) {
+            // Either brace-free or malformed: emit as a literal path.
+            result.add(pattern);
+            return;
+        }
+
+        String head = pattern.substring(0, open);
+        String tail = pattern.substring(close + 1);
+        // Alternatives are split on top-level commas only, so nested groups stay intact
+        // and are handled by the recursive call.
+        for (String choice : splitBraceContent(pattern.substring(open + 1, close))) {
+            expandBracePatternsRecursive(head + choice + tail, result);
+        }
+    }
+
+    /**
+     * Scans {@code pattern} from index {@code start} and returns the index of the
+     * {@code '}'} that brings the brace nesting count back to zero, or -1 if there is none.
+     */
+    private static int findMatchingBrace(String pattern, int start) {
+        int nesting = 0;
+        int pos = start;
+        while (pos < pattern.length()) {
+            switch (pattern.charAt(pos)) {
+                case '{':
+                    nesting++;
+                    break;
+                case '}':
+                    nesting--;
+                    if (nesting == 0) {
+                        return pos;
+                    }
+                    break;
+                default:
+                    break;
+            }
+            pos++;
+        }
+        return -1;
+    }
+
+    /**
+     * Splits the body of a brace group on commas that are not inside a nested group.
+     * Always returns at least one element; empty alternatives are kept as empty strings.
+     */
+    private static List<String> splitBraceContent(String content) {
+        List<String> pieces = new ArrayList<>();
+        StringBuilder current = new StringBuilder();
+        int nesting = 0;
+        for (char ch : content.toCharArray()) {
+            if (ch == ',' && nesting == 0) {
+                // Top-level separator: close the current alternative.
+                pieces.add(current.toString());
+                current.setLength(0);
+                continue;
+            }
+            if (ch == '{') {
+                nesting++;
+            } else if (ch == '}') {
+                nesting--;
+            }
+            current.append(ch);
+        }
+        pieces.add(current.toString());
+        return pieces;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 6b0c198d841..4929e34e7f5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -18,6 +18,7 @@
 package org.apache.doris.fs.obj;
 
 import org.apache.doris.backup.Status;
+import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.S3URI;
@@ -357,8 +358,24 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         try {
             remotePath = 
AzurePropertyUtils.validateAndNormalizeUri(remotePath);
             S3URI uri = S3URI.create(remotePath, isUsePathStyle, 
forceParsingByStandardUri);
-            String globPath = S3Util.extendGlobs(uri.getKey());
             String bucket = uri.getBucket();
+
+            // Optimization: For deterministic paths (no wildcards like *, ?),
+            // use getProperties requests instead of listing to avoid 
requiring list permission.
+            // Controlled by config: s3_skip_list_for_deterministic_path
+            // Note: Skip when using path style (see S3ObjStorage for detailed 
explanation)
+            String keyPattern = uri.getKey();
+            if (Config.s3_skip_list_for_deterministic_path
+                    && !isUsePathStyle
+                    && S3Util.isDeterministicPattern(keyPattern)) {
+                Status headStatus = globListByGetProperties(bucket, 
keyPattern, result, fileNameOnly, startTime);
+                if (headStatus != null) {
+                    return headStatus;
+                }
+                // If headStatus is null, fall through to use listing
+            }
+
+            String globPath = S3Util.extendGlobs(uri.getKey());
             if (LOG.isDebugEnabled()) {
                 LOG.debug("try to glob list for azure, remote path {}, orig 
{}", globPath, remotePath);
             }
@@ -436,6 +453,86 @@ public class AzureObjStorage implements 
ObjStorage<BlobServiceClient> {
         return st;
     }
 
+    /**
+     * Resolves a deterministic key pattern by probing each expanded key with a
+     * getProperties request instead of listing the container, so that list permission
+     * is not needed.
+     *
+     * <p>On any failure the entries this method appended to {@code result} are removed again
+     * before {@code null} is returned, because the caller then falls through to listing with
+     * the same list.
+     *
+     * @param bucket       Azure container name
+     * @param keyPattern   The key pattern (may contain {..} brace or [...] bracket patterns but no wildcards)
+     * @param result       List to store matching RemoteFile objects
+     * @param fileNameOnly If true, only store file names; otherwise store full paths
+     * @param startTime    Start time (System.nanoTime based) used for logging the duration
+     * @return Status.OK if all probes completed, null if the caller should fall back to listing
+     */
+    private Status globListByGetProperties(String bucket, String keyPattern,
+            List<RemoteFile> result, boolean fileNameOnly, long startTime) {
+        // Remember where our additions start so a mid-loop failure can undo them.
+        final int sizeBefore = result.size();
+        try {
+            // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces
+            String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
+            expandedPattern = S3Util.extendGlobs(expandedPattern);
+            List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern);
+
+            // Fall back to listing if too many paths to avoid overwhelming Azure with requests
+            // Controlled by config: s3_head_request_max_paths
+            if (expandedPaths.size() > Config.s3_head_request_max_paths) {
+                LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST",
+                        expandedPaths.size(), Config.s3_head_request_max_paths);
+                return null;
+            }
+
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Using getProperties requests for deterministic path pattern, expanded to {} paths",
+                        expandedPaths.size());
+            }
+
+            BlobContainerClient containerClient = getClient().getBlobContainerClient(bucket);
+            long matchCnt = 0;
+            for (String key : expandedPaths) {
+                String fullPath = constructS3Path(key, bucket);
+                try {
+                    BlobClient blobClient = containerClient.getBlobClient(key);
+                    BlobProperties props = blobClient.getProperties();
+
+                    matchCnt++;
+                    // NOTE(review): modification time is in epoch seconds here, while the S3
+                    // counterpart in this patch passes epoch milliseconds - confirm which unit
+                    // RemoteFile expects.
+                    RemoteFile remoteFile = new RemoteFile(
+                            fileNameOnly ? Paths.get(key).getFileName().toString() : fullPath,
+                            true, // isFile
+                            props.getBlobSize(),
+                            props.getBlobSize(),
+                            props.getLastModified() != null
+                                    ? props.getLastModified().toEpochSecond() : 0
+                    );
+                    result.add(remoteFile);
+
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("getProperties success for {}: size={}", fullPath, props.getBlobSize());
+                    }
+                } catch (BlobStorageException e) {
+                    if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND
+                            || BlobErrorCode.BLOB_NOT_FOUND.equals(e.getErrorCode())) {
+                        // File does not exist, skip it (this is expected for some expanded patterns)
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("File does not exist (skipped): {}", fullPath);
+                        }
+                    } else {
+                        throw e;
+                    }
+                }
+            }
+
+            if (LOG.isDebugEnabled()) {
+                long duration = System.nanoTime() - startTime;
+                LOG.debug("Deterministic path getProperties requests: checked {} paths, found {} files, took {} ms",
+                        expandedPaths.size(), matchCnt, duration / 1000 / 1000);
+            }
+
+            return Status.OK;
+        } catch (Exception e) {
+            // Undo partial output; otherwise the listing fallback would see these files
+            // already present in the same list and they could be reported twice.
+            if (result.size() > sizeBefore) {
+                result.subList(sizeBefore, result.size()).clear();
+            }
+            LOG.warn("Failed to use getProperties requests, falling back to listing: {}", e.getMessage());
+            return null;
+        }
+    }
+
     public Status listFiles(String remotePath, boolean recursive, 
List<RemoteFile> result) {
         try {
             remotePath = 
AzurePropertyUtils.validateAndNormalizeUri(remotePath);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 33694b1a3d8..b00f696dad7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -18,6 +18,7 @@
 package org.apache.doris.fs.obj;
 
 import org.apache.doris.backup.Status;
+import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.S3URI;
@@ -584,6 +585,28 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
             }
 
             bucket = uri.getBucket();
+
+            // Optimization: For deterministic paths (no wildcards like *, ?),
+            // use HEAD requests instead of listing to avoid requiring 
ListBucket permission.
+            // This is useful when only GetObject permission is granted.
+            // Controlled by config: s3_skip_list_for_deterministic_path
+            // Note: Skip when using path style because path-style parsing of 
virtual-host URLs
+            // can produce accidental HEAD successes where LIST would 
correctly fail.
+            // (e.g., http://bucket.endpoint/key with path_style=true: HEAD 
URL coincidentally
+            // matches the correct virtual-host URL, while LIST URL format is 
different and fails)
+            String keyPattern = uri.getKey();
+            if (Config.s3_skip_list_for_deterministic_path
+                    && !isUsePathStyle
+                    && S3Util.isDeterministicPattern(keyPattern)
+                    && !hasLimits && startFile == null) {
+                GlobListResult headResult = globListByHeadRequests(
+                        bucket, keyPattern, result, fileNameOnly, startTime);
+                if (headResult != null) {
+                    return headResult;
+                }
+                // If headResult is null, fall through to use listing
+            }
+
             String globPath = S3Util.extendGlobs(uri.getKey());
 
             if (LOG.isDebugEnabled()) {
@@ -705,6 +728,91 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
         }
     }
 
+    /**
+     * Resolves a deterministic key pattern by issuing one HEAD request per expanded key
+     * instead of listing the bucket, so that ListBucket permission is not needed.
+     *
+     * <p>On any failure the entries this method appended to {@code result} are removed again
+     * before {@code null} is returned, because the caller then falls through to listing with
+     * the same list.
+     *
+     * <p>NOTE(review): the S3 HeadObject documentation states that a missing key is reported
+     * as 403 rather than 404 when the caller lacks s3:ListBucket. With GetObject-only
+     * credentials a non-existent expanded key would therefore reach the rethrow branch below
+     * and trigger the listing fallback - confirm this is the intended behavior.
+     *
+     * @param bucket       S3 bucket name
+     * @param keyPattern   The key pattern (may contain {..} brace or [...] bracket patterns but no wildcards)
+     * @param result       List to store matching RemoteFile objects
+     * @param fileNameOnly If true, only store file names; otherwise store full S3 paths
+     * @param startTime    Start time (System.nanoTime based) used for logging the duration
+     * @return GlobListResult if all probes completed, null if the caller should fall back to listing
+     */
+    private GlobListResult globListByHeadRequests(String bucket, String keyPattern,
+            List<RemoteFile> result, boolean fileNameOnly, long startTime) {
+        // Remember where our additions start so a mid-loop failure can undo them.
+        final int sizeBefore = result.size();
+        try {
+            // First expand [...] brackets to {...} braces, then expand {..} ranges, then expand braces
+            String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
+            expandedPattern = S3Util.extendGlobs(expandedPattern);
+            List<String> expandedPaths = S3Util.expandBracePatterns(expandedPattern);
+
+            // Fall back to listing if too many paths to avoid overwhelming S3 with HEAD requests
+            // Controlled by config: s3_head_request_max_paths
+            if (expandedPaths.size() > Config.s3_head_request_max_paths) {
+                LOG.info("Expanded path count {} exceeds limit {}, falling back to LIST",
+                        expandedPaths.size(), Config.s3_head_request_max_paths);
+                return null;
+            }
+
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Using HEAD requests for deterministic path pattern, expanded to {} paths",
+                        expandedPaths.size());
+            }
+
+            long matchCnt = 0;
+            for (String key : expandedPaths) {
+                String fullPath = "s3://" + bucket + "/" + key;
+                try {
+                    HeadObjectResponse headResponse = getClient()
+                            .headObject(HeadObjectRequest.builder()
+                                    .bucket(bucket)
+                                    .key(key)
+                                    .build());
+
+                    matchCnt++;
+                    RemoteFile remoteFile = new RemoteFile(
+                            fileNameOnly ? Paths.get(key).getFileName().toString() : fullPath,
+                            true, // isFile
+                            headResponse.contentLength(),
+                            headResponse.contentLength(),
+                            headResponse.lastModified() != null
+                                    ? headResponse.lastModified().toEpochMilli() : 0
+                    );
+                    result.add(remoteFile);
+
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("HEAD success for {}: size={}", fullPath, headResponse.contentLength());
+                    }
+                } catch (NoSuchKeyException e) {
+                    // File does not exist, skip it (this is expected for some expanded patterns)
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("File does not exist (skipped): {}", fullPath);
+                    }
+                } catch (S3Exception e) {
+                    // Some responses surface a 404 as a generic S3Exception; treat it the same way.
+                    if (e.statusCode() == HttpStatus.SC_NOT_FOUND) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("File does not exist (skipped): {}", fullPath);
+                        }
+                    } else {
+                        throw e;
+                    }
+                }
+            }
+
+            if (LOG.isDebugEnabled()) {
+                long duration = System.nanoTime() - startTime;
+                LOG.debug("Deterministic path HEAD requests: checked {} paths, found {} files, took {} ms",
+                        expandedPaths.size(), matchCnt, duration / 1000 / 1000);
+            }
+
+            return new GlobListResult(Status.OK, "", bucket, "");
+        } catch (Exception e) {
+            // Undo partial output; otherwise the listing fallback would see these files
+            // already present in the same list and they could be reported twice.
+            if (result.size() > sizeBefore) {
+                result.subList(sizeBefore, result.size()).clear();
+            }
+            LOG.warn("Failed to use HEAD requests, falling back to listing: {}", e.getMessage());
+            return null;
+        }
+    }
+
     private static boolean reachLimit(int matchFileCnt, long matchFileSize, 
long sizeLimit, long fileNum) {
         if (matchFileCnt < 0 || sizeLimit < 0 || fileNum < 0) {
             return false;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
index 23715440e8c..4b976ed86cd 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -20,6 +20,9 @@ package org.apache.doris.common.util;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.Arrays;
+import java.util.List;
+
 public class S3UtilTest {
 
     @Test
@@ -248,5 +251,210 @@ public class S3UtilTest {
         String result = S3Util.extendGlobs(input);
         Assert.assertEquals(expected, result);
     }
+
+    // Tests for isDeterministicPattern
+
+    @Test
+    public void testIsDeterministicPattern_simpleFile() {
+        // A plain key with no glob syntax at all is expected to be deterministic
+        final String plainKey = "path/to/file.csv";
+        boolean deterministic = S3Util.isDeterministicPattern(plainKey);
+        Assert.assertTrue(deterministic);
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withBraces() {
+        // Brace lists ({1,2,3}) and brace ranges ({1..3}) are expected to be
+        // reported as deterministic, since they can be expanded to concrete paths
+        Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1,2,3}.csv"));
+        Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1..3}.csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withAsterisk() {
+        // '*' in the file name or in a directory segment: expected to be non-deterministic
+        String[] asteriskPaths = {"path/to/*.csv", "path/*/file.csv"};
+        for (String candidate : asteriskPaths) {
+            Assert.assertFalse(S3Util.isDeterministicPattern(candidate));
+        }
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withQuestionMark() {
+        // '?' single-character wildcard: expected to be non-deterministic
+        final String questionMarkPath = "path/to/file?.csv";
+        boolean deterministic = S3Util.isDeterministicPattern(questionMarkPath);
+        Assert.assertFalse(deterministic);
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withBrackets() {
+        // Non-negated bracket patterns (ranges, character lists, multiple ranges)
+        // are expected to be deterministic because they can be expanded
+        Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[0-9].csv"));
+        Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[abc].csv"));
+        Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[a-zA-Z].csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withNegatedBrackets() {
+        // Negated bracket patterns, in both the [!...] and [^...] spellings,
+        // are expected to be NOT deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[!abc].csv"));
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[^0-9].csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withMalformedBrackets() {
+        // Malformed brackets (no closing ]) are expected to be NOT deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[abc.csv"));
+        // Empty brackets [] are expected to be NOT deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[].csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_withEscape() {
+        // A backslash escape in the path (here "\*") is expected to be treated
+        // as not deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file\\*.csv"));
+    }
+
+    @Test
+    public void testIsDeterministicPattern_mixed() {
+        // A brace pattern combined with a '*' wildcard: the wildcard is expected
+        // to make the whole path non-deterministic
+        Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file{1,2}/*.csv"));
+    }
+
+    // Tests for expandBracePatterns
+
+    @Test
+    public void testExpandBracePatterns_noBraces() {
+        // Without any braces the expansion is expected to be just the input path
+        List<String> expected = Arrays.asList("path/to/file.csv");
+        List<String> actual = S3Util.expandBracePatterns("path/to/file.csv");
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testExpandBracePatterns_simpleBrace() {
+        // A single brace list expands to one path per alternative, in listed order
+        List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv");
+        Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracePatterns_multipleBraces() {
+        // Two brace lists expand to every combination (2 x 2 = 4 paths);
+        // the expected order varies the right-most brace fastest
+        List<String> result = S3Util.expandBracePatterns("dir{a,b}/file{1,2}.csv");
+        Assert.assertEquals(Arrays.asList(
+                "dira/file1.csv", "dira/file2.csv",
+                "dirb/file1.csv", "dirb/file2.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracePatterns_emptyBrace() {
+        // "{}" with no content is expected to expand to the path with the braces removed
+        List<String> expected = Arrays.asList("file.csv");
+        List<String> actual = S3Util.expandBracePatterns("file{}.csv");
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testExpandBracePatterns_singleValue() {
+        // A brace holding exactly one alternative is expected to yield exactly one path
+        List<String> expected = Arrays.asList("file1.csv");
+        List<String> actual = S3Util.expandBracePatterns("file{1}.csv");
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testExpandBracePatterns_withPath() {
+        // Full path with braces in two directory segments: 2 years x 2 months = 4 paths.
+        // Only membership and count are checked here, not the order of the results.
+        List<String> result = S3Util.expandBracePatterns("data/year{2023,2024}/month{01,02}/file.csv");
+        Assert.assertEquals(4, result.size());
+        Assert.assertTrue(result.contains("data/year2023/month01/file.csv"));
+        Assert.assertTrue(result.contains("data/year2023/month02/file.csv"));
+        Assert.assertTrue(result.contains("data/year2024/month01/file.csv"));
+        Assert.assertTrue(result.contains("data/year2024/month02/file.csv"));
+    }
+
+    @Test
+    public void testExpandBracePatterns_extendedRange() {
+        // A {1..3} range is first passed through extendGlobs, and that output is
+        // then brace-expanded; the end result is expected to be the three files
+        String expanded = S3Util.extendGlobs("file{1..3}.csv");
+        List<String> result = S3Util.expandBracePatterns(expanded);
+        Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv", "file3.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracePatterns_malformedBrace() {
+        // An opening '{' with no closing '}' is expected to be kept as literal text
+        List<String> expected = Arrays.asList("file{1,2.csv");
+        List<String> actual = S3Util.expandBracePatterns("file{1,2.csv");
+        Assert.assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testExpandBracePatterns_malformedBraceWithDots() {
+        // A range-looking "{1.." with no closing '}' is expected to be kept as literal text
+        List<String> expected = Arrays.asList("file{1..csv");
+        List<String> actual = S3Util.expandBracePatterns("file{1..csv");
+        Assert.assertEquals(expected, actual);
+    }
+
+    // Tests for expandBracketPatterns
+
+    @Test
+    public void testExpandBracketPatterns_noBrackets() {
+        // No brackets - the path is expected to come back unchanged
+        Assert.assertEquals("path/to/file.csv", S3Util.expandBracketPatterns("path/to/file.csv"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_simpleCharList() {
+        // A character list is rewritten to the equivalent brace list: [abc] => {a,b,c}
+        Assert.assertEquals("file{a,b,c}.csv", S3Util.expandBracketPatterns("file[abc].csv"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_charRange() {
+        // A character range is enumerated inclusively: [0-3] => {0,1,2,3}
+        Assert.assertEquals("file{0,1,2,3}.csv", S3Util.expandBracketPatterns("file[0-3].csv"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_mixedRangeAndChars() {
+        // A range followed by a single character: [a-cX] => {a,b,c,X}
+        Assert.assertEquals("file{a,b,c,X}.csv", S3Util.expandBracketPatterns("file[a-cX].csv"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_multipleRanges() {
+        // Two ranges in one bracket are enumerated in written order: [a-c0-2] => {a,b,c,0,1,2}
+        Assert.assertEquals("file{a,b,c,0,1,2}.csv", S3Util.expandBracketPatterns("file[a-c0-2].csv"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_fullPipeline() {
+        // Full pipeline: bracket expansion -> extendGlobs -> brace expansion
+        // file[abc].csv => file{a,b,c}.csv => [filea.csv, fileb.csv, filec.csv]
+        String bracketExpanded = S3Util.expandBracketPatterns("file[abc].csv");
+        String globExpanded = S3Util.extendGlobs(bracketExpanded);
+        List<String> result = S3Util.expandBracePatterns(globExpanded);
+        Assert.assertEquals(Arrays.asList("filea.csv", "fileb.csv", "filec.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracketPatterns_withBracesAndBrackets() {
+        // Mixed brackets and braces: dir[ab]/file{1,2}.csv
+        // => dir{a,b}/file{1,2}.csv => [dira/file1.csv, dira/file2.csv, dirb/file1.csv, dirb/file2.csv]
+        String bracketExpanded = S3Util.expandBracketPatterns("dir[ab]/file{1,2}.csv");
+        // The bracket step must rewrite only the bracket and leave the existing brace list alone
+        Assert.assertEquals("dir{a,b}/file{1,2}.csv", bracketExpanded);
+        List<String> result = S3Util.expandBracePatterns(bracketExpanded);
+        Assert.assertEquals(Arrays.asList(
+                "dira/file1.csv", "dira/file2.csv",
+                "dirb/file1.csv", "dirb/file2.csv"), result);
+    }
+
+    @Test
+    public void testExpandBracketPatterns_digitRange() {
+        // [0-9] is bracket-expanded and then brace-expanded; ten paths are expected,
+        // and the two end points of the range are spot-checked
+        List<String> paths = S3Util.expandBracePatterns(S3Util.expandBracketPatterns("part[0-9].dat"));
+        Assert.assertEquals(10, paths.size());
+        Assert.assertTrue(paths.contains("part0.dat"));
+        Assert.assertTrue(paths.contains("part9.dat"));
+    }
+
+    @Test
+    public void testExpandBracketPatterns_malformedBracket() {
+        // Malformed bracket (no closing ]) - the '[' is expected to be kept as a literal
+        Assert.assertEquals("file[abc.csv", S3Util.expandBracketPatterns("file[abc.csv"));
+    }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to