This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 61a3d3b2a87 [opt](s3) Skip S3 listing for deterministic file paths
using HEAD requests (#60414)
61a3d3b2a87 is described below
commit 61a3d3b2a87c2fc87407e7d0aac011f12809c885
Author: Yongqiang YANG <[email protected]>
AuthorDate: Sat Feb 28 00:02:00 2026 -0800
[opt](s3) Skip S3 listing for deterministic file paths using HEAD requests
(#60414)
## Summary
- For S3 paths without true wildcards (`*`, `?`, or negated brackets such as
`[!abc]` / `[^abc]`), use HEAD requests instead of ListObjectsV2 to avoid
requiring `s3:ListBucket` permission
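- Brace patterns like `{1..10}` and non-negated bracket patterns like `[0-9]`
are expanded to concrete file paths and verified individually with HEAD
requests; if the expansion exceeds `s3_head_request_max_paths` (default 100),
listing is used instead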
- This enables loading data from S3 when only `s3:GetObject` permission
is granted
## Motivation
S3 `ListBucket` permission is often more restricted than `GetObject` in
enterprise environments. When users specify exact file paths or
deterministic patterns like `file{1..3}.csv`, listing is unnecessary
since the file names can be determined from the input.
## Changes
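| File | Description |
|------|-------------|
| `Config.java` | Added the `s3_skip_list_for_deterministic_path` and `s3_head_request_max_paths` settings |
| `S3Util.java` | Added `isDeterministicPattern()` to detect paths without wildcards, `expandBracketPatterns()` to rewrite non-negated `[...]` patterns as brace patterns, and `expandBracePatterns()` to expand brace patterns to concrete paths |
| `S3ObjStorage.java` | Modified `globListInternal()` to use HEAD requests for deterministic paths |
| `AzureObjStorage.java` | Uses `getProperties` requests instead of listing for deterministic paths |
| `S3UtilTest.java` | Added unit tests for new utility methods |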
## Examples
| Path | Deterministic? | Behavior |
|------|----------------|----------|
| `s3://bucket/data/file.csv` | ✅ Yes | Single HEAD request |
| `s3://bucket/data/file{1..3}.csv` | ✅ Yes | 3 HEAD requests |
| `s3://bucket/data/*.csv` | ❌ No | Falls back to LIST |
## Test Plan
- [x] Added unit tests for `isDeterministicPattern()`
- [x] Added unit tests for `expandBracePatterns()`
- [x] Added unit tests for `expandBracketPatterns()`
- [ ] Manual testing with S3 TVF and Broker Load
🤖 Generated with [Claude Code](https://claude.ai/code)
---
.../main/java/org/apache/doris/common/Config.java | 20 ++
.../java/org/apache/doris/common/util/S3Util.java | 232 +++++++++++++++++++++
.../org/apache/doris/fs/obj/AzureObjStorage.java | 99 ++++++++-
.../java/org/apache/doris/fs/obj/S3ObjStorage.java | 108 ++++++++++
.../org/apache/doris/common/util/S3UtilTest.java | 208 ++++++++++++++++++
5 files changed, 666 insertions(+), 1 deletion(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 2b06fcc5a80..45735468dac 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3508,6 +3508,26 @@ public class Config extends ConfigBase {
+ "for example: s3_load_endpoint_white_list=a,b,c"})
public static String[] s3_load_endpoint_white_list = {};
+ @ConfField(mutable = true, description = {
+ "对于确定性的 S3 路径(无通配符如 *, ?),使用 HEAD 请求代替 ListObjects 来避免需要
ListBucket 权限。"
+ + "花括号模式 {1,2,3} 和非否定方括号模式 [abc] 会展开为具体路径。"
+ + "这对于只有 GetObject 权限的场景很有用。如果遇到问题可以设置为 false 回退到原有行为。",
+ "For deterministic S3 paths (without wildcards like *, ?), use
HEAD requests instead of "
+ + "ListObjects to avoid requiring ListBucket permission. Brace
patterns {1,2,3} and "
+ + "non-negated bracket patterns [abc] are expanded to concrete
paths. This is useful when only "
+ + "GetObject permission is granted. Set to false to fall back to
the original listing behavior."
+ })
+ public static boolean s3_skip_list_for_deterministic_path = true;
+
+ @ConfField(mutable = true, description = {
+ "当使用 HEAD 请求代替 ListObjects 时,展开路径的最大数量。如果展开的路径数量超过此限制,"
+ + "将回退到使用 ListObjects。这可以防止类似 {1..100}/{1..100} 的模式触发过多的 HEAD 请求。",
+ "Maximum number of expanded paths when using HEAD requests instead
of ListObjects. "
+ + "If the expanded path count exceeds this limit, falls back to
ListObjects. "
+ + "This prevents patterns like {1..100}/{1..100} from triggering
too many HEAD requests."
+ })
+ public static int s3_head_request_max_paths = 100;
+
@ConfField(mutable = true, description = {
"此参数控制是否强制使用 Azure global endpoint。默认值为 false,系统将使用用户指定的 endpoint。"
+ "如果设置为 true,系统将强制使用 {account}.blob.core.windows.net。",
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index e537d1f47b0..3e4f4e7a62f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -433,4 +433,236 @@ public class S3Util {
SecurityChecker.getInstance().stopSSRFChecking();
}
}
+
+ /**
+ * Check if a path pattern is deterministic, meaning all file paths can be
determined
+ * without listing. A pattern is deterministic if it contains no true
wildcard characters
+ * (*, ?) but may contain brace patterns ({...}) and non-negated bracket
patterns ([abc], [0-9])
+ * which can be expanded to concrete paths.
+ *
+ * Negated bracket patterns ([!abc], [^abc]) are NOT deterministic because
they match
+ * any character except those listed, requiring a listing to discover
matches.
+ *
+ * This allows skipping S3 ListBucket operations when only GetObject
permission is available.
+ *
+ * @param pathPattern Path that may contain glob patterns
+ * @return true if the pattern is deterministic (expandable without
listing)
+ */
+ public static boolean isDeterministicPattern(String pathPattern) {
+ // Check for wildcard characters that require listing
+ // Note: '{' is NOT a wildcard - it's a brace expansion pattern that
can be deterministically expanded
+ // Note: '[' is conditionally deterministic - [abc] can be expanded,
but [!abc]/[^abc] cannot
+ char[] wildcardChars = {'*', '?'};
+ for (char c : wildcardChars) {
+ if (pathPattern.indexOf(c) != -1) {
+ return false;
+ }
+ }
+ // Check for escaped characters which indicate complex patterns
+ if (pathPattern.indexOf('\\') != -1) {
+ return false;
+ }
+ // Check bracket patterns: [abc] and [0-9] are deterministic, [!abc]
and [^abc] are not
+ if (!areBracketPatternsDeterministic(pathPattern)) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Check if all bracket patterns in the path are deterministic
(non-negated).
+ * - [abc], [0-9], [a-zA-Z] are deterministic (can be expanded to finite
character sets)
+ * - [!abc], [^abc] are non-deterministic (negation requires listing)
+ * - Malformed brackets (no closing ]) are non-deterministic
+ */
+ private static boolean areBracketPatternsDeterministic(String pattern) {
+ int i = 0;
+ while (i < pattern.length()) {
+ if (pattern.charAt(i) == '[') {
+ int end = pattern.indexOf(']', i + 1);
+ if (end == -1) {
+ // Malformed bracket - no closing ], treat as
non-deterministic
+ return false;
+ }
+ int contentStart = i + 1;
+ if (contentStart == end) {
+ // Empty brackets [] - malformed, treat as
non-deterministic
+ return false;
+ }
+ // Check for negation
+ char first = pattern.charAt(contentStart);
+ if (first == '!' || first == '^') {
+ return false;
+ }
+ i = end + 1;
+ } else {
+ i++;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Expand bracket character class patterns to brace patterns.
+ * This converts [abc] to {a,b,c} and [0-9] to {0,1,2,...,9} so that
+ * the existing brace expansion can handle them.
+ *
+ * Only call this on patterns already verified as deterministic by
isDeterministicPattern()
+ * (i.e., no negated brackets like [!...] or [^...]).
+ *
+ * Examples:
+ * - "file[abc].csv" => "file{a,b,c}.csv"
+ * - "file[0-9].csv" => "file{0,1,2,3,4,5,6,7,8,9}.csv"
+ * - "file[a-cX].csv" => "file{a,b,c,X}.csv"
+ * - "file.csv" => "file.csv" (no brackets)
+ *
+ * @param pathPattern Path with optional bracket patterns (must not
contain negated brackets)
+ * @return Path with brackets converted to brace patterns
+ */
+ public static String expandBracketPatterns(String pathPattern) {
+ StringBuilder result = new StringBuilder();
+ int i = 0;
+ while (i < pathPattern.length()) {
+ if (pathPattern.charAt(i) == '[') {
+ int end = pathPattern.indexOf(']', i + 1);
+ if (end == -1) {
+ // Malformed, keep as-is
+ result.append(pathPattern.charAt(i));
+ i++;
+ continue;
+ }
+ String content = pathPattern.substring(i + 1, end);
+ List<Character> chars = expandBracketContent(content);
+ result.append('{');
+ for (int j = 0; j < chars.size(); j++) {
+ if (j > 0) {
+ result.append(',');
+ }
+ result.append(chars.get(j));
+ }
+ result.append('}');
+ i = end + 1;
+ } else {
+ result.append(pathPattern.charAt(i));
+ i++;
+ }
+ }
+ return result.toString();
+ }
+
+ private static List<Character> expandBracketContent(String content) {
+ List<Character> chars = new ArrayList<>();
+ int i = 0;
+ while (i < content.length()) {
+ if (i + 2 < content.length() && content.charAt(i + 1) == '-') {
+ // Range like a-z or 0-9
+ char start = content.charAt(i);
+ char end = content.charAt(i + 2);
+ if (start <= end) {
+ for (char c = start; c <= end; c++) {
+ if (!chars.contains(c)) {
+ chars.add(c);
+ }
+ }
+ } else {
+ for (char c = start; c >= end; c--) {
+ if (!chars.contains(c)) {
+ chars.add(c);
+ }
+ }
+ }
+ i += 3;
+ } else {
+ char c = content.charAt(i);
+ if (!chars.contains(c)) {
+ chars.add(c);
+ }
+ i++;
+ }
+ }
+ return chars;
+ }
+
+ /**
+ * Expand brace patterns in a path to generate all concrete file paths.
+ * Handles nested and multiple brace patterns.
+ *
+ * Examples:
+ * - "file{1,2,3}.csv" => ["file1.csv", "file2.csv", "file3.csv"]
+ * - "data/part{1..3}/file.csv" => ["data/part1/file.csv",
"data/part2/file.csv", "data/part3/file.csv"]
+ * - "file.csv" => ["file.csv"] (no braces)
+ *
+ * @param pathPattern Path with optional brace patterns (already processed
by extendGlobs)
+ * @return List of expanded concrete paths
+ */
+ public static List<String> expandBracePatterns(String pathPattern) {
+ List<String> result = new ArrayList<>();
+ expandBracePatternsRecursive(pathPattern, result);
+ return result;
+ }
+
+ private static void expandBracePatternsRecursive(String pattern,
List<String> result) {
+ int braceStart = pattern.indexOf('{');
+ if (braceStart == -1) {
+ // No more braces, add the pattern as-is
+ result.add(pattern);
+ return;
+ }
+
+ // Find matching closing brace (handle nested braces)
+ int braceEnd = findMatchingBrace(pattern, braceStart);
+ if (braceEnd == -1) {
+ // Malformed pattern, treat as literal
+ result.add(pattern);
+ return;
+ }
+
+ String prefix = pattern.substring(0, braceStart);
+ String braceContent = pattern.substring(braceStart + 1, braceEnd);
+ String suffix = pattern.substring(braceEnd + 1);
+
+ // Split by comma, but respect nested braces
+ List<String> alternatives = splitBraceContent(braceContent);
+
+ for (String alt : alternatives) {
+ // Recursively expand any remaining braces in the suffix
+ expandBracePatternsRecursive(prefix + alt + suffix, result);
+ }
+ }
+
+ private static int findMatchingBrace(String pattern, int start) {
+ int depth = 0;
+ for (int i = start; i < pattern.length(); i++) {
+ char c = pattern.charAt(i);
+ if (c == '{') {
+ depth++;
+ } else if (c == '}') {
+ depth--;
+ if (depth == 0) {
+ return i;
+ }
+ }
+ }
+ return -1;
+ }
+
+ private static List<String> splitBraceContent(String content) {
+ List<String> parts = new ArrayList<>();
+ int depth = 0;
+ int start = 0;
+
+ for (int i = 0; i < content.length(); i++) {
+ char c = content.charAt(i);
+ if (c == '{') {
+ depth++;
+ } else if (c == '}') {
+ depth--;
+ } else if (c == ',' && depth == 0) {
+ parts.add(content.substring(start, i));
+ start = i + 1;
+ }
+ }
+ parts.add(content.substring(start));
+ return parts;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
index 6b0c198d841..4929e34e7f5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/AzureObjStorage.java
@@ -18,6 +18,7 @@
package org.apache.doris.fs.obj;
import org.apache.doris.backup.Status;
+import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.S3URI;
@@ -357,8 +358,24 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
try {
remotePath =
AzurePropertyUtils.validateAndNormalizeUri(remotePath);
S3URI uri = S3URI.create(remotePath, isUsePathStyle,
forceParsingByStandardUri);
- String globPath = S3Util.extendGlobs(uri.getKey());
String bucket = uri.getBucket();
+
+ // Optimization: For deterministic paths (no wildcards like *, ?),
+ // use getProperties requests instead of listing to avoid
requiring list permission.
+ // Controlled by config: s3_skip_list_for_deterministic_path
+ // Note: Skip when using path style (see S3ObjStorage for detailed
explanation)
+ String keyPattern = uri.getKey();
+ if (Config.s3_skip_list_for_deterministic_path
+ && !isUsePathStyle
+ && S3Util.isDeterministicPattern(keyPattern)) {
+ Status headStatus = globListByGetProperties(bucket,
keyPattern, result, fileNameOnly, startTime);
+ if (headStatus != null) {
+ return headStatus;
+ }
+ // If headStatus is null, fall through to use listing
+ }
+
+ String globPath = S3Util.extendGlobs(uri.getKey());
if (LOG.isDebugEnabled()) {
LOG.debug("try to glob list for azure, remote path {}, orig
{}", globPath, remotePath);
}
@@ -436,6 +453,86 @@ public class AzureObjStorage implements
ObjStorage<BlobServiceClient> {
return st;
}
+ /**
+ * Get file metadata using getProperties requests for deterministic paths.
+ * This avoids requiring list permission when only read permission is
granted.
+ *
+ * @param bucket Azure container name
+ * @param keyPattern The key pattern (may contain {..} brace or [...]
bracket patterns but no wildcards)
+ * @param result List to store matching RemoteFile objects
+ * @param fileNameOnly If true, only store file names; otherwise store
full paths
+ * @param startTime Start time for logging duration
+ * @return Status if successful, null if should fall back to listing
+ */
+ private Status globListByGetProperties(String bucket, String keyPattern,
+ List<RemoteFile> result, boolean fileNameOnly, long startTime) {
+ try {
+ // First expand [...] brackets to {...} braces, then expand {..}
ranges, then expand braces
+ String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
+ expandedPattern = S3Util.extendGlobs(expandedPattern);
+ List<String> expandedPaths =
S3Util.expandBracePatterns(expandedPattern);
+
+ // Fall back to listing if too many paths to avoid overwhelming
Azure with requests
+ // Controlled by config: s3_head_request_max_paths
+ if (expandedPaths.size() > Config.s3_head_request_max_paths) {
+ LOG.info("Expanded path count {} exceeds limit {}, falling
back to LIST",
+ expandedPaths.size(),
Config.s3_head_request_max_paths);
+ return null;
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Using getProperties requests for deterministic path
pattern, expanded to {} paths",
+ expandedPaths.size());
+ }
+
+ BlobContainerClient containerClient =
getClient().getBlobContainerClient(bucket);
+ long matchCnt = 0;
+ for (String key : expandedPaths) {
+ String fullPath = constructS3Path(key, bucket);
+ try {
+ BlobClient blobClient = containerClient.getBlobClient(key);
+ BlobProperties props = blobClient.getProperties();
+
+ matchCnt++;
+ RemoteFile remoteFile = new RemoteFile(
+ fileNameOnly ?
Paths.get(key).getFileName().toString() : fullPath,
+ true, // isFile
+ props.getBlobSize(),
+ props.getBlobSize(),
+ props.getLastModified() != null
+ ? props.getLastModified().toEpochSecond()
: 0
+ );
+ result.add(remoteFile);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("getProperties success for {}: size={}",
fullPath, props.getBlobSize());
+ }
+ } catch (BlobStorageException e) {
+ if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND
+ ||
BlobErrorCode.BLOB_NOT_FOUND.equals(e.getErrorCode())) {
+ // File does not exist, skip it (this is expected for
some expanded patterns)
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("File does not exist (skipped): {}",
fullPath);
+ }
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ long duration = System.nanoTime() - startTime;
+ LOG.debug("Deterministic path getProperties requests: checked
{} paths, found {} files, took {} ms",
+ expandedPaths.size(), matchCnt, duration / 1000 /
1000);
+ }
+
+ return Status.OK;
+ } catch (Exception e) {
+ LOG.warn("Failed to use getProperties requests, falling back to
listing: {}", e.getMessage());
+ return null;
+ }
+ }
+
public Status listFiles(String remotePath, boolean recursive,
List<RemoteFile> result) {
try {
remotePath =
AzurePropertyUtils.validateAndNormalizeUri(remotePath);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
index 33694b1a3d8..b00f696dad7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java
@@ -18,6 +18,7 @@
package org.apache.doris.fs.obj;
import org.apache.doris.backup.Status;
+import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.S3URI;
@@ -584,6 +585,28 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
}
bucket = uri.getBucket();
+
+ // Optimization: For deterministic paths (no wildcards like *, ?),
+ // use HEAD requests instead of listing to avoid requiring
ListBucket permission.
+ // This is useful when only GetObject permission is granted.
+ // Controlled by config: s3_skip_list_for_deterministic_path
+ // Note: Skip when using path style because path-style parsing of
virtual-host URLs
+ // can produce accidental HEAD successes where LIST would
correctly fail.
+ // (e.g., http://bucket.endpoint/key with path_style=true: HEAD
URL coincidentally
+ // matches the correct virtual-host URL, while LIST URL format is
different and fails)
+ String keyPattern = uri.getKey();
+ if (Config.s3_skip_list_for_deterministic_path
+ && !isUsePathStyle
+ && S3Util.isDeterministicPattern(keyPattern)
+ && !hasLimits && startFile == null) {
+ GlobListResult headResult = globListByHeadRequests(
+ bucket, keyPattern, result, fileNameOnly, startTime);
+ if (headResult != null) {
+ return headResult;
+ }
+ // If headResult is null, fall through to use listing
+ }
+
String globPath = S3Util.extendGlobs(uri.getKey());
if (LOG.isDebugEnabled()) {
@@ -705,6 +728,91 @@ public class S3ObjStorage implements ObjStorage<S3Client> {
}
}
+ /**
+ * Get file metadata using HEAD requests for deterministic paths.
+ * This avoids requiring ListBucket permission when only GetObject
permission is granted.
+ *
+ * @param bucket S3 bucket name
+ * @param keyPattern The key pattern (may contain {..} brace or [...]
bracket patterns but no wildcards)
+ * @param result List to store matching RemoteFile objects
+ * @param fileNameOnly If true, only store file names; otherwise store
full S3 paths
+ * @param startTime Start time for logging duration
+ * @return GlobListResult if successful, null if should fall back to
listing
+ */
+ private GlobListResult globListByHeadRequests(String bucket, String
keyPattern,
+ List<RemoteFile> result, boolean fileNameOnly, long startTime) {
+ try {
+ // First expand [...] brackets to {...} braces, then expand {..}
ranges, then expand braces
+ String expandedPattern = S3Util.expandBracketPatterns(keyPattern);
+ expandedPattern = S3Util.extendGlobs(expandedPattern);
+ List<String> expandedPaths =
S3Util.expandBracePatterns(expandedPattern);
+
+ // Fall back to listing if too many paths to avoid overwhelming S3
with HEAD requests
+ // Controlled by config: s3_head_request_max_paths
+ if (expandedPaths.size() > Config.s3_head_request_max_paths) {
+ LOG.info("Expanded path count {} exceeds limit {}, falling
back to LIST",
+ expandedPaths.size(),
Config.s3_head_request_max_paths);
+ return null;
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Using HEAD requests for deterministic path pattern,
expanded to {} paths",
+ expandedPaths.size());
+ }
+
+ long matchCnt = 0;
+ for (String key : expandedPaths) {
+ String fullPath = "s3://" + bucket + "/" + key;
+ try {
+ HeadObjectResponse headResponse = getClient()
+ .headObject(HeadObjectRequest.builder()
+ .bucket(bucket)
+ .key(key)
+ .build());
+
+ matchCnt++;
+ RemoteFile remoteFile = new RemoteFile(
+ fileNameOnly ?
Paths.get(key).getFileName().toString() : fullPath,
+ true, // isFile
+ headResponse.contentLength(),
+ headResponse.contentLength(),
+ headResponse.lastModified() != null
+ ?
headResponse.lastModified().toEpochMilli() : 0
+ );
+ result.add(remoteFile);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("HEAD success for {}: size={}", fullPath,
headResponse.contentLength());
+ }
+ } catch (NoSuchKeyException e) {
+ // File does not exist, skip it (this is expected for some
expanded patterns)
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("File does not exist (skipped): {}",
fullPath);
+ }
+ } catch (S3Exception e) {
+ if (e.statusCode() == HttpStatus.SC_NOT_FOUND) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("File does not exist (skipped): {}",
fullPath);
+ }
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ if (LOG.isDebugEnabled()) {
+ long duration = System.nanoTime() - startTime;
+ LOG.debug("Deterministic path HEAD requests: checked {} paths,
found {} files, took {} ms",
+ expandedPaths.size(), matchCnt, duration / 1000 /
1000);
+ }
+
+ return new GlobListResult(Status.OK, "", bucket, "");
+ } catch (Exception e) {
+ LOG.warn("Failed to use HEAD requests, falling back to listing:
{}", e.getMessage());
+ return null;
+ }
+ }
+
private static boolean reachLimit(int matchFileCnt, long matchFileSize,
long sizeLimit, long fileNum) {
if (matchFileCnt < 0 || sizeLimit < 0 || fileNum < 0) {
return false;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
index 23715440e8c..4b976ed86cd 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/S3UtilTest.java
@@ -20,6 +20,9 @@ package org.apache.doris.common.util;
import org.junit.Assert;
import org.junit.Test;
+import java.util.Arrays;
+import java.util.List;
+
public class S3UtilTest {
@Test
@@ -248,5 +251,210 @@ public class S3UtilTest {
String result = S3Util.extendGlobs(input);
Assert.assertEquals(expected, result);
}
+
+ // Tests for isDeterministicPattern
+
+ @Test
+ public void testIsDeterministicPattern_simpleFile() {
+ // Simple file path without any patterns
+ Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withBraces() {
+ // Path with brace pattern (deterministic - can be expanded)
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1,2,3}.csv"));
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file{1..3}.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withAsterisk() {
+ // Path with asterisk wildcard (not deterministic)
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/to/*.csv"));
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/*/file.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withQuestionMark() {
+ // Path with question mark wildcard (not deterministic)
+ Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file?.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withBrackets() {
+ // Non-negated bracket patterns are deterministic (can be expanded)
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[0-9].csv"));
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[abc].csv"));
+
Assert.assertTrue(S3Util.isDeterministicPattern("path/to/file[a-zA-Z].csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withNegatedBrackets() {
+ // Negated bracket patterns are NOT deterministic
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[!abc].csv"));
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[^0-9].csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withMalformedBrackets() {
+ // Malformed brackets (no closing ]) are NOT deterministic
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[abc.csv"));
+ // Empty brackets [] are NOT deterministic
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file[].csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_withEscape() {
+ // Path with escape character (not deterministic - complex pattern)
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file\\*.csv"));
+ }
+
+ @Test
+ public void testIsDeterministicPattern_mixed() {
+ // Path with both braces and wildcards
+
Assert.assertFalse(S3Util.isDeterministicPattern("path/to/file{1,2}/*.csv"));
+ }
+
+ // Tests for expandBracePatterns
+
+ @Test
+ public void testExpandBracePatterns_noBraces() {
+ // No braces - returns single path
+ List<String> result = S3Util.expandBracePatterns("path/to/file.csv");
+ Assert.assertEquals(Arrays.asList("path/to/file.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_simpleBrace() {
+ // Simple brace expansion
+ List<String> result = S3Util.expandBracePatterns("file{1,2,3}.csv");
+ Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv",
"file3.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_multipleBraces() {
+ // Multiple brace expansions
+ List<String> result =
S3Util.expandBracePatterns("dir{a,b}/file{1,2}.csv");
+ Assert.assertEquals(Arrays.asList(
+ "dira/file1.csv", "dira/file2.csv",
+ "dirb/file1.csv", "dirb/file2.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_emptyBrace() {
+ // Empty brace content
+ List<String> result = S3Util.expandBracePatterns("file{}.csv");
+ Assert.assertEquals(Arrays.asList("file.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_singleValue() {
+ // Single value in brace
+ List<String> result = S3Util.expandBracePatterns("file{1}.csv");
+ Assert.assertEquals(Arrays.asList("file1.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_withPath() {
+ // Full path with braces: 2 years × 2 months = 4 paths
+ List<String> result =
S3Util.expandBracePatterns("data/year{2023,2024}/month{01,02}/file.csv");
+ Assert.assertEquals(4, result.size());
+ Assert.assertTrue(result.contains("data/year2023/month01/file.csv"));
+ Assert.assertTrue(result.contains("data/year2023/month02/file.csv"));
+ Assert.assertTrue(result.contains("data/year2024/month01/file.csv"));
+ Assert.assertTrue(result.contains("data/year2024/month02/file.csv"));
+ }
+
+ @Test
+ public void testExpandBracePatterns_extendedRange() {
+ // Test with extended range (after extendGlobs processing)
+ String expanded = S3Util.extendGlobs("file{1..3}.csv");
+ List<String> result = S3Util.expandBracePatterns(expanded);
+ Assert.assertEquals(Arrays.asList("file1.csv", "file2.csv",
"file3.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_malformedBrace() {
+ // Malformed brace pattern (no closing }) - treated as literal
+ List<String> result = S3Util.expandBracePatterns("file{1,2.csv");
+ Assert.assertEquals(Arrays.asList("file{1,2.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracePatterns_malformedBraceWithDots() {
+ // Malformed range-like pattern (no closing }) - treated as literal
+ List<String> result = S3Util.expandBracePatterns("file{1..csv");
+ Assert.assertEquals(Arrays.asList("file{1..csv"), result);
+ }
+
+ // Tests for expandBracketPatterns
+
+ @Test
+ public void testExpandBracketPatterns_noBrackets() {
+ // No brackets - returns unchanged
+ Assert.assertEquals("path/to/file.csv",
S3Util.expandBracketPatterns("path/to/file.csv"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_simpleCharList() {
+ // [abc] => {a,b,c}
+ Assert.assertEquals("file{a,b,c}.csv",
S3Util.expandBracketPatterns("file[abc].csv"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_charRange() {
+ // [0-3] => {0,1,2,3}
+ Assert.assertEquals("file{0,1,2,3}.csv",
S3Util.expandBracketPatterns("file[0-3].csv"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_mixedRangeAndChars() {
+ // [a-cX] => {a,b,c,X}
+ Assert.assertEquals("file{a,b,c,X}.csv",
S3Util.expandBracketPatterns("file[a-cX].csv"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_multipleRanges() {
+ // [a-c0-2] => {a,b,c,0,1,2}
+ Assert.assertEquals("file{a,b,c,0,1,2}.csv",
S3Util.expandBracketPatterns("file[a-c0-2].csv"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_fullPipeline() {
+ // Full pipeline: bracket expansion -> extendGlobs -> brace expansion
+ // file[abc].csv => file{a,b,c}.csv => [filea.csv, fileb.csv,
filec.csv]
+ String bracketExpanded = S3Util.expandBracketPatterns("file[abc].csv");
+ String globExpanded = S3Util.extendGlobs(bracketExpanded);
+ List<String> result = S3Util.expandBracePatterns(globExpanded);
+ Assert.assertEquals(Arrays.asList("filea.csv", "fileb.csv",
"filec.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracketPatterns_withBracesAndBrackets() {
+ // Mixed brackets and braces: dir[ab]/file{1,2}.csv
+ // => dir{a,b}/file{1,2}.csv => [dira/file1.csv, dira/file2.csv,
dirb/file1.csv, dirb/file2.csv]
+ String bracketExpanded =
S3Util.expandBracketPatterns("dir[ab]/file{1,2}.csv");
+ Assert.assertEquals("dir{a,b}/file{1,2}.csv", bracketExpanded);
+ List<String> result = S3Util.expandBracePatterns(bracketExpanded);
+ Assert.assertEquals(Arrays.asList(
+ "dira/file1.csv", "dira/file2.csv",
+ "dirb/file1.csv", "dirb/file2.csv"), result);
+ }
+
+ @Test
+ public void testExpandBracketPatterns_digitRange() {
+ // [0-9] => {0,1,2,3,4,5,6,7,8,9}
+ String expanded = S3Util.expandBracketPatterns("part[0-9].dat");
+ List<String> result = S3Util.expandBracePatterns(expanded);
+ Assert.assertEquals(10, result.size());
+ Assert.assertTrue(result.contains("part0.dat"));
+ Assert.assertTrue(result.contains("part9.dat"));
+ }
+
+ @Test
+ public void testExpandBracketPatterns_malformedBracket() {
+ // Malformed bracket (no closing ]) - [ kept as literal
+ Assert.assertEquals("file[abc.csv",
S3Util.expandBracketPatterns("file[abc.csv"));
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org