This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new d8f7adca512 branch-4.0: [Improvement](Iceberg) Optimize 
LocationPath.of performance for Iceberg table queries #59217 (#59427)
d8f7adca512 is described below

commit d8f7adca512f86c4166482744e41ce8983bc3c17
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Dec 29 10:22:36 2025 +0800

    branch-4.0: [Improvement](Iceberg) Optimize LocationPath.of performance for 
Iceberg table queries #59217 (#59427)
    
    Cherry-picked from #59217
    
    Co-authored-by: Socrates <[email protected]>
---
 .../org/apache/doris/common/util/LocationPath.java | 96 ++++++++++++++++++----
 .../datasource/iceberg/source/IcebergScanNode.java | 79 +++++++++++++++++-
 .../property/storage/S3PropertyUtils.java          | 52 +++++++++++-
 .../apache/doris/common/util/LocationPathTest.java | 40 +++++++++
 .../storage/S3ConnectorPropertiesUtilsTest.java    | 12 +++
 5 files changed, 260 insertions(+), 19 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
index e4b9aa0b25c..b41cb950ff8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
@@ -96,27 +96,25 @@ public class LocationPath {
     }
 
     private static String parseScheme(String finalLocation) {
-        String scheme = "";
-        String[] schemeSplit = finalLocation.split(SCHEME_DELIM);
-        if (schemeSplit.length > 1) {
-            scheme = schemeSplit[0];
-        } else {
-            schemeSplit = finalLocation.split(NONSTANDARD_SCHEME_DELIM);
-            if (schemeSplit.length > 1) {
-                scheme = schemeSplit[0];
-            }
+        // Use indexOf instead of split for better performance
+        int schemeDelimIndex = finalLocation.indexOf(SCHEME_DELIM);
+        if (schemeDelimIndex > 0) {
+            return finalLocation.substring(0, schemeDelimIndex);
+        }
+
+        int nonstandardDelimIndex = 
finalLocation.indexOf(NONSTANDARD_SCHEME_DELIM);
+        if (nonstandardDelimIndex > 0) {
+            return finalLocation.substring(0, nonstandardDelimIndex);
         }
 
         // if not get scheme, need consider /path/to/local to no scheme
-        if (scheme.isEmpty()) {
-            try {
-                Paths.get(finalLocation);
-            } catch (InvalidPathException exception) {
-                throw new IllegalArgumentException("Fail to parse scheme, 
invalid location: " + finalLocation);
-            }
+        try {
+            Paths.get(finalLocation);
+        } catch (InvalidPathException exception) {
+            throw new IllegalArgumentException("Fail to parse scheme, invalid 
location: " + finalLocation);
         }
 
-        return scheme;
+        return "";
     }
 
     /**
@@ -201,6 +199,72 @@ public class LocationPath {
         }
     }
 
+    /**
+     * Ultra-fast factory method that directly constructs LocationPath without 
any parsing.
+     * This is used when the normalized location is already known (e.g., from 
prefix transformation).
+     *
+     * @param normalizedLocation the already-normalized location string
+     * @param schema             pre-computed schema
+     * @param fsIdentifier       pre-computed filesystem identifier
+     * @param storageProperties  the storage properties (can be null)
+     * @return a new LocationPath instance
+     */
+    public static LocationPath ofDirect(String normalizedLocation,
+                                        String schema,
+                                        String fsIdentifier,
+                                        StorageProperties storageProperties) {
+        return new LocationPath(schema, normalizedLocation, fsIdentifier, 
storageProperties);
+    }
+
+    /**
+     * Fast factory method that reuses pre-computed schema and fsIdentifier.
+     * This is optimized for batch processing where many files share the same 
bucket/prefix.
+     *
+     * @param location           the input URI location string
+     * @param storageProperties  pre-computed storage properties for 
normalization
+     * @param cachedSchema       pre-computed schema (can be null to compute)
+     * @param cachedFsIdPrefix   pre-computed fsIdentifier prefix like "s3://" 
(can be null to compute)
+     * @return a new LocationPath instance
+     */
+    public static LocationPath ofWithCache(String location,
+                                           StorageProperties storageProperties,
+                                           String cachedSchema,
+                                           String cachedFsIdPrefix) {
+        try {
+            String normalizedLocation = 
storageProperties.validateAndNormalizeUri(location);
+
+            String fsIdentifier;
+            if (cachedFsIdPrefix != null && 
normalizedLocation.startsWith(cachedFsIdPrefix)) {
+                // Fast path: extract authority from normalized location 
without full URI parsing
+                int authorityStart = cachedFsIdPrefix.length();
+                int authorityEnd = normalizedLocation.indexOf('/', 
authorityStart);
+                if (authorityEnd == -1) {
+                    authorityEnd = normalizedLocation.length();
+                }
+                String authority = 
normalizedLocation.substring(authorityStart, authorityEnd);
+                if (authority.isEmpty()) {
+                    throw new StoragePropertiesException("Invalid location, 
missing authority: " + normalizedLocation);
+                }
+                fsIdentifier = cachedFsIdPrefix + authority;
+            } else {
+                // Fallback to full URI parsing
+                String encodedLocation = encodedLocation(normalizedLocation);
+                URI uri = URI.create(encodedLocation);
+                String authority = uri.getAuthority();
+                if (Strings.isNullOrEmpty(authority)) {
+                    throw new StoragePropertiesException("Invalid location, 
missing authority: " + normalizedLocation);
+                }
+                fsIdentifier = Strings.nullToEmpty(uri.getScheme()) + "://"
+                        + authority;
+            }
+
+            String schema = cachedSchema != null ? cachedSchema : 
extractScheme(location);
+            return new LocationPath(schema, normalizedLocation, fsIdentifier, 
storageProperties);
+        } catch (UserException e) {
+            throw new StoragePropertiesException("Failed to create 
LocationPath for location: " + location, e);
+        }
+    }
+
     /**
      * Extracts the URI scheme (e.g., "s3", "hdfs") from the location string.
      *
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index fb3d746fb10..696b1dacd76 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -117,6 +117,18 @@ public class IcebergScanNode extends FileQueryScanNode {
     private Map<StorageProperties.Type, StorageProperties> 
storagePropertiesMap;
     private Map<String, String> backendStorageProperties;
 
+    // Cached values for LocationPath creation optimization
+    // These are lazily initialized on first use to avoid parsing overhead for 
each file
+    private boolean locationPathCacheInitialized = false;
+    private StorageProperties cachedStorageProperties;
+    private String cachedSchema;
+    private String cachedFsIdPrefix;
+    // Cache for path prefix transformation to avoid repeated S3URI parsing
+    // Maps original path prefix (e.g., "https://bucket.s3.amazonaws.com/") to 
normalized prefix (e.g., "s3://bucket/")
+    private String cachedOriginalPathPrefix;
+    private String cachedNormalizedPathPrefix;
+    private String cachedFsIdentifier;
+
     // for test
     @VisibleForTesting
     public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, 
SessionVariable sv) {
@@ -364,9 +376,74 @@ public class IcebergScanNode extends FileQueryScanNode {
         return TableScanUtil.splitFiles(scan.planFiles(), targetSplitSize);
     }
 
+    /**
+     * Initialize cached values for LocationPath creation on first use.
+     * This avoids repeated StorageProperties lookup, scheme parsing, and 
S3URI regex parsing for each file.
+     */
+    private void initLocationPathCache(String samplePath) {
+        try {
+            // Create a LocationPath using the full method to get all cached 
values
+            LocationPath sampleLocationPath = LocationPath.of(samplePath, 
storagePropertiesMap);
+            cachedStorageProperties = 
sampleLocationPath.getStorageProperties();
+            cachedSchema = sampleLocationPath.getSchema();
+            cachedFsIdentifier = sampleLocationPath.getFsIdentifier();
+
+            // Extract fsIdPrefix like "s3://" from fsIdentifier like 
"s3://bucket"
+            int schemeEnd = cachedFsIdentifier.indexOf("://");
+            if (schemeEnd > 0) {
+                cachedFsIdPrefix = cachedFsIdentifier.substring(0, schemeEnd + 
3);
+            }
+
+            // Cache path prefix mapping for fast transformation
+            // This allows subsequent files to skip S3URI regex parsing 
entirely
+            String normalizedPath = sampleLocationPath.getNormalizedLocation();
+
+            // Find the common prefix by looking for the last '/' before the 
filename
+            int lastSlashInOriginal = samplePath.lastIndexOf('/');
+            int lastSlashInNormalized = normalizedPath.lastIndexOf('/');
+
+            if (lastSlashInOriginal > 0 && lastSlashInNormalized > 0) {
+                cachedOriginalPathPrefix = samplePath.substring(0, 
lastSlashInOriginal + 1);
+                cachedNormalizedPathPrefix = normalizedPath.substring(0, 
lastSlashInNormalized + 1);
+            }
+
+            locationPathCacheInitialized = true;
+        } catch (Exception e) {
+            // If caching fails, try to initialize again on next use
+            LOG.warn("Failed to initialize LocationPath cache, will use full 
parsing", e);
+        }
+    }
+
+    /**
+     * Create a LocationPath with cached values for better performance.
+     * Uses cached path prefix mapping to completely bypass S3URI regex 
parsing for most files.
+     * Falls back to full parsing if cache is not available or path doesn't 
match cached prefix.
+     */
+    private LocationPath createLocationPathWithCache(String path) {
+        // Initialize cache on first call
+        if (!locationPathCacheInitialized) {
+            initLocationPathCache(path);
+        }
+
+        // Fast path: if path starts with cached original prefix, directly 
transform without any parsing
+        if (cachedOriginalPathPrefix != null && 
path.startsWith(cachedOriginalPathPrefix)) {
+            // Transform: replace original prefix with normalized prefix
+            String normalizedPath = cachedNormalizedPathPrefix + 
path.substring(cachedOriginalPathPrefix.length());
+            return LocationPath.ofDirect(normalizedPath, cachedSchema, 
cachedFsIdentifier, cachedStorageProperties);
+        }
+
+        // Medium path: use cached StorageProperties but still need 
validateAndNormalizeUri
+        if (cachedStorageProperties != null) {
+            return LocationPath.ofWithCache(path, cachedStorageProperties, 
cachedSchema, cachedFsIdPrefix);
+        }
+
+        // Fallback to full parsing
+        return LocationPath.of(path, storagePropertiesMap);
+    }
+
     private Split createIcebergSplit(FileScanTask fileScanTask) {
         String originalPath = fileScanTask.file().path().toString();
-        LocationPath locationPath = LocationPath.of(originalPath, 
storagePropertiesMap);
+        LocationPath locationPath = createLocationPathWithCache(originalPath);
         IcebergSplit split = new IcebergSplit(
                 locationPath,
                 fileScanTask.start(),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3PropertyUtils.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3PropertyUtils.java
index 99064b4e2e2..4e64f13a635 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3PropertyUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/storage/S3PropertyUtils.java
@@ -33,6 +33,15 @@ import java.util.Optional;
 public class S3PropertyUtils {
     private static final Logger LOG = 
LogManager.getLogger(S3PropertyUtils.class);
 
+    private static final String SCHEME_DELIM = "://";
+    private static final String S3_SCHEME_PREFIX = "s3://";
+
+    // S3-compatible schemes that can be converted to s3:// with simple string 
replacement
+    // Format: scheme://bucket/key -> s3://bucket/key
+    private static final String[] SIMPLE_S3_COMPATIBLE_SCHEMES = {
+            "s3a", "s3n", "oss", "cos", "cosn", "obs", "bos", "gs"
+    };
+
     /**
      * Constructs the S3 endpoint from a given URI in the props map.
      *
@@ -113,7 +122,8 @@ public class S3PropertyUtils {
 
     /**
      * Validates and normalizes the given path into a standard S3 URI.
-     * If the input already starts with "s3://", it is returned as-is.
+     * If the input already starts with "s3://", it is returned as-is; other simple S3-compatible
+     * schemes (s3a://, oss://, etc.) are rewritten to "s3://" by string replacement, avoiding regex parsing.
      * Otherwise, it is parsed and converted into an S3-compatible URI format.
      *
      * @param path                            the raw S3-style path or full URI
@@ -132,16 +142,54 @@ public class S3PropertyUtils {
         if (StringUtils.isBlank(path)) {
             throw new StoragePropertiesException("path is null");
         }
-        if (path.startsWith("s3://")) {
+
+        // Fast path 1: s3:// paths are already in the normalized format 
expected by BE
+        if (path.startsWith(S3_SCHEME_PREFIX)) {
             return path;
         }
 
+        // Fast path 2: simple S3-compatible schemes (oss://, cos://, s3a://, 
etc.)
+        // can be converted with simple string replacement: 
scheme://bucket/key -> s3://bucket/key
+        String normalized = trySimpleSchemeConversion(path);
+        if (normalized != null) {
+            return normalized;
+        }
+
+        // Full parsing path: for HTTP URLs and other complex formats
         boolean usePathStyle = Boolean.parseBoolean(stringUsePathStyle);
         boolean forceParsingByStandardUri = 
Boolean.parseBoolean(stringForceParsingByStandardUri);
         S3URI s3uri = S3URI.create(path, usePathStyle, 
forceParsingByStandardUri);
         return "s3" + S3URI.SCHEME_DELIM + s3uri.getBucket() + 
S3URI.PATH_DELIM + s3uri.getKey();
     }
 
+    /**
+     * Try to convert simple S3-compatible scheme URIs to s3:// format using 
string replacement.
+     * This avoids expensive regex parsing for common cases like 
oss://bucket/key, s3a://bucket/key, etc.
+     *
+     * @param path the input path
+     * @return converted s3:// path if successful, null if the path doesn't 
match simple pattern
+     */
+    private static String trySimpleSchemeConversion(String path) {
+        int delimIndex = path.indexOf(SCHEME_DELIM);
+        if (delimIndex <= 0) {
+            return null;
+        }
+
+        String scheme = path.substring(0, delimIndex).toLowerCase();
+        for (String compatibleScheme : SIMPLE_S3_COMPATIBLE_SCHEMES) {
+            if (compatibleScheme.equals(scheme)) {
+                String rest = path.substring(delimIndex + 
SCHEME_DELIM.length());
+                if (rest.isEmpty() || rest.startsWith(S3URI.PATH_DELIM) || 
rest.contains(SCHEME_DELIM)) {
+                    return null;
+                }
+                // Simple conversion: replace scheme with "s3"
+                // e.g., "oss://bucket/key" -> "s3://bucket/key"
+                return S3_SCHEME_PREFIX + rest;
+            }
+        }
+        return null;
+    }
+
     /**
      * Extracts and returns the raw URI string from the given props map.
      *
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index 0051ea494b0..8a73619824c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -19,6 +19,7 @@ package org.apache.doris.common.util;
 
 import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.property.storage.StorageProperties;
+import 
org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;
 import org.apache.doris.fs.FileSystemType;
 import org.apache.doris.thrift.TFileType;
 
@@ -300,4 +301,43 @@ public class LocationPathTest {
 
     }
 
+    @Test
+    public void testLocationPathDirect() {
+        StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);
+        LocationPath locationPath = LocationPath.ofDirect("s3://bucket/key", 
"s3", "s3://bucket", storageProperties);
+        Assertions.assertEquals("s3://bucket/key", 
locationPath.getNormalizedLocation());
+        Assertions.assertEquals("s3", locationPath.getSchema());
+        Assertions.assertEquals("s3://bucket", locationPath.getFsIdentifier());
+        Assertions.assertEquals(storageProperties, 
locationPath.getStorageProperties());
+    }
+
+    @Test
+    public void testLocationPathWithCacheFastPath() {
+        StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);
+        String location = "s3://bucket/path/to/file";
+        LocationPath cached = LocationPath.ofWithCache(location, 
storageProperties, "s3", "s3://");
+        LocationPath full = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
+        Assertions.assertEquals(full.getNormalizedLocation(), 
cached.getNormalizedLocation());
+        Assertions.assertEquals(full.getFsIdentifier(), 
cached.getFsIdentifier());
+        Assertions.assertEquals(full.getSchema(), cached.getSchema());
+    }
+
+    @Test
+    public void testLocationPathWithCacheFallback() {
+        StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);
+        String location = "s3://bucket/path/to/file";
+        LocationPath cached = LocationPath.ofWithCache(location, 
storageProperties, "s3", null);
+        LocationPath full = LocationPath.of(location, STORAGE_PROPERTIES_MAP);
+        Assertions.assertEquals(full.getNormalizedLocation(), 
cached.getNormalizedLocation());
+        Assertions.assertEquals(full.getFsIdentifier(), 
cached.getFsIdentifier());
+        Assertions.assertEquals(full.getSchema(), cached.getSchema());
+    }
+
+    @Test
+    public void testLocationPathWithCacheMissingAuthority() {
+        StorageProperties storageProperties = 
STORAGE_PROPERTIES_MAP.get(StorageProperties.Type.S3);
+        Assertions.assertThrows(StoragePropertiesException.class,
+                () -> LocationPath.ofWithCache("s3:///path", 
storageProperties, "s3", "s3://"));
+    }
+
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3ConnectorPropertiesUtilsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3ConnectorPropertiesUtilsTest.java
index 261f8f273c4..a1aba03d5aa 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3ConnectorPropertiesUtilsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/storage/S3ConnectorPropertiesUtilsTest.java
@@ -108,4 +108,16 @@ class S3ConnectorPropertiesUtilsTest {
         Assertions.assertThrows(StoragePropertiesException.class, () -> 
S3PropertyUtils.validateAndNormalizeUri("", "false", "false"));
         Assertions.assertThrows(UserException.class, () -> 
S3PropertyUtils.validateAndNormalizeUri("not a uri", "true", "true"));
     }
+
+    @Test
+    void testSimpleSchemeConversion() throws UserException {
+        String[] schemes = {"s3a", "s3n", "oss", "cos", "cosn", "obs", "bos", 
"gs"};
+        for (String scheme : schemes) {
+            String input = scheme + "://bucket/key";
+            Assertions.assertEquals("s3://bucket/key",
+                    S3PropertyUtils.validateAndNormalizeUri(input, "false", 
"false"));
+        }
+        Assertions.assertEquals("s3://bucket/key",
+                S3PropertyUtils.validateAndNormalizeUri("OSS://bucket/key", 
"false", "false"));
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to