This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 727161bfd18 [feat](oss) support oss-hdfs root policy (#50678)
727161bfd18 is described below

commit 727161bfd1870f8dbcec0f745102b0dc4882a454
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Thu May 15 23:35:34 2025 +0800

    [feat](oss) support oss-hdfs root policy (#50678)
    
    ### What problem does this PR solve?
    
    Root policy is a feature of Aliyun OSS-HDFS that maps an HDFS path to
    an OSS path,
    e.g.:
    `hdfs://my_root_policy_name/` ->
    `oss://emr-dev-oss.cn-beijing.oss-dls.aliyuncs.com/`
    
    So when root policy is enabled, the location of a hive table will be
    `hdfs://my_root_policy_name/xxx`
    instead of `oss://emr-dev-oss.cn-beijing.oss-dls.aliyuncs.com/xxx`
    
    This PR covers this case by adding a new catalog property,
    `oss.root_policy`.
    The default is `false`. When it is set to `true`, a location like
    `hdfs://my_root_policy_name/` is handled separately
    so that it fits the JindoFS format.
---
 .../org/apache/doris/common/util/LocationPath.java | 48 +++++++++-----
 .../apache/doris/datasource/ExternalCatalog.java   |  3 +
 .../apache/doris/common/util/LocationPathTest.java | 74 ++++++++++++++++++++++
 3 files changed, 109 insertions(+), 16 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
index 7e8b357395c..798e13d321f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java
@@ -20,6 +20,7 @@ package org.apache.doris.common.util;
 import org.apache.doris.catalog.HdfsResource;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
+import org.apache.doris.datasource.ExternalCatalog;
 import org.apache.doris.datasource.hive.HMSExternalCatalog;
 import org.apache.doris.datasource.property.constants.CosProperties;
 import org.apache.doris.datasource.property.constants.ObsProperties;
@@ -52,6 +53,9 @@ public class LocationPath {
     private static final Logger LOG = LogManager.getLogger(LocationPath.class);
     private static final String SCHEME_DELIM = "://";
     private static final String NONSTANDARD_SCHEME_DELIM = ":/";
+    private static final String STANDARD_HDFS_PREFIX = "hdfs://";
+    private static final String EMPTY_HDFS_PREFIX = "hdfs:///";
+    private static final String BROKEN_HDFS_PREFIX = "hdfs:/";
     private final Scheme scheme;
     private final String location;
     private final boolean isBindBroker;
@@ -106,7 +110,9 @@ public class LocationPath {
                 this.scheme = Scheme.HDFS;
                 // Need add hdfs host to location
                 String host = props.get(HdfsResource.DSF_NAMESERVICES);
-                tmpLocation = convertPath ? normalizedHdfsPath(tmpLocation, 
host) : tmpLocation;
+                boolean enableOssRootPolicy = 
props.getOrDefault(ExternalCatalog.OOS_ROOT_POLICY, "false")
+                        .equals("true");
+                tmpLocation = convertPath ? normalizedHdfsPath(tmpLocation, 
host, enableOssRootPolicy) : tmpLocation;
                 break;
             case FeConstants.FS_PREFIX_S3:
                 this.scheme = Scheme.S3;
@@ -365,38 +371,48 @@ public class LocationPath {
         return pos;
     }
 
-    private static String normalizedHdfsPath(String location, String host) {
+    @VisibleForTesting
+    public static String normalizedHdfsPath(String location, String host, 
boolean enableOssRootPolicy) {
         try {
             // Hive partition may contain special characters such as ' ', '<', 
'>' and so on.
             // Need to encode these characters before creating URI.
             // But doesn't encode '/' and ':' so that we can get the correct 
uri host.
-            location = URLEncoder.encode(location, 
StandardCharsets.UTF_8.name())
-                .replace("%2F", "/").replace("%3A", ":");
-            URI normalizedUri = new URI(location);
+            String newLocation = URLEncoder.encode(location, 
StandardCharsets.UTF_8.name()).replace("%2F", "/")
+                    .replace("%3A", ":");
+            URI normalizedUri = new URI(newLocation).normalize();
             // compatible with 'hdfs:///' or 'hdfs:/'
             if (StringUtils.isEmpty(normalizedUri.getHost())) {
-                location = URLDecoder.decode(location, 
StandardCharsets.UTF_8.name());
-                String normalizedPrefix = HdfsResource.HDFS_PREFIX + "//";
-                String brokenPrefix = HdfsResource.HDFS_PREFIX + "/";
-                if (location.startsWith(brokenPrefix) && 
!location.startsWith(normalizedPrefix)) {
-                    location = location.replace(brokenPrefix, 
normalizedPrefix);
+                newLocation = URLDecoder.decode(newLocation, 
StandardCharsets.UTF_8.name());
+                if (newLocation.startsWith(BROKEN_HDFS_PREFIX) && 
!newLocation.startsWith(STANDARD_HDFS_PREFIX)) {
+                    newLocation = newLocation.replace(BROKEN_HDFS_PREFIX, 
STANDARD_HDFS_PREFIX);
                 }
                 if (StringUtils.isNotEmpty(host)) {
                     // Replace 'hdfs://key/' to 'hdfs://name_service/key/'
                     // Or hdfs:///abc to hdfs://name_service/abc
-                    return location.replace(normalizedPrefix, normalizedPrefix 
+ host + "/");
+                    if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) {
+                        return newLocation.replace(STANDARD_HDFS_PREFIX, 
STANDARD_HDFS_PREFIX + host);
+                    } else {
+                        return newLocation.replace(STANDARD_HDFS_PREFIX, 
STANDARD_HDFS_PREFIX + host + "/");
+                    }
                 } else {
                     // 'hdfs://null/' equals the 'hdfs:///'
-                    if (location.startsWith(HdfsResource.HDFS_PREFIX + "///")) 
{
+                    if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) {
                         // Do not support hdfs:///location
-                        throw new RuntimeException("Invalid location with 
empty host: " + location);
+                        throw new RuntimeException("Invalid location with 
empty host: " + newLocation);
                     } else {
-                        // Replace 'hdfs://key/' to '/key/', try access local 
NameNode on BE.
-                        return location.replace(normalizedPrefix, "/");
+                        if (enableOssRootPolicy) {
+                            // if oss root policy is enabled, the path should 
be like:
+                            // hdfs://customized_host/path/to/file
+                            // Should remain unchanged.
+                            return newLocation;
+                        } else {
+                            // Replace 'hdfs://key/' to '/key/', try access 
local NameNode on BE.
+                            return newLocation.replace(STANDARD_HDFS_PREFIX, 
"/");
+                        }
                     }
                 }
             }
-            return URLDecoder.decode(location, StandardCharsets.UTF_8.name());
+            return URLDecoder.decode(newLocation, 
StandardCharsets.UTF_8.name());
         } catch (URISyntaxException | UnsupportedEncodingException e) {
             throw new RuntimeException(e.getMessage(), e);
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
index 261f86cd5a6..01114899053 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
@@ -121,6 +121,9 @@ public abstract class ExternalCatalog
     public static final String FOUND_CONFLICTING = "Found conflicting";
     public static final String ONLY_TEST_LOWER_CASE_TABLE_NAMES = 
"only_test_lower_case_table_names";
 
+    // 
https://help.aliyun.com/zh/emr/emr-on-ecs/user-guide/use-rootpolicy-to-access-oss-hdfs?spm=a2c4g.11186623.help-menu-search-28066.d_0
+    public static final String OOS_ROOT_POLICY = "oss.root_policy";
+
     // Properties that should not be shown in the `show create catalog` result
     public static final Set<String> HIDDEN_PROPERTIES = Sets.newHashSet(
             CREATE_TIME,
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
index 1e9e3e1ab88..e49302cef7f 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java
@@ -230,4 +230,78 @@ public class LocationPathTest {
         LocationPath p3 = new LocationPath("file://authority/abc/def", props);
         Assertions.assertEquals(Scheme.LOCAL, p3.getScheme());
     }
+
+    @Test
+    public void testNormalizedHdfsPath() {
+        // Test case 1: Path with special characters that need encoding
+        // Input: Path with spaces and special characters
+        // Expected: Path is returned unchanged (special characters survive the encode/decode round trip)
+        String location = "hdfs://namenode/path with spaces/<special>chars";
+        String host = "";
+        boolean enableOssRootPolicy = false;
+        String result = LocationPath.normalizedHdfsPath(location, host, 
enableOssRootPolicy);
+        Assertions.assertEquals("hdfs://namenode/path with 
spaces/<special>chars", result);
+
+        // Test case 2: Empty host in URI with host parameter provided
+        // Input: hdfs:///, host = nameservice
+        // Expected: hdfs://nameservice/
+        location = "hdfs:///path/to/file";
+        host = "nameservice";
+        result = LocationPath.normalizedHdfsPath(location, host, false);
+        Assertions.assertEquals("hdfs://nameservice/path/to/file", result);
+
+        // Test case 3: Broken prefix case (hdfs:/ instead of hdfs://)
+        // Input: hdfs:/path, host = nameservice
+        // Expected: hdfs://nameservice/path
+        location = "hdfs:/path/to/file";
+        host = "nameservice";
+        result = LocationPath.normalizedHdfsPath(location, host, false);
+        Assertions.assertEquals("hdfs://nameservice/path/to/file", result);
+
+        // Test case 4: Empty host parameter with enableOssRootPolicy=true
+        // Input: hdfs://customized_host/path
+        // Expected: hdfs://customized_host/path (unchanged)
+        location = "hdfs://customized_host/path/to/file";
+        host = "";
+        result = LocationPath.normalizedHdfsPath(location, host, true);
+        Assertions.assertEquals("hdfs://customized_host/path/to/file", result);
+
+        // Test case 5: Empty host parameter with enableOssRootPolicy=false
+        // Input: hdfs://customized_host/path
+        // Expected: /customized_host/path (the "hdfs://" prefix is replaced by "/")
+        location = "hdfs://customized_host/path/to/file";
+        host = "";
+        result = LocationPath.normalizedHdfsPath(location, host, false);
+        Assertions.assertEquals("/customized_host/path/to/file", result);
+
+        // Test case 6: hdfs:/// with empty host parameter
+        // Input: hdfs:///path
+        // Expected: Exception since this format is not supported
+        location = "hdfs:///path/to/file";
+        host = "";
+        boolean exceptionThrown = false;
+        try {
+            LocationPath.normalizedHdfsPath(location, host, false);
+        } catch (RuntimeException e) {
+            exceptionThrown = true;
+            Assertions.assertTrue(e.getMessage().contains("Invalid location 
with empty host"));
+        }
+        Assertions.assertTrue(exceptionThrown);
+
+        // Test case 7: Non-empty host in URI (regular case)
+        // Input: hdfs://existinghost/path
+        // Expected: hdfs://existinghost/path (unchanged)
+        location = "hdfs://existinghost/path/to/file";
+        host = "nameservice";
+        result = LocationPath.normalizedHdfsPath(location, host, false);
+        Assertions.assertEquals("hdfs://existinghost/path/to/file", result);
+
+        // Test case 8: No valid host name
+        // Input: hdfs://hdfs_host/path
+        // Expected: hdfs://nameservice/hdfs_host/path (nameservice is prepended)
+        location = "hdfs://hdfs_host/path/to/file";
+        host = "nameservice";
+        result = LocationPath.normalizedHdfsPath(location, host, false);
+        Assertions.assertEquals("hdfs://nameservice/hdfs_host/path/to/file", 
result);
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to