This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 727161bfd18 [feat](oss) support oss-hdfs root policy (#50678) 727161bfd18 is described below commit 727161bfd1870f8dbcec0f745102b0dc4882a454 Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Thu May 15 23:35:34 2025 +0800 [feat](oss) support oss-hdfs root policy (#50678) ### What problem does this PR solve? The root policy is a feature of Aliyun OSS-HDFS that maps an HDFS path to an OSS path, e.g. `hdfs://my_root_policy_name/` -> `oss://emr-dev-oss.cn-beijing.oss-dls.aliyuncs.com/`. So when the root policy is enabled, the location of a Hive table will be `hdfs://my_root_policy_name/xxx` instead of `oss://emr-dev-oss.cn-beijing.oss-dls.aliyuncs.com/xxx`. This PR covers this case by adding a new catalog property `oss.root_policy`. The default is false; if set to true, a location like `hdfs://my_root_policy_name/` is treated separately to fit the JindoFS format. --- .../org/apache/doris/common/util/LocationPath.java | 48 +++++++++----- .../apache/doris/datasource/ExternalCatalog.java | 3 + .../apache/doris/common/util/LocationPathTest.java | 74 ++++++++++++++++++++++ 3 files changed, 109 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java index 7e8b357395c..798e13d321f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java @@ -20,6 +20,7 @@ package org.apache.doris.common.util; import org.apache.doris.catalog.HdfsResource; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; +import org.apache.doris.datasource.ExternalCatalog; import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.property.constants.CosProperties; import 
org.apache.doris.datasource.property.constants.ObsProperties; @@ -52,6 +53,9 @@ public class LocationPath { private static final Logger LOG = LogManager.getLogger(LocationPath.class); private static final String SCHEME_DELIM = "://"; private static final String NONSTANDARD_SCHEME_DELIM = ":/"; + private static final String STANDARD_HDFS_PREFIX = "hdfs://"; + private static final String EMPTY_HDFS_PREFIX = "hdfs:///"; + private static final String BROKEN_HDFS_PREFIX = "hdfs:/"; private final Scheme scheme; private final String location; private final boolean isBindBroker; @@ -106,7 +110,9 @@ public class LocationPath { this.scheme = Scheme.HDFS; // Need add hdfs host to location String host = props.get(HdfsResource.DSF_NAMESERVICES); - tmpLocation = convertPath ? normalizedHdfsPath(tmpLocation, host) : tmpLocation; + boolean enableOssRootPolicy = props.getOrDefault(ExternalCatalog.OOS_ROOT_POLICY, "false") + .equals("true"); + tmpLocation = convertPath ? normalizedHdfsPath(tmpLocation, host, enableOssRootPolicy) : tmpLocation; break; case FeConstants.FS_PREFIX_S3: this.scheme = Scheme.S3; @@ -365,38 +371,48 @@ public class LocationPath { return pos; } - private static String normalizedHdfsPath(String location, String host) { + @VisibleForTesting + public static String normalizedHdfsPath(String location, String host, boolean enableOssRootPolicy) { try { // Hive partition may contain special characters such as ' ', '<', '>' and so on. // Need to encode these characters before creating URI. // But doesn't encode '/' and ':' so that we can get the correct uri host. 
- location = URLEncoder.encode(location, StandardCharsets.UTF_8.name()) - .replace("%2F", "/").replace("%3A", ":"); - URI normalizedUri = new URI(location); + String newLocation = URLEncoder.encode(location, StandardCharsets.UTF_8.name()).replace("%2F", "/") + .replace("%3A", ":"); + URI normalizedUri = new URI(newLocation).normalize(); // compatible with 'hdfs:///' or 'hdfs:/' if (StringUtils.isEmpty(normalizedUri.getHost())) { - location = URLDecoder.decode(location, StandardCharsets.UTF_8.name()); - String normalizedPrefix = HdfsResource.HDFS_PREFIX + "//"; - String brokenPrefix = HdfsResource.HDFS_PREFIX + "/"; - if (location.startsWith(brokenPrefix) && !location.startsWith(normalizedPrefix)) { - location = location.replace(brokenPrefix, normalizedPrefix); + newLocation = URLDecoder.decode(newLocation, StandardCharsets.UTF_8.name()); + if (newLocation.startsWith(BROKEN_HDFS_PREFIX) && !newLocation.startsWith(STANDARD_HDFS_PREFIX)) { + newLocation = newLocation.replace(BROKEN_HDFS_PREFIX, STANDARD_HDFS_PREFIX); } if (StringUtils.isNotEmpty(host)) { // Replace 'hdfs://key/' to 'hdfs://name_service/key/' // Or hdfs:///abc to hdfs://name_service/abc - return location.replace(normalizedPrefix, normalizedPrefix + host + "/"); + if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) { + return newLocation.replace(STANDARD_HDFS_PREFIX, STANDARD_HDFS_PREFIX + host); + } else { + return newLocation.replace(STANDARD_HDFS_PREFIX, STANDARD_HDFS_PREFIX + host + "/"); + } } else { // 'hdfs://null/' equals the 'hdfs:///' - if (location.startsWith(HdfsResource.HDFS_PREFIX + "///")) { + if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) { // Do not support hdfs:///location - throw new RuntimeException("Invalid location with empty host: " + location); + throw new RuntimeException("Invalid location with empty host: " + newLocation); } else { - // Replace 'hdfs://key/' to '/key/', try access local NameNode on BE. 
- return location.replace(normalizedPrefix, "/"); + if (enableOssRootPolicy) { + // if oss root policy is enabled, the path should be like: + // hdfs://customized_host/path/to/file + // Should remain unchanged. + return newLocation; + } else { + // Replace 'hdfs://key/' to '/key/', try access local NameNode on BE. + return newLocation.replace(STANDARD_HDFS_PREFIX, "/"); + } } } } - return URLDecoder.decode(location, StandardCharsets.UTF_8.name()); + return URLDecoder.decode(newLocation, StandardCharsets.UTF_8.name()); } catch (URISyntaxException | UnsupportedEncodingException e) { throw new RuntimeException(e.getMessage(), e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 261f86cd5a6..01114899053 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -121,6 +121,9 @@ public abstract class ExternalCatalog public static final String FOUND_CONFLICTING = "Found conflicting"; public static final String ONLY_TEST_LOWER_CASE_TABLE_NAMES = "only_test_lower_case_table_names"; + // https://help.aliyun.com/zh/emr/emr-on-ecs/user-guide/use-rootpolicy-to-access-oss-hdfs?spm=a2c4g.11186623.help-menu-search-28066.d_0 + public static final String OOS_ROOT_POLICY = "oss.root_policy"; + // Properties that should not be shown in the `show create catalog` result public static final Set<String> HIDDEN_PROPERTIES = Sets.newHashSet( CREATE_TIME, diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java index 1e9e3e1ab88..e49302cef7f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java @@ -230,4 +230,78 @@ public class 
LocationPathTest { LocationPath p3 = new LocationPath("file://authority/abc/def", props); Assertions.assertEquals(Scheme.LOCAL, p3.getScheme()); } + + @Test + public void testNormalizedHdfsPath() { + // Test case 1: Path with special characters that need encoding + // Input: Path with spaces and special characters + // Expected: Characters are properly encoded while preserving / and : + String location = "hdfs://namenode/path with spaces/<special>chars"; + String host = ""; + boolean enableOssRootPolicy = false; + String result = LocationPath.normalizedHdfsPath(location, host, enableOssRootPolicy); + Assertions.assertEquals("hdfs://namenode/path with spaces/<special>chars", result); + + // Test case 2: Empty host in URI with host parameter provided + // Input: hdfs:///, host = nameservice + // Expected: hdfs://nameservice/ + location = "hdfs:///path/to/file"; + host = "nameservice"; + result = LocationPath.normalizedHdfsPath(location, host, false); + Assertions.assertEquals("hdfs://nameservice/path/to/file", result); + + // Test case 3: Broken prefix case (hdfs:/ instead of hdfs://) + // Input: hdfs:/path, host = nameservice + // Expected: hdfs://nameservice/path + location = "hdfs:/path/to/file"; + host = "nameservice"; + result = LocationPath.normalizedHdfsPath(location, host, false); + Assertions.assertEquals("hdfs://nameservice/path/to/file", result); + + // Test case 4: Empty host parameter with enableOssRootPolicy=true + // Input: hdfs://customized_host/path + // Expected: hdfs://customized_host/path (unchanged) + location = "hdfs://customized_host/path/to/file"; + host = ""; + result = LocationPath.normalizedHdfsPath(location, host, true); + Assertions.assertEquals("hdfs://customized_host/path/to/file", result); + + // Test case 5: Empty host parameter with enableOssRootPolicy=false + // Input: hdfs://host/path + // Expected: /path + location = "hdfs://customized_host/path/to/file"; + host = ""; + result = LocationPath.normalizedHdfsPath(location, host, 
false); + Assertions.assertEquals("/customized_host/path/to/file", result); + + // Test case 6: hdfs:/// with empty host parameter + // Input: hdfs:///path + // Expected: Exception since this format is not supported + location = "hdfs:///path/to/file"; + host = ""; + boolean exceptionThrown = false; + try { + LocationPath.normalizedHdfsPath(location, host, false); + } catch (RuntimeException e) { + exceptionThrown = true; + Assertions.assertTrue(e.getMessage().contains("Invalid location with empty host")); + } + Assertions.assertTrue(exceptionThrown); + + // Test case 7: Non-empty host in URI (regular case) + // Input: hdfs://existinghost/path + // Expected: hdfs://existinghost/path (unchanged) + location = "hdfs://existinghost/path/to/file"; + host = "nameservice"; + result = LocationPath.normalizedHdfsPath(location, host, false); + Assertions.assertEquals("hdfs://existinghost/path/to/file", result); + + // Test case 8: No valid host name + // Input: hdfs://hdfs_host/path + // Expected: hdfs://existinghost/path (unchanged) + location = "hdfs://hdfs_host/path/to/file"; + host = "nameservice"; + result = LocationPath.normalizedHdfsPath(location, host, false); + Assertions.assertEquals("hdfs://nameservice/hdfs_host/path/to/file", result); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org