This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 2c927224acc branch-3.0: [opt](identifer) let unicode format as a superset of latin format #48078 (#49808) 2c927224acc is described below commit 2c927224accf207ea03a192794049f6da310faa7 Author: morrySnow <zhangwen...@selectdb.com> AuthorDate: Sat Apr 5 06:29:59 2025 +0800 branch-3.0: [opt](identifer) let unicode format as a superset of latin format #48078 (#49808) pick from master #48078 --- .../java/org/apache/doris/common/FeNameFormat.java | 6 +- .../org/apache/doris/common/FeNameFormatTest.java | 110 ++++++++++++++++++--- 2 files changed, 97 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java index 363ec175f23..fbbd670c9df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java @@ -37,15 +37,15 @@ public class FeNameFormat { private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$"; private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$"; private static final String REPOSITORY_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]{0,255}$"; - private static final String COLUMN_NAME_REGEX = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,256}$"; + private static final String COLUMN_NAME_REGEX = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$"; - private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1,128}$"; + private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$"; private static final String UNICODE_COMMON_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$"; private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$"; private static final String UNICODE_TABLE_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$"; private static final 
String UNICODE_USER_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$"; private static final String UNICODE_COLUMN_NAME_REGEX - = "^[.a-zA-Z0-9_+-/?@#$%^&*\\s,:\\p{L}]{0,256}$"; + = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$"; private static final String UNICODE_REPOSITORY_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,255}$"; public static final String FORBIDDEN_PARTITION_NAME = "placeholder_"; diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java index b6e5e68ee83..32e2a553b94 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java @@ -17,27 +17,24 @@ package org.apache.doris.common; -import org.junit.Test; +import org.apache.doris.qe.VariableMgr; + +import com.google.common.collect.Lists; +import org.apache.ivy.util.StringUtils; +import org.junit.jupiter.api.Test; + +import java.util.List; public class FeNameFormatTest { @Test - public void testCheckColumnName() { + void testLabelName() { // check label use correct regex, begin with '-' is different from others ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkLabel("-lable")); + } - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("_id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("__id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp#")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp*")); - ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("timestamp.1")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp.#")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("?id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("#id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("$id_")); + @Test + void testTableName() { // length 64 String tblName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq"; ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkTableName(tblName)); @@ -46,19 +43,100 @@ public class FeNameFormatTest { ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName(largeTblName)); // check table name use correct regex, not begin with '-' ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName("-" + tblName)); + } + + @Test + void testCheckColumnName() { + List<String> alwaysValid = Lists.newArrayList( + "_id", + "_id", + "_ id", + " _id", + "__id", + "___id", + "___id_", + "@timestamp", + "@timestamp#", + "timestamp*", + "timestamp.1", + "timestamp.#", + "?id_", + "#id_", + "$id_", + "a-zA-Z0-9.+-/?@#$%^&*\" ,:" + ); + + List<String> alwaysInvalid = Lists.newArrayList( + // inner column prefix + "mv_", + "mva_", + "__doris_shadow_", + + // invalid + "", + "\\", + "column\\", + StringUtils.repeat("a", 257) + ); + + List<String> unicodeValid = Lists.newArrayList( + "中文", + "語言", + "язык", + "언어", + "لغة", + "ภาษา", + "שפה", + "γλώσσα", + "ენა", + "げんご" + ); + boolean defaultUnicode = VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport; + List<Boolean> enableUnicode = Lists.newArrayList(false, true); + try { + for (Boolean unicode : enableUnicode) { + VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode); + for (String s : alwaysValid) { + ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName(s)); + } + for (String s : alwaysInvalid) { + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s)); + } + for (String s : unicodeValid) { + if (unicode) { + ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName(s)); + } else { + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s)); + } + } + } + } finally { + VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode); + } + } + + @Test + void testUserName() { ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkUserName("a.b")); // check user name use correct regex, not begin with '.' ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkUserName(".a.b")); + } + + @Test + void testCommonName() { + String commonName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq"; // check common name use correct regex, length 65 - ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", tblName + "t")); + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", commonName + "t")); ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", "_commonName")); ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "common-Name")); ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "commonName-")); + } + @Test + void testOutfileName() { // check success file name prefix ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkOutfileSuccessFileName("fakeType", "_success")); } - } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org