This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 948540c0438 [opt](identifer) let unicode format as a superset of latin format (#48078) 948540c0438 is described below commit 948540c0438323309cf232da1e976c68ba1902a2 Author: morrySnow <zhangwen...@selectdb.com> AuthorDate: Mon Feb 24 14:26:36 2025 +0800 [opt](identifer) let unicode format as a superset of latin format (#48078) --- .../java/org/apache/doris/common/FeNameFormat.java | 6 +- .../org/apache/doris/common/FeNameFormatTest.java | 110 ++++++++++++++++++--- 2 files changed, 97 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java index 8c1bc162ad7..234179823dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java @@ -36,15 +36,15 @@ public class FeNameFormat { private static final String UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$"; private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$"; private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$"; - private static final String COLUMN_NAME_REGEX = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,256}$"; + private static final String COLUMN_NAME_REGEX = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$"; - private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1,128}$"; + private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$"; private static final String UNICODE_COMMON_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$"; private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$"; private static final String UNICODE_TABLE_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$"; private static final String UNICODE_USER_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$"; private static final String UNICODE_COLUMN_NAME_REGEX - = "^[.a-zA-Z0-9_+-/?@#$%^&*\\s,:\\p{L}]{0,256}$"; + = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$"; public static final String FORBIDDEN_PARTITION_NAME = "placeholder_"; diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java index b6e5e68ee83..32e2a553b94 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java @@ -17,27 +17,24 @@ package org.apache.doris.common; -import org.junit.Test; +import org.apache.doris.qe.VariableMgr; + +import com.google.common.collect.Lists; +import org.apache.ivy.util.StringUtils; +import org.junit.jupiter.api.Test; + +import java.util.List; public class FeNameFormatTest { @Test - public void testCheckColumnName() { + void testLabelName() { // check label use correct regex, begin with '-' is different from others ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkLabel("-lable")); + } - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("_id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("__id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("___id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("@timestamp#")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp*")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp.1")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("timestamp.#")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("?id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("#id_")); - ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName("$id_")); + @Test + void testTableName() { // length 64 String tblName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq"; ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkTableName(tblName)); @@ -46,19 +43,100 @@ public class FeNameFormatTest { ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName(largeTblName)); // check table name use correct regex, not begin with '-' ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkTableName("-" + tblName)); + } + + @Test + void testCheckColumnName() { + List<String> alwaysValid = Lists.newArrayList( + "_id", + "_id", + "_ id", + " _id", + "__id", + "___id", + "___id_", + "@timestamp", + "@timestamp#", + "timestamp*", + "timestamp.1", + "timestamp.#", + "?id_", + "#id_", + "$id_", + "a-zA-Z0-9.+-/?@#$%^&*\" ,:" + ); + + List<String> alwaysInvalid = Lists.newArrayList( + // inner column prefix + "mv_", + "mva_", + "__doris_shadow_", + + // invalid + "", + "\\", + "column\\", + StringUtils.repeat("a", 257) + ); + + List<String> unicodeValid = Lists.newArrayList( + "中文", + "語言", + "язык", + "언어", + "لغة", + "ภาษา", + "שפה", + "γλώσσα", + "ენა", + "げんご" + ); + boolean defaultUnicode = VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport; + List<Boolean> enableUnicode = Lists.newArrayList(false, true); + try { + for (Boolean unicode : enableUnicode) { + VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode); + for (String s : alwaysValid) { + ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName(s)); + } + for (String s : alwaysInvalid) { + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s)); + } + for (String s : unicodeValid) { + if (unicode) { + ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkColumnName(s)); + } else { + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkColumnName(s)); + } + } + } + } finally { + VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode); + } + } + + @Test + void testUserName() { ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkUserName("a.b")); // check user name use correct regex, not begin with '.' ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkUserName(".a.b")); + } + + @Test + void testCommonName() { + String commonName = "test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq"; // check common name use correct regex, length 65 - ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", tblName + "t")); + ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", commonName + "t")); ExceptionChecker.expectThrows(AnalysisException.class, () -> FeNameFormat.checkCommonName("fakeType", "_commonName")); ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "common-Name")); ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkCommonName("fakeType", "commonName-")); + } + @Test + void testOutfileName() { // check success file name prefix ExceptionChecker.expectThrowsNoException(() -> FeNameFormat.checkOutfileSuccessFileName("fakeType", "_success")); } - } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org