This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 2c927224acc branch-3.0: [opt](identifier) make unicode format a 
superset of latin format #48078 (#49808)
2c927224acc is described below

commit 2c927224accf207ea03a192794049f6da310faa7
Author: morrySnow <zhangwen...@selectdb.com>
AuthorDate: Sat Apr 5 06:29:59 2025 +0800

    branch-3.0: [opt](identifier) make unicode format a superset of latin 
format #48078 (#49808)
    
    pick from master #48078
---
 .../java/org/apache/doris/common/FeNameFormat.java |   6 +-
 .../org/apache/doris/common/FeNameFormatTest.java  | 110 ++++++++++++++++++---
 2 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 363ec175f23..fbbd670c9df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -37,15 +37,15 @@ public class FeNameFormat {
     private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
     private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
     private static final String REPOSITORY_NAME_REGEX = 
"^[a-zA-Z][a-zA-Z0-9-_]{0,255}$";
-    private static final String COLUMN_NAME_REGEX = 
"^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,256}$";
+    private static final String COLUMN_NAME_REGEX = 
"^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{1,256}$";
 
-    private static final String UNICODE_LABEL_REGEX = 
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
+    private static final String UNICODE_LABEL_REGEX = 
"^[-_A-Za-z0-9:\\p{L}]{1," + Config.label_regex_length + "}$";
     private static final String UNICODE_COMMON_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
     private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX = 
"^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
     private static final String UNICODE_TABLE_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
     private static final String UNICODE_USER_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
     private static final String UNICODE_COLUMN_NAME_REGEX
-            = "^[.a-zA-Z0-9_+-/?@#$%^&*\\s,:\\p{L}]{0,256}$";
+            = "^[.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:\\p{L}]{1,256}$";
     private static final String UNICODE_REPOSITORY_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,255}$";
 
     public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
index b6e5e68ee83..32e2a553b94 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/FeNameFormatTest.java
@@ -17,27 +17,24 @@
 
 package org.apache.doris.common;
 
-import org.junit.Test;
+import org.apache.doris.qe.VariableMgr;
+
+import com.google.common.collect.Lists;
+import org.apache.ivy.util.StringUtils;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
 
 public class FeNameFormatTest {
 
     @Test
-    public void testCheckColumnName() {
+    void testLabelName() {
         // check label use correct regex, begin with '-' is different from 
others
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkLabel("-lable"));
+    }
 
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("_id"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("__id"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("___id"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("___id_"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("@timestamp"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("@timestamp#"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("timestamp*"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("timestamp.1"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("timestamp.#"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("?id_"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("#id_"));
-        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName("$id_"));
+    @Test
+    void testTableName() {
         // length 64
         String tblName = 
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkTableName(tblName));
@@ -46,19 +43,100 @@ public class FeNameFormatTest {
         ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkTableName(largeTblName));
         // check table name use correct regex, not begin with '-'
         ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkTableName("-" + tblName));
+    }
+
+    @Test
+    void testCheckColumnName() {
+        List<String> alwaysValid = Lists.newArrayList(
+                "_id",
+                "_id",
+                "_ id",
+                " _id",
+                "__id",
+                "___id",
+                "___id_",
+                "@timestamp",
+                "@timestamp#",
+                "timestamp*",
+                "timestamp.1",
+                "timestamp.#",
+                "?id_",
+                "#id_",
+                "$id_",
+                "a-zA-Z0-9.+-/?@#$%^&*\" ,:"
+        );
+
+        List<String> alwaysInvalid = Lists.newArrayList(
+                // inner column prefix
+                "mv_",
+                "mva_",
+                "__doris_shadow_",
+
+                // invalid
+                "",
+                "\\",
+                "column\\",
+                StringUtils.repeat("a", 257)
+        );
+
+        List<String> unicodeValid = Lists.newArrayList(
+                "中文",
+                "語言",
+                "язык",
+                "언어",
+                "لغة",
+                "ภาษา",
+                "שפה",
+                "γλώσσα",
+                "ენა",
+                "げんご"
+        );
 
+        boolean defaultUnicode = 
VariableMgr.getDefaultSessionVariable().enableUnicodeNameSupport;
+        List<Boolean> enableUnicode = Lists.newArrayList(false, true);
+        try {
+            for (Boolean unicode : enableUnicode) {
+                
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(unicode);
+                for (String s : alwaysValid) {
+                    ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName(s));
+                }
+                for (String s : alwaysInvalid) {
+                    ExceptionChecker.expectThrows(AnalysisException.class, () 
-> FeNameFormat.checkColumnName(s));
+                }
+                for (String s : unicodeValid) {
+                    if (unicode) {
+                        ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkColumnName(s));
+                    } else {
+                        ExceptionChecker.expectThrows(AnalysisException.class, 
() -> FeNameFormat.checkColumnName(s));
+                    }
+                }
+            }
+        } finally {
+            
VariableMgr.getDefaultSessionVariable().setEnableUnicodeNameSupport(defaultUnicode);
+        }
+    }
+
+    @Test
+    void testUserName() {
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkUserName("a.b"));
         // check user name use correct regex, not begin with '.'
         ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkUserName(".a.b"));
+    }
+
+    @Test
+    void testCommonName() {
+        String commonName = 
"test_sys_partition_list_basic_test_list_partition_bigint_tb-uniq";
 
         // check common name use correct regex, length 65
-        ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkCommonName("fakeType", tblName + "t"));
+        ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkCommonName("fakeType", commonName + "t"));
         ExceptionChecker.expectThrows(AnalysisException.class, () -> 
FeNameFormat.checkCommonName("fakeType", "_commonName"));
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkCommonName("fakeType", "common-Name"));
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkCommonName("fakeType", "commonName-"));
+    }
 
+    @Test
+    void testOutfileName() {
         // check success file name prefix
         ExceptionChecker.expectThrowsNoException(() -> 
FeNameFormat.checkOutfileSuccessFileName("fakeType", "_success"));
     }
-
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to