This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8fc9d804790efdd6d97a112ecb58a2bc42bd65ed
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Wed Feb 21 16:40:26 2024 +0800

    [compatibility](MySQL) update charset to utf8mb4, collation to utf8mb4_0900_bin (#31046)
    
    Doris's behaviour is closer to utf8mb4 with utf8mb4_0900_bin than to utf8 with utf8_general_ci.
---
 be/src/exec/schema_scanner/schema_charsets_scanner.cpp     |  2 +-
 be/src/exec/schema_scanner/schema_collations_scanner.cpp   |  2 +-
 be/src/exec/schema_scanner/schema_schemata_scanner.cpp     |  4 ++--
 .../src/main/java/org/apache/doris/qe/SessionVariable.java | 14 +++++++-------
 .../src/main/java/org/apache/doris/qe/ShowExecutor.java    | 10 +++++-----
 .../java/org/apache/doris/analysis/SelectStmtTest.java     |  2 +-
 .../data/show_p0/test_show_create_table_and_views.out      |  2 +-
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp 
b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp
index 1b2b8a15581..9bd7ad7919c 100644
--- a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp
+++ b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp
@@ -39,7 +39,7 @@ std::vector<SchemaScanner::ColumnDesc> 
SchemaCharsetsScanner::_s_css_columns = {
 };
 
 SchemaCharsetsScanner::CharsetStruct SchemaCharsetsScanner::_s_charsets[] = {
-        {"utf8", "utf8_general_ci", "UTF-8 Unicode", 3},
+        {"utf8mb4", "utf8mb4_0900_bin", "UTF-8 Unicode", 4},
         {nullptr, nullptr, nullptr, 0},
 };
 
diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.cpp 
b/be/src/exec/schema_scanner/schema_collations_scanner.cpp
index 377cff69486..812a8cff18e 100644
--- a/be/src/exec/schema_scanner/schema_collations_scanner.cpp
+++ b/be/src/exec/schema_scanner/schema_collations_scanner.cpp
@@ -41,7 +41,7 @@ std::vector<SchemaScanner::ColumnDesc> 
SchemaCollationsScanner::_s_cols_columns
 };
 
 SchemaCollationsScanner::CollationStruct 
SchemaCollationsScanner::_s_collations[] = {
-        {"utf8_general_ci", "utf8", 33, "Yes", "Yes", 1},
+        {"utf8mb4_0900_bin", "utf8mb4", 309, "Yes", "Yes", 1},
         {nullptr, nullptr, 0, nullptr, nullptr, 0},
 };
 
diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp 
b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp
index a465ab6550c..d6e82f611e8 100644
--- a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp
+++ b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp
@@ -127,7 +127,7 @@ Status 
SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) {
     }
     // DEFAULT_CHARACTER_SET_NAME
     {
-        std::string src = "utf8";
+        std::string src = "utf8mb4";
         StringRef str = StringRef(src.c_str(), src.size());
         for (int i = 0; i < dbs_num; ++i) {
             datas[i] = &str;
@@ -136,7 +136,7 @@ Status 
SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) {
     }
     // DEFAULT_COLLATION_NAME
     {
-        std::string src = "utf8_general_ci";
+        std::string src = "utf8mb4_0900_bin";
         StringRef str = StringRef(src.c_str(), src.size());
         for (int i = 0; i < dbs_num; ++i) {
             datas[i] = &str;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 4c8a5006829..4b4d108650b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -650,20 +650,20 @@ public class SessionVariable implements Serializable, 
Writable {
 
     // this is used to make c3p0 library happy
     @VariableMgr.VarAttr(name = CHARACTER_SET_CLIENT)
-    public String charsetClient = "utf8";
+    public String charsetClient = "utf8mb4";
     @VariableMgr.VarAttr(name = CHARACTER_SET_CONNNECTION)
-    public String charsetConnection = "utf8";
+    public String charsetConnection = "utf8mb4";
     @VariableMgr.VarAttr(name = CHARACTER_SET_RESULTS)
-    public String charsetResults = "utf8";
+    public String charsetResults = "utf8mb4";
     @VariableMgr.VarAttr(name = CHARACTER_SET_SERVER)
-    public String charsetServer = "utf8";
+    public String charsetServer = "utf8mb4";
     @VariableMgr.VarAttr(name = COLLATION_CONNECTION)
-    public String collationConnection = "utf8_general_ci";
+    public String collationConnection = "utf8mb4_0900_bin";
     @VariableMgr.VarAttr(name = COLLATION_DATABASE)
-    public String collationDatabase = "utf8_general_ci";
+    public String collationDatabase = "utf8mb4_0900_bin";
 
     @VariableMgr.VarAttr(name = COLLATION_SERVER)
-    public String collationServer = "utf8_general_ci";
+    public String collationServer = "utf8mb4_0900_bin";
 
     // this is used to make c3p0 library happy
     @VariableMgr.VarAttr(name = SQL_AUTO_IS_NULL)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index c030dfbc66b..58b96cde269 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -1035,7 +1035,7 @@ public class ShowExecutor {
             }
 
             if (table instanceof View) {
-                rows.add(Lists.newArrayList(table.getName(), 
createTableStmt.get(0), "utf8", "utf8_general_ci"));
+                rows.add(Lists.newArrayList(table.getName(), 
createTableStmt.get(0), "utf8mb4", "utf8mb4_0900_bin"));
                 resultSet = new 
ShowResultSet(ShowCreateTableStmt.getViewMetaData(), rows);
             } else {
                 if (showStmt.isView()) {
@@ -1661,10 +1661,10 @@ public class ShowExecutor {
         ShowCollationStmt showStmt = (ShowCollationStmt) stmt;
         List<List<String>> rows = Lists.newArrayList();
         List<String> row = Lists.newArrayList();
-        // | utf8_general_ci | utf8 | 33 | Yes | Yes | 1 |
-        row.add("utf8_general_ci");
-        row.add("utf8");
-        row.add("33");
+        // | utf8mb4_0900_bin | utf8mb4 | 309 | Yes | Yes | 1 |
+        row.add("utf8mb4_0900_bin");
+        row.add("utf8mb4");
+        row.add("309");
         row.add("Yes");
         row.add("Yes");
         row.add("1");
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
index 76bbdec5fdb..4fa49376f7d 100755
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
@@ -479,7 +479,7 @@ public class SelectStmtTest {
 
                 + "character_set_name,\n"
 
-                + "is_default collate utf8_general_ci = 'Yes' as is_default\n"
+                + "is_default collate utf8mb4_0900_bin = 'Yes' as is_default\n"
                 + "from information_schema.collations";
         dorisAssert.query(sql).explainQuery();
     }
diff --git a/regression-test/data/show_p0/test_show_create_table_and_views.out 
b/regression-test/data/show_p0/test_show_create_table_and_views.out
index fe4d8ea9fca..c6b261e5b6b 100644
--- a/regression-test/data/show_p0/test_show_create_table_and_views.out
+++ b/regression-test/data/show_p0/test_show_create_table_and_views.out
@@ -24,7 +24,7 @@ show_create_table_and_views_table     CREATE TABLE 
`show_create_table_and_views_tabl
 3      1
 
 -- !show --
-show_create_table_and_views_view       CREATE VIEW 
`show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS 
`user_id`, `cost` AS `cost` FROM 
`show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE 
(`good_id` = 2);       utf8    utf8_general_ci
+show_create_table_and_views_view       CREATE VIEW 
`show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS 
`user_id`, `cost` AS `cost` FROM 
`show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE 
(`good_id` = 2);       utf8mb4 utf8mb4_0900_bin
 
 -- !select --
 1      47


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to