This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8fc9d804790efdd6d97a112ecb58a2bc42bd65ed Author: morrySnow <101034200+morrys...@users.noreply.github.com> AuthorDate: Wed Feb 21 16:40:26 2024 +0800 [compatibility](MySQL) update charset to utf8mb4, collation to utf8mb4_0900_bin (#31046) Doris's behaviour is more like utf8mb4 and utf8mb4_0900_bin than utf8 and utf8_general_ci --- be/src/exec/schema_scanner/schema_charsets_scanner.cpp | 2 +- be/src/exec/schema_scanner/schema_collations_scanner.cpp | 2 +- be/src/exec/schema_scanner/schema_schemata_scanner.cpp | 4 ++-- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 14 +++++++------- .../src/main/java/org/apache/doris/qe/ShowExecutor.java | 10 +++++----- .../java/org/apache/doris/analysis/SelectStmtTest.java | 2 +- .../data/show_p0/test_show_create_table_and_views.out | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp index 1b2b8a15581..9bd7ad7919c 100644 --- a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp @@ -39,7 +39,7 @@ std::vector<SchemaScanner::ColumnDesc> SchemaCharsetsScanner::_s_css_columns = { }; SchemaCharsetsScanner::CharsetStruct SchemaCharsetsScanner::_s_charsets[] = { - {"utf8", "utf8_general_ci", "UTF-8 Unicode", 3}, + {"utf8mb4", "utf8mb4_0900_bin", "UTF-8 Unicode", 4}, {nullptr, nullptr, nullptr, 0}, }; diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.cpp b/be/src/exec/schema_scanner/schema_collations_scanner.cpp index 377cff69486..812a8cff18e 100644 --- a/be/src/exec/schema_scanner/schema_collations_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_collations_scanner.cpp @@ -41,7 +41,7 @@ std::vector<SchemaScanner::ColumnDesc> SchemaCollationsScanner::_s_cols_columns }; SchemaCollationsScanner::CollationStruct SchemaCollationsScanner::_s_collations[] = { - {"utf8_general_ci", "utf8", 33, "Yes", "Yes", 1}, + {"utf8mb4_0900_bin", "utf8mb4", 309, "Yes", "Yes", 1}, {nullptr, nullptr, 0, nullptr, nullptr, 0}, }; diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp index a465ab6550c..d6e82f611e8 100644 --- a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp @@ -127,7 +127,7 @@ Status SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) { } // DEFAULT_CHARACTER_SET_NAME { - std::string src = "utf8"; + std::string src = "utf8mb4"; StringRef str = StringRef(src.c_str(), src.size()); for (int i = 0; i < dbs_num; ++i) { datas[i] = &str; @@ -136,7 +136,7 @@ Status SchemaSchemataScanner::_fill_block_impl(vectorized::Block* block) { } // DEFAULT_COLLATION_NAME { - std::string src = "utf8_general_ci"; + std::string src = "utf8mb4_0900_bin"; StringRef str = StringRef(src.c_str(), src.size()); for (int i = 0; i < dbs_num; ++i) { datas[i] = &str; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 4c8a5006829..4b4d108650b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -650,20 +650,20 @@ public class SessionVariable implements Serializable, Writable { // this is used to make c3p0 library happy @VariableMgr.VarAttr(name = CHARACTER_SET_CLIENT) - public String charsetClient = "utf8"; + public String charsetClient = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_CONNNECTION) - public String charsetConnection = "utf8"; + public String charsetConnection = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_RESULTS) - public String charsetResults = "utf8"; + public String charsetResults = "utf8mb4"; @VariableMgr.VarAttr(name = CHARACTER_SET_SERVER) - public String charsetServer = "utf8"; + public String charsetServer = "utf8mb4"; @VariableMgr.VarAttr(name = COLLATION_CONNECTION) - public String collationConnection = "utf8_general_ci"; + public String collationConnection = "utf8mb4_0900_bin"; @VariableMgr.VarAttr(name = COLLATION_DATABASE) - public String collationDatabase = "utf8_general_ci"; + public String collationDatabase = "utf8mb4_0900_bin"; @VariableMgr.VarAttr(name = COLLATION_SERVER) - public String collationServer = "utf8_general_ci"; + public String collationServer = "utf8mb4_0900_bin"; // this is used to make c3p0 library happy @VariableMgr.VarAttr(name = SQL_AUTO_IS_NULL) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index c030dfbc66b..58b96cde269 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -1035,7 +1035,7 @@ public class ShowExecutor { } if (table instanceof View) { - rows.add(Lists.newArrayList(table.getName(), createTableStmt.get(0), "utf8", "utf8_general_ci")); + rows.add(Lists.newArrayList(table.getName(), createTableStmt.get(0), "utf8mb4", "utf8mb4_0900_bin")); resultSet = new ShowResultSet(ShowCreateTableStmt.getViewMetaData(), rows); } else { if (showStmt.isView()) { @@ -1661,10 +1661,10 @@ public class ShowExecutor { ShowCollationStmt showStmt = (ShowCollationStmt) stmt; List<List<String>> rows = Lists.newArrayList(); List<String> row = Lists.newArrayList(); - // | utf8_general_ci | utf8 | 33 | Yes | Yes | 1 | - row.add("utf8_general_ci"); - row.add("utf8"); - row.add("33"); + // | utf8mb4_0900_bin | utf8mb4 | 309 | Yes | Yes | 1 | + row.add("utf8mb4_0900_bin"); + row.add("utf8mb4"); + row.add("309"); row.add("Yes"); row.add("Yes"); row.add("1"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 76bbdec5fdb..4fa49376f7d 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -479,7 +479,7 @@ public class SelectStmtTest { + "character_set_name,\n" - + "is_default collate utf8_general_ci = 'Yes' as is_default\n" + + "is_default collate utf8mb4_0900_bin = 'Yes' as is_default\n" + "from information_schema.collations"; dorisAssert.query(sql).explainQuery(); } diff --git a/regression-test/data/show_p0/test_show_create_table_and_views.out b/regression-test/data/show_p0/test_show_create_table_and_views.out index fe4d8ea9fca..c6b261e5b6b 100644 --- a/regression-test/data/show_p0/test_show_create_table_and_views.out +++ b/regression-test/data/show_p0/test_show_create_table_and_views.out @@ -24,7 +24,7 @@ show_create_table_and_views_table CREATE TABLE `show_create_table_and_views_tabl 3 1 -- !show -- -show_create_table_and_views_view CREATE VIEW `show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS `user_id`, `cost` AS `cost` FROM `show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE (`good_id` = 2); utf8 utf8_general_ci +show_create_table_and_views_view CREATE VIEW `show_create_table_and_views_view` COMMENT 'VIEW' AS SELECT `user_id` AS `user_id`, `cost` AS `cost` FROM `show_create_table_and_views_db`.`show_create_table_and_views_table` WHERE (`good_id` = 2); utf8mb4 utf8mb4_0900_bin -- !select -- 1 47 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org