This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b75f4c97f3 [function](string) support char function (#18878) b75f4c97f3 is described below commit b75f4c97f3c9668be6b7de62c33d33114b2a0e57 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Sat Apr 22 08:36:48 2023 +0800 [function](string) support char function (#18878) * [function](string) support char function * fix --- be/src/vec/functions/function_string.cpp | 1 + be/src/vec/functions/function_string.h | 310 +++++++++++++++++++++ .../sql-functions/string-functions/char.md | 69 +++++ docs/sidebars.json | 1 + .../sql-functions/string-functions/char.md | 69 +++++ fe/fe-core/src/main/cup/sql_parser.cup | 18 +- .../apache/doris/analysis/FunctionCallExpr.java | 17 ++ .../apache/doris/catalog/CreateFunctionTest.java | 12 +- gensrc/script/doris_builtins_functions.py | 2 + .../string_functions/test_string_function.out | 95 +++++++ .../string_functions/test_string_function.groovy | 69 +++++ 11 files changed, 656 insertions(+), 7 deletions(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 280eb93b2a..4ece11822f 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -943,6 +943,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionNullOrEmpty>(); factory.register_function<FunctionNotNullOrEmpty>(); factory.register_function<FunctionStringConcat>(); + factory.register_function<FunctionIntToChar>(); factory.register_function<FunctionStringElt>(); factory.register_function<FunctionStringConcatWs>(); factory.register_function<FunctionStringAppendTrailingCharIfAbsent>(); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 5bc415dd2a..bc43011a14 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -42,6 +42,7 @@ #include "runtime/runtime_state.h" #include "runtime/string_search.hpp" #include "util/string_util.h" +#include "util/utf8_check.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" @@ -2765,4 +2766,313 @@ public: *out_len = dest - out; } }; + +// refer to https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char +// UTF8 +// 多 0xe5, 0xa4, 0x9a 0xb6, 0xe0 +// 睿 0xe7, 0x9d, 0xbf 0xee, 0xa3 +// 丝 0xe4, 0xb8, 0x9d 0xcb, 0xbf 14989469 +// MySQL behaviour: +// mysql> select char(0xe4, 0xb8, 0x9d using utf8); +// +-----------------------------------+ +// | char(0xe4, 0xb8, 0x9d using utf8) | +// +-----------------------------------+ +// | 丝 | +// +-----------------------------------+ +// 1 row in set, 1 warning (0.00 sec) +// mysql> select char(14989469 using utf8); +// +---------------------------+ +// | char(14989469 using utf8) | +// +---------------------------+ +// | 丝 | +// +---------------------------+ +// 1 row in set, 1 warning (0.00 sec) +// mysql> select char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8); +// +---------------------------------------------------------------------------------------------+ +// | char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8) | +// +---------------------------------------------------------------------------------------------+ +// | 多睿丝Doris | +// +---------------------------------------------------------------------------------------------+ +// mysql> select char(68, 111, 114, 0, 105, null, 115 using utf8); +// +--------------------------------------------------+ +// | char(68, 111, 114, 0, 105, null, 115 using utf8) | +// +--------------------------------------------------+ +// | Dor is | +// +--------------------------------------------------+ + +// return null: +// mysql> select char(255 using utf8); +// +----------------------+ +// | char(255 using utf8) | +// +----------------------+ +// | NULL | +// +----------------------+ +// 1 row in set, 2 warnings (0.00 sec) +// +// mysql> show warnings; +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | Level | Code | Message | +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. | +// | Warning | 1300 | Invalid utf8mb3 character string: 'FF' | +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// 2 rows in set (0.01 sec) + +// max int value: +// mysql> select char(18446744073709551615); +// +--------------------------------------------------------+ +// | char(18446744073709551615) | +// +--------------------------------------------------------+ +// | 0xFFFFFFFF | +// +--------------------------------------------------------+ +// 1 row in set (0.00 sec) +// +// mysql> select char(18446744073709551616); +// +--------------------------------------------------------+ +// | char(18446744073709551616) | +// +--------------------------------------------------------+ +// | 0xFFFFFFFF | +// +--------------------------------------------------------+ +// 1 row in set, 1 warning (0.00 sec) +// +// mysql> show warnings; +// +---------+------+-----------------------------------------------------------+ +// | Level | Code | Message | +// +---------+------+-----------------------------------------------------------+ +// | Warning | 1292 | Truncated incorrect DECIMAL value: '18446744073709551616' | +// +---------+------+-----------------------------------------------------------+ +// 1 row in set (0.00 sec) + +// table columns: +// mysql> select * from t; +// +------+------+------+ +// | f1 | f2 | f3 | +// +------+------+------+ +// | 228 | 184 | 157 | +// | 228 | 184 | 0 | +// | 228 | 184 | 99 | +// | 99 | 228 | 184 | +// +------+------+------+ +// 4 rows in set (0.00 sec) +// +// mysql> select char(f1, f2, f3 using utf8) from t; +// +-----------------------------+ +// | char(f1, f2, f3 using utf8) | +// +-----------------------------+ +// | 丝 | +// | | +// | | +// | c | +// +-----------------------------+ +// 4 rows in set, 4 warnings (0.00 sec) +// +// mysql> show warnings; +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | Level | Code | Message | +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. | +// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B800' | +// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B863' | +// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B8' | +// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +class FunctionIntToChar : public IFunction { +public: + static constexpr auto name = "char"; + static FunctionPtr create() { return std::make_shared<FunctionIntToChar>(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 0; } + bool is_variadic() const override { return true; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + bool use_default_implementation_for_nulls() const override { return false; } + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + DCHECK_GE(arguments.size(), 2); + + int argument_size = arguments.size(); + std::vector<ColumnPtr> str_columns(argument_size - 1); + std::vector<const ColumnString::Offsets*> offsets_list(argument_size - 1); + std::vector<const ColumnString::Chars*> chars_list(argument_size - 1); + + // convert each argument columns to column string and then concat the string columns + for (size_t i = 1; i < argument_size; ++i) { + if (auto const_column = check_and_get_column<const ColumnConst>( + *block.get_by_position(arguments[i]).column)) { + // ignore null + if (const_column->only_null()) { + str_columns[i - 1] = nullptr; + } else { + auto str_column = ColumnString::create(); + auto& chars = str_column->get_chars(); + auto& offsets = str_column->get_offsets(); + offsets.resize(1); + const ColumnVector<Int32>* int_column; + if (auto* nullable = check_and_get_column<const ColumnNullable>( + const_column->get_data_column())) { + int_column = assert_cast<const ColumnVector<Int32>*>( + nullable->get_nested_column_ptr().get()); + } else { + int_column = assert_cast<const ColumnVector<Int32>*>( + &const_column->get_data_column()); + } + int int_val = int_column->get_int(0); + integer_to_char_(0, &int_val, chars, offsets); + str_columns[i - 1] = + ColumnConst::create(std::move(str_column), input_rows_count); + } + offsets_list[i - 1] = nullptr; + chars_list[i - 1] = nullptr; + } else { + auto str_column = ColumnString::create(); + auto& chars = str_column->get_chars(); + auto& offsets = str_column->get_offsets(); + // data.resize(input_rows_count); + offsets.resize(input_rows_count); + + if (auto nullable = check_and_get_column<const ColumnNullable>( + *block.get_by_position(arguments[i]).column)) { + const auto* int_data = assert_cast<const ColumnVector<Int32>*>( + nullable->get_nested_column_ptr().get()) + ->get_data() + .data(); + const auto* null_map_data = nullable->get_null_map_data().data(); + for (size_t j = 0; j < input_rows_count; ++j) { + // ignore null + if (null_map_data[j]) { + offsets[j] = offsets[j - 1]; + } else { + integer_to_char_(j, int_data + j, chars, offsets); + } + } + } else { + const auto* int_data = assert_cast<const ColumnVector<Int32>*>( + block.get_by_position(arguments[i]).column.get()) + ->get_data() + .data(); + for (size_t j = 0; j < input_rows_count; ++j) { + integer_to_char_(j, int_data + j, chars, offsets); + } + } + offsets_list[i - 1] = &str_column->get_offsets(); + chars_list[i - 1] = &str_column->get_chars(); + str_columns[i - 1] = std::move(str_column); + } + } + + auto null_map = ColumnUInt8::create(input_rows_count, 0); + auto res = ColumnString::create(); + auto& res_data = res->get_chars(); + auto& res_offset = res->get_offsets(); + + size_t res_reserve_size = 0; + for (size_t i = 0; i < argument_size - 1; ++i) { + if (!str_columns[i]) { + continue; + } + if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[i])) { + auto str_column = + assert_cast<const ColumnString*>(&(const_column->get_data_column())); + auto& offsets = str_column->get_offsets(); + res_reserve_size += (offsets[0] - offsets[-1]) * input_rows_count; + } else { + for (size_t j = 0; j < input_rows_count; ++j) { + size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1]; + // check whether the output might overflow(unlikely) + if (UNLIKELY(UINT_MAX - append < res_reserve_size)) { + return Status::BufferAllocFailed( + "function char output is too large to allocate"); + } + res_reserve_size += append; + } + } + } + if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) { + return Status::BufferAllocFailed("function char output is too large to allocate"); + } + + res_data.resize(res_reserve_size); + res_offset.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) { + int current_length = 0; + for (size_t j = 0; j < argument_size - 1; ++j) { + if (!str_columns[j]) { + continue; + } + if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[j])) { + auto str_column = + assert_cast<const ColumnString*>(&(const_column->get_data_column())); + auto data_item = str_column->get_data_at(0); + memcpy_small_allow_read_write_overflow15( + &res_data[res_offset[i - 1]] + current_length, data_item.data, + data_item.size); + current_length += data_item.size; + } else { + auto& current_offsets = *offsets_list[j]; + auto& current_chars = *chars_list[j]; + + int size = current_offsets[i] - current_offsets[i - 1]; + if (size > 0) { + memcpy_small_allow_read_write_overflow15( + &res_data[res_offset[i - 1]] + current_length, + ¤t_chars[current_offsets[i - 1]], size); + current_length += size; + } + } + } + res_offset[i] = res_offset[i - 1] + current_length; + } + + // validate utf8 + auto* null_map_data = null_map->get_data().data(); + for (size_t i = 0; i < input_rows_count; ++i) { + if (!validate_utf8((const char*)(&res_data[res_offset[i - 1]]), + res_offset[i] - res_offset[i - 1])) { + null_map_data[i] = 1; + } + } + + block.get_by_position(result).column = + ColumnNullable::create(std::move(res), std::move(null_map)); + return Status::OK(); + } + +private: + void integer_to_char_(int line_num, const int* num, ColumnString::Chars& chars, + IColumn::Offsets& offsets) { + if (0 == *num) { + chars.push_back(' '); + offsets[line_num] = offsets[line_num - 1] + 1; + return; + } + const char* bytes = (const char*)(num); +#if __BYTE_ORDER == __LITTLE_ENDIAN + int k = 3; + for (; k >= 0; --k) { + if (bytes[k]) { + break; + } + } + offsets[line_num] = offsets[line_num - 1] + k + 1; + for (; k >= 0; --k) { + chars.push_back(bytes[k] ? bytes[k] : ' '); + } +#else + int k = 0; + for (; k < 4; ++k) { + if (bytes[k]) { + break; + } + } + offsets[line_num] = offsets[line_num - 1] + 4 - k; + for (; k < 4; ++k) { + chars.push_back(bytes[k] ? bytes[k] : ' '); + } +#endif + } +}; } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/char.md b/docs/en/docs/sql-manual/sql-functions/string-functions/char.md new file mode 100644 index 0000000000..71e574077c --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/char.md @@ -0,0 +1,69 @@ +--- +{ + "title": "char", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<version since="1.2"> + +## function char +### description +#### Syntax + +`VARCHAR char(INT,..., [USING charset_name])` + +Interprets each argument as an integer and returns a string consisting of the characters given by the code values of those integers. `NULL` values are skipped. + +If the result string is illegal for the given character set, the result from `CHAR()` becomes `NULL`. + +Arguments larger than `255` are converted into multiple result bytes. For example, `char(15049882)` is equivalent to `char(229, 164, 154)`. + +Currently only `utf8` is supported for `charset_name`. +</version> + +### example + +``` +mysql> select char(68, 111, 114, 105, 115); ++--------------------------------------+ +| char('utf8', 68, 111, 114, 105, 115) | ++--------------------------------------+ +| Doris | ++--------------------------------------+ + +mysql> select char(15049882, 15179199, 14989469); ++--------------------------------------------+ +| char('utf8', 15049882, 15179199, 14989469) | ++--------------------------------------------+ +| 多睿丝 | ++--------------------------------------------+ + +mysql> select char(255); ++-------------------+ +| char('utf8', 255) | ++-------------------+ +| NULL | ++-------------------+ +``` +### keywords + CHAR diff --git a/docs/sidebars.json b/docs/sidebars.json index ae99b00e33..5272f9494d 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -420,6 +420,7 @@ "sql-manual/sql-functions/string-functions/initcap", "sql-manual/sql-functions/string-functions/repeat", "sql-manual/sql-functions/string-functions/reverse", + "sql-manual/sql-functions/string-functions/char", "sql-manual/sql-functions/string-functions/concat", "sql-manual/sql-functions/string-functions/concat_ws", "sql-manual/sql-functions/string-functions/substr", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/char.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/char.md new file mode 100644 index 0000000000..e3d57f3fd3 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/char.md @@ -0,0 +1,69 @@ +--- +{ + "title": "char", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<version since="1.2"> + +## function char +### description +#### Syntax + +`VARCHAR char(INT,..., [USING charset_name])` + +将每个参数解释为整数,并返回一个字符串,该字符串由这些整数的代码值给出的字符组成。忽略`NULL`值。 + +如果结果字符串对于给定字符集是非法的,相应的转换结果为`NULL`值。 + +大于 `255` 的参数将转换为多个结果字节。例如,`char(15049882)`等价于`char(229, 164, 154)`。 + +`charset_name`目前只支持`utf8`。 +</version> + +### example + +``` +mysql> select char(68, 111, 114, 105, 115); ++--------------------------------------+ +| char('utf8', 68, 111, 114, 105, 115) | ++--------------------------------------+ +| Doris | ++--------------------------------------+ + +mysql> select char(15049882, 15179199, 14989469); ++--------------------------------------------+ +| char('utf8', 15049882, 15179199, 14989469) | ++--------------------------------------------+ +| 多睿丝 | ++--------------------------------------------+ + +mysql> select char(255); ++-------------------+ +| char('utf8', 255) | ++-------------------+ +| NULL | ++-------------------+ +``` +### keywords + CHAR diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 86c32c430c..90903ab5e6 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -674,7 +674,7 @@ nonterminal describe_command, opt_full, opt_inner, opt_outer, from_or_in, keys_o transaction_access_mode, isolation_types; // String -nonterminal String user, opt_user; +nonterminal String user, opt_user, opt_using_charset; nonterminal UserIdentity user_identity; nonterminal String quantity; @@ -6400,8 +6400,24 @@ non_pred_expr ::= exprs.add(new StringLiteral(character)); RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs)); :} + | KW_CHAR LPAREN expr_list:exprs opt_using_charset:charset_name RPAREN + {: + exprs.add(0, new StringLiteral(charset_name)); + RESULT = new FunctionCallExpr("char", new FunctionParams(exprs)); + :} ; +opt_using_charset ::= + /* empty */ + {: + RESULT = "utf8"; + :} + | KW_USING ident:charset_name + {: + RESULT = charset_name; + :} + ; + expr_pipe_list ::= expr:e1 KW_PIPE expr:e2 {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index dc68ce5cf5..2445b5e196 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -1444,6 +1444,19 @@ public class FunctionCallExpr extends Expr { .toSql()); } } + if (fnName.getFunction().equalsIgnoreCase("char")) { + if (!getChild(0).isConstant()) { + throw new AnalysisException( + fnName.getFunction() + " charset name must be a constant: " + this + .toSql()); + } + LiteralExpr literal = (LiteralExpr) getChild(0); + if (!literal.getStringValue().equalsIgnoreCase("utf8")) { + throw new AnalysisException( + fnName.getFunction() + " function currently only support charset name 'utf8': " + this + .toSql()); + } + } if (fn.getFunctionName().getFunction().equals("timediff")) { fn.getReturnType().getPrimitiveType().setTimeType(); } @@ -1500,6 +1513,10 @@ public class FunctionCallExpr extends Expr { ix = i % 2 == 0 ? 0 : 1; } + if (i == 0 && (fnName.getFunction().equalsIgnoreCase("char"))) { + continue; + } + if ((fnName.getFunction().equalsIgnoreCase("money_format") || fnName.getFunction() .equalsIgnoreCase("histogram") || fnName.getFunction().equalsIgnoreCase("hist")) diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java index 1fb441d8c7..77f56003cc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java @@ -173,7 +173,7 @@ public class CreateFunctionTest { Assert.assertTrue(dorisAssert.query(queryStr).explainQuery().contains("CAST(`k1` AS CHARACTER)")); // cast any type to char with fixed length - createFuncStr = "create alias function db1.char(all, int) with parameter(text, length) as " + createFuncStr = "create alias function db1.to_char(all, int) with parameter(text, length) as " + "cast(text as char(length));"; createFunctionStmt = (CreateFunctionStmt) UtFrameUtils.parseAndAnalyzeStmt(createFuncStr, ctx); Env.getCurrentEnv().createFunction(createFunctionStmt); @@ -181,7 +181,7 @@ public class CreateFunctionTest { functions = db.getFunctions(); Assert.assertEquals(4, functions.size()); - queryStr = "select db1.char(333, 4);"; + queryStr = "select db1.to_char(333, 4);"; ctx.getState().reset(); stmtExecutor = new StmtExecutor(ctx, queryStr); stmtExecutor.execute(); @@ -196,7 +196,7 @@ public class CreateFunctionTest { Assert.assertEquals(1, constExprLists.get(0).size()); Assert.assertTrue(constExprLists.get(0).get(0) instanceof StringLiteral); - queryStr = "select db1.char(k1, 4) from db1.tbl1;"; + queryStr = "select db1.to_char(k1, 4) from db1.tbl1;"; Assert.assertTrue(dorisAssert.query(queryStr).explainQuery().contains("CAST(`k1` AS CHARACTER)")); } @@ -270,7 +270,7 @@ public class CreateFunctionTest { Assert.assertTrue(dorisAssert.query(queryStr).explainQuery().contains("CAST(`k1` AS CHARACTER)")); // 6. cast any type to char with fixed length - createFuncStr = "create global alias function db2.char(all, int) with parameter(text, length) as " + createFuncStr = "create global alias function db2.to_char(all, int) with parameter(text, length) as " + "cast(text as char(length));"; createFunctionStmt = (CreateFunctionStmt) UtFrameUtils.parseAndAnalyzeStmt(createFuncStr, ctx); Env.getCurrentEnv().createFunction(createFunctionStmt); @@ -278,10 +278,10 @@ public class CreateFunctionTest { functions = Env.getCurrentEnv().getGlobalFunctionMgr().getFunctions(); Assert.assertEquals(4, functions.size()); - queryStr = "select char(333, 4);"; + queryStr = "select to_char(333, 4);"; testFunctionQuery(ctx, queryStr, true); - queryStr = "select char(k1, 4) from db2.tbl1;"; + queryStr = "select to_char(k1, 4) from db2.tbl1;"; Assert.assertTrue(dorisAssert.query(queryStr).explainQuery().contains("CAST(`k1` AS CHARACTER)")); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index fae68def16..b641356504 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1509,6 +1509,8 @@ visible_functions = [ [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'], 'ALWAYS_NULLABLE'], [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT', 'INT'], 'ALWAYS_NULLABLE'], + [['char'], 'VARCHAR', ['VARCHAR', 'INT', '...'], 'ALWAYS_NULLABLE'], + # width_bucket [['width_bucket'], 'BIGINT', ['TINYINT','TINYINT','TINYINT','TINYINT'], ''], [['width_bucket'], 'BIGINT', ['SMALLINT','SMALLINT','SMALLINT','SMALLINT'], ''], diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index ff04cdd167..587319531e 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out @@ -396,3 +396,98 @@ tNEW-STRorigin str -- !sql -- d***is +-- !sql_func_char_const1 -- +D + +-- !sql_func_char_const2 -- +Doris + +-- !sql_func_char_const3 -- + Doris + +-- !sql_func_char_const4 -- +Doris + +-- !sql_func_char_const5 -- +6 + +-- !sql_func_char_const6 -- +Dor is + +-- !sql_func_char_const7 -- +多 + +-- !sql_func_char_const8 -- +3 + +-- !sql_func_char_const9 -- +多睿丝 + +-- !sql_func_char1 -- + +a +a +a +a +a +a +a + +-- !sql_func_char2 -- + bcd +acd +a cd +abd +ab d +abc +abc +abcd + +-- !sql_func_char3 -- +A bcd +Aacd +Aa cd +Aabd +Aab d +Aabc +Aabc +Aabcd + +-- !sql_func_char4 -- + 多bcd +a多cd +a多 cd +a多bd +a多b d +a多bc +a多bc +a多bcd + +-- !sql_func_char5 -- + bcd多 +acd多 +a cd多 +abd多 +ab d多 +abc多 +abc 多 +abcd多 + +-- !sql_func_char6 -- +\N +多 +多 +多 + +-- !sql_func_char7 -- +多 +多睿丝 +多睿丝 +多睿丝 + +-- !sql_func_char8 -- +多 A +多睿丝A +多睿丝A +多睿丝A + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index 73a9222aa1..ae33e448e7 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -208,4 +208,73 @@ suite("test_string_function") { qt_sql "select sub_replace(\"this is origin str\",\"NEW-STR\",1);" qt_sql "select sub_replace(\"doris\",\"***\",1,2);" + + // test function char + sql 'set enable_nereids_planner=false' + def success = false + try { + sql """ select char(68 using abc); """ + success = true + } catch (Exception e) { + assertTrue(e.getMessage().contains("only support charset name 'utf8'"), e.getMessage()) + } + assertFalse(success) + + // const + qt_sql_func_char_const1 """ select char(68); """ + qt_sql_func_char_const2 """ select char(68, 111, 114, 105, 115); """ + qt_sql_func_char_const3 """ select char(0, 68, 111, 114, 105, 115); """ + qt_sql_func_char_const4 """ select char(68, 111, 114, 105, 115, 0); """ + qt_sql_func_char_const5 """ select length(char(68, 111, 114, 105, 115, 0)); """ + qt_sql_func_char_const6 """ select char(68, 111, 114, 0, 105, null, 115 using utf8); """ + qt_sql_func_char_const7 """ select char(229, 164, 154); """ + qt_sql_func_char_const8 """ select length(char(229, 164, 154 using utf8)); """ + qt_sql_func_char_const9 """ select char(15049882, 15179199, 14989469); """ + + sql "drop table if exists test_function_char;"; + sql """ create table test_function_char ( + k1 tinyint not null, + k2 smallint, + k3 int, + k4 bigint + ) distributed by hash (k1) buckets 1 + properties ("replication_num"="1"); + """ + + sql """ insert into test_function_char values + (97, 98, 99, 100), + (97, null, 99, 100), + (97, 98, null, 100), + (97, 98, 99, null), + (0, 98, 99, 100), + (97, 0, 99, 100), + (97, 98, 0, 100), + (97, 98, 99, 0) + """ + qt_sql_func_char1 """ select char(k1) from test_function_char order by k1; """ + qt_sql_func_char2 """ select char(k1, k2, k3, k4) from test_function_char order by k1, k2, k3, k4; """ + qt_sql_func_char3 """ select char(65, k1, k2, k3, k4) from test_function_char order by k1, k2, k3, k4; """ + qt_sql_func_char4 """ select char(k1, 15049882, k2, k3, k4) from test_function_char order by k1, k2, k3, k4; """ + qt_sql_func_char5 """ select char(k1, k2, k3, k4, 15049882) from test_function_char order by k1, k2, k3, k4; """ + + sql "drop table if exists test_function_char;"; + sql """ create table test_function_char ( + k1 int not null, + k2 int, + k3 int, + k4 int + ) distributed by hash (k1) buckets 1 + properties ("replication_num"="1"); + """ + + sql """ insert into test_function_char values + (229, 164, 154, 0), + (15049882, null, 15179199, 14989469), + (15049882, 15179199, null, 14989469), + (15049882, 15179199, 14989469, null) + """ + qt_sql_func_char6 """ select char(k1) from test_function_char order by k1; """ + qt_sql_func_char7 """ select char(k1, k2, k3, k4) from test_function_char order by k1, k2, k3, k4; """ + qt_sql_func_char8 """ select char(k1, k2, k3, k4, 65) from test_function_char order by k1, k2, k3, k4; """ + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org