This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 7a08a799e9 [Vectorized](function) support order by convert_to function (#14555) 7a08a799e9 is described below commit 7a08a799e90af3e38f0dce03cd05f751bd5b46ce Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Tue Nov 29 15:22:27 2022 +0800 [Vectorized](function) support order by convert_to function (#14555) --- be/src/vec/functions/function_string.cpp | 1 + be/src/vec/functions/function_string.h | 92 ++++++++++++++++++++++ .../sql-functions/string-functions/convert_to.md | 73 +++++++++++++++++ docs/sidebars.json | 1 + .../sql-functions/string-functions/convert_to.md | 73 +++++++++++++++++ fe/fe-core/src/main/cup/sql_parser.cup | 7 ++ .../apache/doris/analysis/FunctionCallExpr.java | 9 ++- gensrc/script/doris_builtins_functions.py | 1 + 8 files changed, 255 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 7e762dfe8c..5d5d45409b 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -666,6 +666,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionLTrim>(); factory.register_function<FunctionRTrim>(); factory.register_function<FunctionTrim>(); + factory.register_function<FunctionConvertTo>(); factory.register_function<FunctionSubstring<Substr3Impl>>(); factory.register_function<FunctionSubstring<Substr2Impl>>(); factory.register_function<FunctionLeft>(); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index c3676656be..4d95b07a49 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -17,6 +17,13 @@ #pragma once +#include <iconv.h> +#include <stddef.h> + +#include <memory> + +#include "util/string_util.h" +#include "vec/columns/column.h" #ifndef USE_LIBCPP #include <memory_resource> #define PMR 
std::pmr @@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl { } }; +class FunctionConvertTo : public IFunction { +public: + static constexpr auto name = "convert_to"; + + static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 2; } + + DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override { + return std::make_shared<DataTypeString>(); + } + + bool use_default_implementation_for_constants() const override { return true; } + + Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope != FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + if (!context->is_col_constant(1)) { + return Status::InvalidArgument( + "character argument to convert function must be constant."); + } + const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0); + if (doris::iequal(character_data.to_string(), "gbk")) { + iconv_t cd = iconv_open("gb2312", "utf-8"); + if (cd == nullptr) { + return Status::RuntimeError("function {} is convert to gbk failed in iconv_open", + get_name()); + } + context->set_function_state(scope, cd); + } else { + return Status::RuntimeError( + "Illegal second argument column of function convert. 
now only support " + "convert to character set of gbk"); + } + + return Status::OK(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + ColumnPtr argument_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get()); + const auto& str_offset = str_col->get_offsets(); + const auto& str_chars = str_col->get_chars(); + auto col_res = ColumnString::create(); + auto& res_offset = col_res->get_offsets(); + auto& res_chars = col_res->get_chars(); + res_offset.resize(input_rows_count); + iconv_t cd = reinterpret_cast<iconv_t>( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + DCHECK(cd != nullptr); + + size_t in_len = 0, out_len = 0; + for (int i = 0; i < input_rows_count; ++i) { + in_len = str_offset[i] - str_offset[i - 1]; + const char* value_data = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]); + res_chars.resize(res_offset[i - 1] + in_len); + char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]); + char* in = const_cast<char*>(value_data); + out_len = in_len; + if (iconv(cd, &in, &in_len, &out, &out_len) == -1) { + return Status::RuntimeError("function {} is convert to gbk failed in iconv", + get_name()); + } else { + res_offset[i] = res_chars.size(); + } + } + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } + + Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + iconv_t cd = reinterpret_cast<iconv_t>( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + iconv_close(cd); + context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr); + } + return Status::OK(); + } +}; } // namespace doris::vectorized diff --git 
a/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md new file mode 100644 index 0000000000..aa071f3bd5 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md @@ -0,0 +1,73 @@ +--- +{ + "title": "convert_to", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<version since="1.2"> + +## convert_to +### description +#### Syntax + +` convert_to(VARCHAR column, VARCHAR character)` + +It is used in the order by clause, e.g. order by convert(column using gbk). Currently 'gbk' is the only supported target character set. 
+When the order by column contains Chinese characters, the rows are not sorted in Pinyin order. +After the character encoding of the column is converted to gbk, the rows can be sorted in Pinyin order. + +</version> + +### example + +``` +mysql> select * from class_test order by class_name; ++----------+------------+-------------+ +| class_id | class_name | student_ids | ++----------+------------+-------------+ +| 6 | asd | [6] | +| 7 | qwe | [7] | +| 8 | z | [8] | +| 2 | 哈 | [2] | +| 3 | 哦 | [3] | +| 1 | 啊 | [1] | +| 4 | 张 | [4] | +| 5 | 我 | [5] | ++----------+------------+-------------+ + +mysql> select * from class_test order by convert(class_name using gbk); ++----------+------------+-------------+ +| class_id | class_name | student_ids | ++----------+------------+-------------+ +| 6 | asd | [6] | +| 7 | qwe | [7] | +| 8 | z | [8] | +| 1 | 啊 | [1] | +| 2 | 哈 | [2] | +| 3 | 哦 | [3] | +| 5 | 我 | [5] | +| 4 | 张 | [4] | ++----------+------------+-------------+ +``` +### keywords + convert_to diff --git a/docs/sidebars.json b/docs/sidebars.json index 2378985fbf..66eac63629 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -412,6 +412,7 @@ "sql-manual/sql-functions/string-functions/split_part", "sql-manual/sql-functions/string-functions/money_format", "sql-manual/sql-functions/string-functions/parse_url", + "sql-manual/sql-functions/string-functions/convert_to", "sql-manual/sql-functions/string-functions/extract_url_parameter", "sql-manual/sql-functions/string-functions/uuid", "sql-manual/sql-functions/string-functions/space", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md new file mode 100644 index 0000000000..7750023633 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md @@ -0,0 +1,73 @@ +--- +{ + "title": "convert_to", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under 
one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<version since="1.2"> + +## convert_to +### description +#### Syntax + +` convert_to(VARCHAR column, VARCHAR character)` +在order by子句中使用,例如order by convert(column using gbk), 现在仅支持character转为'gbk'. +因为当order by column中包含中文时,其排列不是按照汉语拼音的顺序. +将column的字符编码转为gbk后,可实现按拼音的排列的效果. + +</version> + +### example + +``` +mysql> select * from class_test order by class_name; ++----------+------------+-------------+ +| class_id | class_name | student_ids | ++----------+------------+-------------+ +| 6 | asd | [6] | +| 7 | qwe | [7] | +| 8 | z | [8] | +| 2 | 哈 | [2] | +| 3 | 哦 | [3] | +| 1 | 啊 | [1] | +| 4 | 张 | [4] | +| 5 | 我 | [5] | ++----------+------------+-------------+ + +mysql> select * from class_test order by convert(class_name using gbk); ++----------+------------+-------------+ +| class_id | class_name | student_ids | ++----------+------------+-------------+ +| 6 | asd | [6] | +| 7 | qwe | [7] | +| 8 | z | [8] | +| 1 | 啊 | [1] | +| 2 | 哈 | [2] | +| 3 | 哦 | [3] | +| 5 | 我 | [5] | +| 4 | 张 | [4] | ++----------+------------+-------------+ + +``` +### keywords + convert_to diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 4026b18d26..3bc6034a63 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ 
b/fe/fe-core/src/main/cup/sql_parser.cup @@ -5861,6 +5861,13 @@ non_pred_expr ::= {: RESULT = new CastExpr(targetType, e); :} | KW_KEY encryptkey_name:name {: RESULT = new EncryptKeyRef(name); :} + | KW_CONVERT LPAREN expr:e KW_USING ident:character RPAREN + {: + ArrayList<Expr> exprs = new ArrayList<>(); + exprs.add(e); + exprs.add(new StringLiteral(character)); + RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs)); + :} ; expr_pipe_list ::= diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index ed0bea7362..29f76a471c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -606,7 +606,6 @@ public class FunctionCallExpr extends Expr { } return; } - if (fnName.getFunction().equalsIgnoreCase("group_concat")) { if (children.size() - orderByElements.size() > 2 || children.isEmpty()) { throw new AnalysisException( @@ -1229,7 +1228,13 @@ public class FunctionCallExpr extends Expr { } } } - + if (fnName.getFunction().equalsIgnoreCase("convert_to")) { + if (children.size() < 2 || !getChild(1).isConstant()) { + throw new AnalysisException( + fnName.getFunction() + " needs two params, and the second is must be a constant: " + this + .toSql()); + } + } if (fn.getFunctionName().getFunction().equals("timediff")) { fn.getReturnType().getPrimitiveType().setTimeType(); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index e369f79ffb..d34b0ba34b 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2483,6 +2483,7 @@ visible_functions = [ '', '', 'vec', 'ALWAYS_NULLABLE'], # Utility functions + [['convert_to'], 'VARCHAR', ['VARCHAR','VARCHAR'], '','', '', 'vec', ''], [['sleep'], 'BOOLEAN', ['INT'], 
'_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE', '', '', 'vec', ''], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org