This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a08a799e9 [Vectorized](function) support order by convert_to function 
(#14555)
7a08a799e9 is described below

commit 7a08a799e90af3e38f0dce03cd05f751bd5b46ce
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Tue Nov 29 15:22:27 2022 +0800

    [Vectorized](function) support order by convert_to function (#14555)
---
 be/src/vec/functions/function_string.cpp           |  1 +
 be/src/vec/functions/function_string.h             | 92 ++++++++++++++++++++++
 .../sql-functions/string-functions/convert_to.md   | 73 +++++++++++++++++
 docs/sidebars.json                                 |  1 +
 .../sql-functions/string-functions/convert_to.md   | 73 +++++++++++++++++
 fe/fe-core/src/main/cup/sql_parser.cup             |  7 ++
 .../apache/doris/analysis/FunctionCallExpr.java    |  9 ++-
 gensrc/script/doris_builtins_functions.py          |  1 +
 8 files changed, 255 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index 7e762dfe8c..5d5d45409b 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -666,6 +666,7 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionLTrim>();
     factory.register_function<FunctionRTrim>();
     factory.register_function<FunctionTrim>();
+    factory.register_function<FunctionConvertTo>();
     factory.register_function<FunctionSubstring<Substr3Impl>>();
     factory.register_function<FunctionSubstring<Substr2Impl>>();
     factory.register_function<FunctionLeft>();
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index c3676656be..4d95b07a49 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -17,6 +17,13 @@
 
 #pragma once
 
+#include <iconv.h>
+#include <stddef.h>
+
+#include <memory>
+
+#include "util/string_util.h"
+#include "vec/columns/column.h"
 #ifndef USE_LIBCPP
 #include <memory_resource>
 #define PMR std::pmr
@@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
     }
 };
 
+/// Vectorized implementation of `convert_to(str, charset)`.
+///
+/// Re-encodes every row of the first (string) argument from UTF-8 with iconv.
+/// Only the target character set "gbk" (compared case-insensitively) is
+/// accepted, and it has to be supplied as a constant second argument.
+///
+/// One iconv descriptor is opened per THREAD_LOCAL scope in prepare(), kept as
+/// the function state, used by execute_impl() and released in close().
+class FunctionConvertTo : public IFunction {
+public:
+    static constexpr auto name = "convert_to";
+
+    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    /// The result is always a string column, whatever the argument types are.
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// Validates the character-set argument and opens the iconv descriptor.
+    ///
+    /// Does nothing for scopes other than THREAD_LOCAL.
+    /// @return InvalidArgument when the second argument is not constant,
+    ///         RuntimeError when the character set is not "gbk" or when
+    ///         iconv_open() fails, OK otherwise.
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope != FunctionContext::THREAD_LOCAL) {
+            return Status::OK();
+        }
+        if (!context->is_col_constant(1)) {
+            return Status::InvalidArgument(
+                    "character argument to convert function must be constant.");
+        }
+        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
+        if (!doris::iequal(character_data.to_string(), "gbk")) {
+            return Status::RuntimeError(
+                    "Illegal second argument column of function convert. now only support "
+                    "convert to character set of gbk");
+        }
+
+        iconv_t cd = iconv_open("gb2312", "utf-8");
+        // iconv_open() signals failure with (iconv_t)-1, never with a null
+        // pointer, so a nullptr comparison would let a bad descriptor through.
+        if (cd == invalid_descriptor()) {
+            return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
+                                        get_name());
+        }
+        context->set_function_state(scope, cd);
+        return Status::OK();
+    }
+
+    /// Converts each of the `input_rows_count` rows of the first argument and
+    /// stores the resulting string column at position `result` of `block`.
+    ///
+    /// @return RuntimeError as soon as iconv() fails on a row, OK otherwise.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnPtr argument_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
+        const auto& str_offset = str_col->get_offsets();
+        const auto& str_chars = str_col->get_chars();
+        auto col_res = ColumnString::create();
+        auto& res_offset = col_res->get_offsets();
+        auto& res_chars = col_res->get_chars();
+        res_offset.resize(input_rows_count);
+        iconv_t cd = reinterpret_cast<iconv_t>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        DCHECK(cd != nullptr);
+
+        // Running start positions of the current row in the input and output
+        // buffers; tracking them explicitly avoids indexing the offsets at -1
+        // for the first row.
+        size_t in_begin = 0;
+        size_t out_begin = 0;
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            const size_t in_end = str_offset[i];
+            size_t in_left = in_end - in_begin;
+            // The output area of a row is sized to the byte length of its input.
+            const size_t out_capacity = in_left;
+            res_chars.resize(out_begin + out_capacity);
+
+            // iconv() takes a non-const input pointer but does not write through it.
+            char* in = const_cast<char*>(
+                    reinterpret_cast<const char*>(str_chars.data() + in_begin));
+            char* out = reinterpret_cast<char*>(res_chars.data() + out_begin);
+            size_t out_left = out_capacity;
+            if (iconv(cd, &in, &in_left, &out, &out_left) == static_cast<size_t>(-1)) {
+                return Status::RuntimeError("function {} is convert to gbk failed in iconv",
+                                            get_name());
+            }
+
+            // Keep only the bytes iconv actually produced; the unused tail of
+            // the reserved area is uninitialised and must not leak into the row.
+            out_begin += out_capacity - out_left;
+            res_chars.resize(out_begin);
+            res_offset[i] = out_begin;
+            in_begin = in_end;
+        }
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+    /// Releases the iconv descriptor stored for the THREAD_LOCAL scope, if any.
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            iconv_t cd = reinterpret_cast<iconv_t>(
+                    context->get_function_state(FunctionContext::THREAD_LOCAL));
+            // prepare() may have returned early without storing a descriptor.
+            // NOTE(review): assumes an unset function state reads back as nullptr - confirm.
+            if (cd != nullptr) {
+                iconv_close(cd);
+                context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
+            }
+        }
+        return Status::OK();
+    }
+
+private:
+    /// The sentinel iconv_open() returns on failure, i.e. (iconv_t)-1.
+    static iconv_t invalid_descriptor() {
+        return reinterpret_cast<iconv_t>(static_cast<ptrdiff_t>(-1));
+    }
+};
 } // namespace doris::vectorized
diff --git 
a/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md 
b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md
new file mode 100644
index 0000000000..aa071f3bd5
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/convert_to.md
@@ -0,0 +1,73 @@
+---
+{
+    "title": "convert_to",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<version since="1.2">
+
+## convert_to
+### description
+#### Syntax
+
+` convert_to(VARCHAR column, VARCHAR character)`
+
+It is used in the ORDER BY clause, e.g. `order by convert(column using gbk)`.
+Currently 'gbk' is the only character set that can be converted to.
+When the ORDER BY column contains Chinese characters, the default ordering
+does not follow Pinyin order.
+After the character encoding of the column is converted to gbk, the rows can
+be sorted in Pinyin order.
+
+</version>
+
+### example
+
+```
+mysql> select * from class_test order by class_name;
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+|        6 | asd        | [6]         |
+|        7 | qwe        | [7]         |
+|        8 | z          | [8]         |
+|        2 | 哈         | [2]         |
+|        3 | 哦         | [3]         |
+|        1 | 啊         | [1]         |
+|        4 | 张         | [4]         |
+|        5 | 我         | [5]         |
++----------+------------+-------------+
+
+mysql> select * from class_test order by convert(class_name using gbk);
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+|        6 | asd        | [6]         |
+|        7 | qwe        | [7]         |
+|        8 | z          | [8]         |
+|        1 | 啊         | [1]         |
+|        2 | 哈         | [2]         |
+|        3 | 哦         | [3]         |
+|        5 | 我         | [5]         |
+|        4 | 张         | [4]         |
++----------+------------+-------------+
+```
+### keywords
+    convert_to
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 2378985fbf..66eac63629 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -412,6 +412,7 @@
                                 
"sql-manual/sql-functions/string-functions/split_part",
                                 
"sql-manual/sql-functions/string-functions/money_format",
                                 
"sql-manual/sql-functions/string-functions/parse_url",
+                                
"sql-manual/sql-functions/string-functions/convert_to",
                                 
"sql-manual/sql-functions/string-functions/extract_url_parameter",
                                 
"sql-manual/sql-functions/string-functions/uuid",
                                 
"sql-manual/sql-functions/string-functions/space",
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md
new file mode 100644
index 0000000000..7750023633
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/convert_to.md
@@ -0,0 +1,73 @@
+---
+{
+    "title": "convert_to",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<version since="1.2">
+
+## convert_to
+### description
+#### Syntax
+
+` convert_to(VARCHAR column, VARCHAR character)`
+在order by子句中使用,例如order by convert(column using gbk), 现在仅支持character转为'gbk'.
+因为当order by column中包含中文时,其排列不是按照汉语拼音的顺序.
+将column的字符编码转为gbk后,可实现按拼音的排列的效果.
+
+</version>
+
+### example
+
+```
+mysql> select * from class_test order by class_name;
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+|        6 | asd        | [6]         |
+|        7 | qwe        | [7]         |
+|        8 | z          | [8]         |
+|        2 | 哈         | [2]         |
+|        3 | 哦         | [3]         |
+|        1 | 啊         | [1]         |
+|        4 | 张         | [4]         |
+|        5 | 我         | [5]         |
++----------+------------+-------------+
+
+mysql> select * from class_test order by convert(class_name using gbk);
++----------+------------+-------------+
+| class_id | class_name | student_ids |
++----------+------------+-------------+
+|        6 | asd        | [6]         |
+|        7 | qwe        | [7]         |
+|        8 | z          | [8]         |
+|        1 | 啊         | [1]         |
+|        2 | 哈         | [2]         |
+|        3 | 哦         | [3]         |
+|        5 | 我         | [5]         |
+|        4 | 张         | [4]         |
++----------+------------+-------------+
+
+```
+### keywords
+    convert_to
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index 4026b18d26..3bc6034a63 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -5861,6 +5861,13 @@ non_pred_expr ::=
   {: RESULT = new CastExpr(targetType, e); :}
   | KW_KEY encryptkey_name:name
   {: RESULT = new EncryptKeyRef(name); :}
+  | KW_CONVERT LPAREN expr:e KW_USING ident:character RPAREN
+  {: 
+    ArrayList<Expr> exprs = new ArrayList<>();
+    exprs.add(e);
+    exprs.add(new StringLiteral(character));
+    RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs)); 
+  :}
   ;
 
 expr_pipe_list ::=
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index ed0bea7362..29f76a471c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -606,7 +606,6 @@ public class FunctionCallExpr extends Expr {
             }
             return;
         }
-
         if (fnName.getFunction().equalsIgnoreCase("group_concat")) {
             if (children.size() - orderByElements.size() > 2 || 
children.isEmpty()) {
                 throw new AnalysisException(
@@ -1229,7 +1228,13 @@ public class FunctionCallExpr extends Expr {
                 }
             }
         }
-
+        if (fnName.getFunction().equalsIgnoreCase("convert_to")) {
+            if (children.size() < 2 || !getChild(1).isConstant()) {
+                throw new AnalysisException(
+                        fnName.getFunction() + " needs two params, and the 
second is must be a constant: " + this
+                                .toSql());
+            }
+        }
         if (fn.getFunctionName().getFunction().equals("timediff")) {
             fn.getReturnType().getPrimitiveType().setTimeType();
         }
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index e369f79ffb..d34b0ba34b 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2483,6 +2483,7 @@ visible_functions = [
         '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     # Utility functions
+    [['convert_to'], 'VARCHAR', ['VARCHAR','VARCHAR'], '','', '', 'vec', ''],
     [['sleep'], 'BOOLEAN', ['INT'],
         
'_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE',
         '', '', 'vec', ''],


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to