This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d5d356b17f [vectorized](function) support order by field function 
(#14528)
d5d356b17f is described below

commit d5d356b17fc23efab680e0df238f8634d1db82b3
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Fri Nov 25 14:00:46 2022 +0800

    [vectorized](function) support order by field function (#14528)
    
    * [vectorized](function) support order by field function
    
    * update
    
    * update test
---
 be/src/vec/core/accurate_comparison.h              |  5 ++
 be/src/vec/functions/least_greast.cpp              | 89 +++++++++++++++++++++-
 .../sql-functions/string-functions/field.md        | 67 ++++++++++++++++
 docs/sidebars.json                                 |  1 +
 .../sql-functions/string-functions/field.md        | 65 ++++++++++++++++
 .../apache/doris/analysis/FunctionCallExpr.java    | 13 +++-
 gensrc/script/doris_builtins_functions.py          | 12 +++
 .../test_string_function_regexp.out                | 36 +++++++--
 .../test_string_function_regexp.groovy             | 22 ++++++
 9 files changed, 300 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/core/accurate_comparison.h 
b/be/src/vec/core/accurate_comparison.h
index c4504b69b6..3f13772041 100644
--- a/be/src/vec/core/accurate_comparison.h
+++ b/be/src/vec/core/accurate_comparison.h
@@ -491,6 +491,11 @@ struct EqualsOp<DecimalV2Value, DecimalV2Value> {
     static UInt8 apply(const Int128& a, const Int128& b) { return a == b; }
 };
 
+template <>
+struct EqualsOp<StringRef, StringRef> {
+    static UInt8 apply(const StringRef& a, const StringRef& b) { return a == 
b; }
+};
+
 template <typename A, typename B>
 struct NotEqualsOp {
     using SymmetricOp = NotEqualsOp<B, A>;
diff --git a/be/src/vec/functions/least_greast.cpp 
b/be/src/vec/functions/least_greast.cpp
index c725055bb5..49ec867622 100644
--- a/be/src/vec/functions/least_greast.cpp
+++ b/be/src/vec/functions/least_greast.cpp
@@ -15,9 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "udf/udf.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_number.h"
 #include "vec/core/accurate_comparison.h"
-#include "vec/data_types/get_least_supertype.h"
+#include "vec/data_types/data_type.h"
 #include "vec/functions/function_helpers.h"
 #include "vec/functions/function_multi_same_args.h"
 #include "vec/functions/simple_function_factory.h"
@@ -110,17 +111,99 @@ struct CompareMultiImpl {
     }
 };
 
+struct FunctionFieldImpl {
+    static constexpr auto name = "field";
+
+    static DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) {
+        return std::make_shared<DataTypeInt32>();
+    }
+
+    static ColumnPtr execute(Block& block, const ColumnNumbers& arguments,
+                             size_t input_rows_count) {
+        const auto& data_type = block.get_by_position(arguments[0]).type;
+        auto result_column = ColumnInt32::create(input_rows_count, 0);
+        auto& res_data = static_cast<ColumnInt32*>(result_column)->get_data();
+
+        const auto& column_size = arguments.size();
+        ColumnPtr argument_columns[column_size];
+        for (int i = 0; i < column_size; ++i) {
+            argument_columns[i] = block.get_by_position(arguments[i]).column;
+        }
+        argument_columns[0] = 
argument_columns[0]->convert_to_full_column_if_const();
+        WhichDataType which(data_type);
+        //TODO: maybe could use hashmap to save column data, not use for loop 
ervey time to test equals.
+        if (which.is_string_or_fixed_string()) {
+            const auto& column_string = reinterpret_cast<const 
ColumnString&>(*argument_columns[0]);
+            for (int row = 0; row < input_rows_count; ++row) {
+                const auto& str_data = column_string.get_data_at(row);
+                for (int col = 1; col < column_size; ++col) {
+                    const auto& temp_data =
+                            reinterpret_cast<const 
ColumnConst&>(*argument_columns[col])
+                                    .get_data_at(0);
+                    if (EqualsOp<StringRef, StringRef>::apply(temp_data, 
str_data)) {
+                        res_data[row] = col;
+                        break;
+                    }
+                }
+            }
+        } else {
+#define DISPATCH(TYPE, COLUMN_TYPE)                                            
                   \
+    if (which.idx == TypeIndex::TYPE) {                                        
                   \
+        for (int col = 1; col < arguments.size(); ++col) {                     
                   \
+            insert_result_data<COLUMN_TYPE>(res_data, argument_columns[0], 
argument_columns[col], \
+                                            input_rows_count, col);            
                   \
+        }                                                                      
                   \
+    }
+            NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH)
+            DISPATCH(Decimal128, ColumnDecimal<Decimal128>)
+            TIME_TYPE_TO_COLUMN_TYPE(DISPATCH)
+#undef DISPATCH
+        }
+
+        return result_column;
+    }
+
+private:
+    template <typename ColumnType>
+    static void insert_result_data(PaddedPODArray<Int32>& __restrict res_data,
+                                   ColumnPtr first_column, ColumnPtr 
argument_column,
+                                   const size_t input_rows_count, const int 
col) {
+        auto* __restrict first_raw_data =
+                reinterpret_cast<const 
ColumnType*>(first_column.get())->get_data().data();
+        const auto& column_raw_data =
+                reinterpret_cast<const 
ColumnConst&>(*argument_column).get_data_column();
+        const auto& arg_data =
+                reinterpret_cast<const 
ColumnType&>(column_raw_data).get_data().data()[0];
+        if constexpr (std::is_same_v<ColumnType, ColumnDecimal128>) {
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                res_data[i] |= (!res_data[i] *
+                                (EqualsOp<DecimalV2Value, 
DecimalV2Value>::apply(first_raw_data[i],
+                                                                               
  arg_data)) *
+                                col);
+            }
+        } else {
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                using type = std::decay_t<decltype(first_raw_data[0])>;
+                res_data[i] |= (!res_data[i] *
+                                (EqualsOp<type, 
type>::apply(first_raw_data[i], arg_data)) * col);
+            }
+        }
+    }
+};
+
 struct LeastName {
     static constexpr auto name = "least";
 };
 struct GreastName {
     static constexpr auto name = "greatest";
 };
+
 using FunctionLeast = FunctionMultiSameArgs<CompareMultiImpl<LessOp, 
LeastName>>;
 using FunctionGreaest = FunctionMultiSameArgs<CompareMultiImpl<GreaterOp, 
GreastName>>;
-
+using FunctionField = FunctionMultiSameArgs<FunctionFieldImpl>;
 void register_function_least_greast(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionLeast>();
     factory.register_function<FunctionGreaest>();
+    factory.register_function<FunctionField>();
 }
 }; // namespace doris::vectorized
\ No newline at end of file
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/field.md 
b/docs/en/docs/sql-manual/sql-functions/string-functions/field.md
new file mode 100644
index 0000000000..01a611c102
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/field.md
@@ -0,0 +1,67 @@
+---
+{
+    "title": "field",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## field
+### description
+#### Syntax
+
+`field(Expr e, param1, param2, param3,.....)`
+
+
+In the order by clause, you can use custom sorting to arrange the data in expr 
in the specified param1, 2, and 3 order.
+The data not in the param parameter will not participate in sorting, but will 
be placed first. 
+You can use asc and desc to control the overall order.
+If there is a NULL value, you can use nulls first, nulls last to control the 
order of nulls.
+
+
+### example
+
+```
+mysql> select k1,k7 from baseall where k1 in (1,2,3) order by field(k1,2,1,3);
++------+------------+
+| k1   | k7         |
++------+------------+
+|    2 | wangyu14   |
+|    1 | wangjing04 |
+|    3 | yuanyuan06 |
++------+------------+
+3 rows in set (0.02 sec)
+
+mysql> select class_name from class_test order by 
field(class_name,'Suzi','Ben','Henry');
++------------+
+| class_name |
++------------+
+| Suzi       |
+| Suzi       |
+| Ben        |
+| Ben        |
+| Henry      |
+| Henry      |
++------------+
+
+```
+### keywords
+    field
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 2ebd3eabab..b1f0884e75 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -392,6 +392,7 @@
                                 
"sql-manual/sql-functions/string-functions/elt",
                                 
"sql-manual/sql-functions/string-functions/instr",
                                 
"sql-manual/sql-functions/string-functions/locate",
+                                
"sql-manual/sql-functions/string-functions/field",
                                 
"sql-manual/sql-functions/string-functions/find_in_set",
                                 
"sql-manual/sql-functions/string-functions/replace",
                                 
"sql-manual/sql-functions/string-functions/left",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md
new file mode 100644
index 0000000000..82bbdacf97
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md
@@ -0,0 +1,65 @@
+---
+{
+    "title": "field",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## field
+### description
+#### Syntax
+
+`field(Expr e, param1, param2, param3,.....)`
+
+在order by子句中,可以使用自定义排序,可以将expr中的数据按照指定的param1,2,3顺序排列。
+不在param参数中的数据不会参与排序,将会放在最前面,可以使用asc,desc控制整体顺序。
+如果有NULL值,可以使用nulls first,nulls last控制null的顺序
+
+
+### example
+
+```
+
+mysql> select k1,k7 from baseall where k1 in (1,2,3) order by field(k1,2,1,3);
++------+------------+
+| k1   | k7         |
++------+------------+
+|    2 | wangyu14   |
+|    1 | wangjing04 |
+|    3 | yuanyuan06 |
++------+------------+
+3 rows in set (0.02 sec)
+
+mysql> select class_name from class_test order by 
field(class_name,'Suzi','Ben','Henry');
++------------+
+| class_name |
++------------+
+| Suzi       |
+| Suzi       |
+| Ben        |
+| Ben        |
+| Henry      |
+| Henry      |
++------------+
+```
+### keywords
+    field
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index bf1869b277..0f1fafa10d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -599,7 +599,18 @@ public class FunctionCallExpr extends Expr {
 
             return;
         }
-
+        if (fnName.getFunction().equalsIgnoreCase("field")) {
+            if (children.size() < 2) {
+                throw new AnalysisException(fnName.getFunction() + " function 
parameter size is less than 2.");
+            } else {
+                for (int i = 1; i < children.size(); ++i) {
+                    if (!getChild(i).isConstant()) {
+                        throw new AnalysisException(fnName.getFunction()
+                                + " function except for the first argument, 
other parameter must be a constant.");
+                    }
+                }
+            }
+        }
         if (fnName.getFunction().equalsIgnoreCase("lag")
                 || fnName.getFunction().equalsIgnoreCase("lead")) {
             if (!isAnalyticFnCall) {
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 5a99b2d453..4109125494 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2405,6 +2405,18 @@ visible_functions = [
     [['version'], 'VARCHAR', [],
         '_ZN5doris16UtilityFunctions7versionEPN9doris_udf15FunctionContextE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+    [['field'], 'INT', ['TINYINT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['SMALLINT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['INT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['BIGINT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['LARGEINT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['FLOAT', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['DOUBLE', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['DECIMALV2', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['DATEV2', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['DATETIMEV2', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['VARCHAR', '...'], '','', '', 'vec', ''],
+    [['field'], 'INT', ['STRING', '...'], '','', '', 'vec', ''],
 
     # Jsonb functions
     [['jsonb_parse'], 'JSONB', ['VARCHAR'],
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index f5dcf7d0f4..e402a44cda 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -49,7 +49,7 @@ false 1       1989    1001    11011902        123.123 true    
1989-03-21      1989-03-21T13:00        wangjuoo4       0.
 
 -- !sql --
 false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 
 -- !sql --
 
@@ -62,16 +62,16 @@ false       1       1989    1001    11011902        123.123 
true    1989-03-21      1989-03-21T13:00        wangjuoo4       0.
 1
 
 -- !sql --
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
 -- !sql --
 false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
 -- !sql --
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
 -- !sql --
@@ -79,14 +79,38 @@ false       3       1989    1002    11011905        
24453.325       false   2012-03-14      2000-01-01T00:00        yunlj8@nk
 
 -- !sql --
 false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
 -- !sql --
 false  1       1989    1001    11011902        123.123 true    1989-03-21      
1989-03-21T13:00        wangjuoo4       0.1     6.333   string12345     
170141183460469231731687303715884105727
-false  2       1986    1001    11011903        1243.5  false   1901-12-31      
1989-03-21T13:00        wangynnsf       20.268  789.25  string12345     
-170141183460469231731687303715884105727
+false  2       1986    1001    11011903        1243.500        false   
1901-12-31      1989-03-21T13:00        wangynnsf       20.268  789.25  
string12345     -170141183460469231731687303715884105727
 false  3       1989    1002    11011905        24453.325       false   
2012-03-14      2000-01-01T00:00        yunlj8@nk       78945.0 3654.0  
string12345     0
 
 -- !sql --
 2
 
+-- !sql_field1 --
+Suzi
+Suzi
+Ben
+Ben
+Henry
+Henry
+
+-- !sql_field2 --
+Suzi
+Suzi
+Ben
+Ben
+Henry
+Henry
+
+-- !sql_field3 --
+Henry
+Henry
+Ben
+Suzi
+Suzi
+Ben
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index 1ccdf7196f..29a8c95b9e 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -75,5 +75,27 @@ suite("test_string_function_regexp") {
     qt_sql"select * from ${tableName} where lower(k7) not 
regexp'^[a-z]+[0-9]+[a-z]+\$' order by k1, k2, k3, k4"
     qt_sql"select * from ${tableName} where lower(k7) not 
regexp'^[a-o]+[0-9]+[a-z]?\$' order by k1, k2, k3, k4"
     qt_sql"select count(*) from ${tableName} where k1<10 and lower(k6) not 
regexp '^t'"
+
+    def tbName2 = "test_string_function_field"
+    sql "DROP TABLE IF EXISTS ${tbName2}"
+    sql """
+            CREATE TABLE IF NOT EXISTS ${tbName2} (
+                id int,
+                name varchar(32)
+            )
+            DISTRIBUTED BY HASH(name) BUCKETS 5 properties("replication_num" = 
"1");
+        """
+    sql """
+        INSERT INTO ${tbName2} VALUES 
+            (2,"Suzi"),
+            (9,"Ben"),
+            (7,"Suzi"),
+            (8,"Henry"),
+            (1,"Ben"),
+            (4,"Henry")
+        """
+    qt_sql_field1 "select name from ${tbName2} order by 
field(name,'Suzi','Ben','Henry');"
+    qt_sql_field2 "select name from ${tbName2} order by 
field(name,'Ben','Henry');"
+    qt_sql_field3 "select name from ${tbName2} order by field(name,'Henry') 
desc,id;"
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to