This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d5d356b17f [vectorized](function) support order by field function (#14528) d5d356b17f is described below commit d5d356b17fc23efab680e0df238f8634d1db82b3 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Fri Nov 25 14:00:46 2022 +0800 [vectorized](function) support order by field function (#14528) * [vectorized](function) support order by field function * update * update test --- be/src/vec/core/accurate_comparison.h | 5 ++ be/src/vec/functions/least_greast.cpp | 89 +++++++++++++++++++++- .../sql-functions/string-functions/field.md | 67 ++++++++++++++++ docs/sidebars.json | 1 + .../sql-functions/string-functions/field.md | 65 ++++++++++++++++ .../apache/doris/analysis/FunctionCallExpr.java | 13 +++- gensrc/script/doris_builtins_functions.py | 12 +++ .../test_string_function_regexp.out | 36 +++++++-- .../test_string_function_regexp.groovy | 22 ++++++ 9 files changed, 300 insertions(+), 10 deletions(-) diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h index c4504b69b6..3f13772041 100644 --- a/be/src/vec/core/accurate_comparison.h +++ b/be/src/vec/core/accurate_comparison.h @@ -491,6 +491,11 @@ struct EqualsOp<DecimalV2Value, DecimalV2Value> { static UInt8 apply(const Int128& a, const Int128& b) { return a == b; } }; +template <> +struct EqualsOp<StringRef, StringRef> { + static UInt8 apply(const StringRef& a, const StringRef& b) { return a == b; } +}; + template <typename A, typename B> struct NotEqualsOp { using SymmetricOp = NotEqualsOp<B, A>; diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index c725055bb5..49ec867622 100644 --- a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "udf/udf.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/columns_number.h" #include "vec/core/accurate_comparison.h" -#include "vec/data_types/get_least_supertype.h" +#include "vec/data_types/data_type.h" #include "vec/functions/function_helpers.h" #include "vec/functions/function_multi_same_args.h" #include "vec/functions/simple_function_factory.h" @@ -110,17 +111,99 @@ struct CompareMultiImpl { } }; +struct FunctionFieldImpl { + static constexpr auto name = "field"; + + static DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) { + return std::make_shared<DataTypeInt32>(); + } + + static ColumnPtr execute(Block& block, const ColumnNumbers& arguments, + size_t input_rows_count) { + const auto& data_type = block.get_by_position(arguments[0]).type; + auto result_column = ColumnInt32::create(input_rows_count, 0); + auto& res_data = static_cast<ColumnInt32*>(result_column)->get_data(); + + const auto& column_size = arguments.size(); + ColumnPtr argument_columns[column_size]; + for (int i = 0; i < column_size; ++i) { + argument_columns[i] = block.get_by_position(arguments[i]).column; + } + argument_columns[0] = argument_columns[0]->convert_to_full_column_if_const(); + WhichDataType which(data_type); + //TODO: maybe could use hashmap to save column data, not use for loop ervey time to test equals. + if (which.is_string_or_fixed_string()) { + const auto& column_string = reinterpret_cast<const ColumnString&>(*argument_columns[0]); + for (int row = 0; row < input_rows_count; ++row) { + const auto& str_data = column_string.get_data_at(row); + for (int col = 1; col < column_size; ++col) { + const auto& temp_data = + reinterpret_cast<const ColumnConst&>(*argument_columns[col]) + .get_data_at(0); + if (EqualsOp<StringRef, StringRef>::apply(temp_data, str_data)) { + res_data[row] = col; + break; + } + } + } + } else { +#define DISPATCH(TYPE, COLUMN_TYPE) \ + if (which.idx == TypeIndex::TYPE) { \ + for (int col = 1; col < arguments.size(); ++col) { \ + insert_result_data<COLUMN_TYPE>(res_data, argument_columns[0], argument_columns[col], \ + input_rows_count, col); \ + } \ + } + NUMERIC_TYPE_TO_COLUMN_TYPE(DISPATCH) + DISPATCH(Decimal128, ColumnDecimal<Decimal128>) + TIME_TYPE_TO_COLUMN_TYPE(DISPATCH) +#undef DISPATCH + } + + return result_column; + } + +private: + template <typename ColumnType> + static void insert_result_data(PaddedPODArray<Int32>& __restrict res_data, + ColumnPtr first_column, ColumnPtr argument_column, + const size_t input_rows_count, const int col) { + auto* __restrict first_raw_data = + reinterpret_cast<const ColumnType*>(first_column.get())->get_data().data(); + const auto& column_raw_data = + reinterpret_cast<const ColumnConst&>(*argument_column).get_data_column(); + const auto& arg_data = + reinterpret_cast<const ColumnType&>(column_raw_data).get_data().data()[0]; + if constexpr (std::is_same_v<ColumnType, ColumnDecimal128>) { + for (size_t i = 0; i < input_rows_count; ++i) { + res_data[i] |= (!res_data[i] * + (EqualsOp<DecimalV2Value, DecimalV2Value>::apply(first_raw_data[i], + arg_data)) * + col); + } + } else { + for (size_t i = 0; i < input_rows_count; ++i) { + using type = std::decay_t<decltype(first_raw_data[0])>; + res_data[i] |= (!res_data[i] * + (EqualsOp<type, type>::apply(first_raw_data[i], arg_data)) * col); + } + } + } +}; + struct LeastName { static constexpr auto name = "least"; }; struct GreastName { static constexpr auto name = "greatest"; }; + using FunctionLeast = FunctionMultiSameArgs<CompareMultiImpl<LessOp, LeastName>>; using FunctionGreaest = FunctionMultiSameArgs<CompareMultiImpl<GreaterOp, GreastName>>; - +using FunctionField = FunctionMultiSameArgs<FunctionFieldImpl>; void register_function_least_greast(SimpleFunctionFactory& factory) { factory.register_function<FunctionLeast>(); factory.register_function<FunctionGreaest>(); + factory.register_function<FunctionField>(); } }; // namespace doris::vectorized \ No newline at end of file diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/field.md b/docs/en/docs/sql-manual/sql-functions/string-functions/field.md new file mode 100644 index 0000000000..01a611c102 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/field.md @@ -0,0 +1,67 @@ +--- +{ + "title": "field", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## field +### description +#### Syntax + +`field(Expr e, param1, param2, param3,.....)` + + +In the order by clause, you can use custom sorting to arrange the data in expr in the specified param1, 2, and 3 order. +The data not in the param parameter will not participate in sorting, but will be placed first. +You can use asc and desc to control the overall order. +If there is a NULL value, you can use nulls first, nulls last to control the order of nulls. + + +### example + +``` +mysql> select k1,k7 from baseall where k1 in (1,2,3) order by field(k1,2,1,3); ++------+------------+ +| k1 | k7 | ++------+------------+ +| 2 | wangyu14 | +| 1 | wangjing04 | +| 3 | yuanyuan06 | ++------+------------+ +3 rows in set (0.02 sec) + +mysql> select class_name from class_test order by field(class_name,'Suzi','Ben','Henry'); ++------------+ +| class_name | ++------------+ +| Suzi | +| Suzi | +| Ben | +| Ben | +| Henry | +| Henry | ++------------+ + +``` +### keywords + field diff --git a/docs/sidebars.json b/docs/sidebars.json index 2ebd3eabab..b1f0884e75 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -392,6 +392,7 @@ "sql-manual/sql-functions/string-functions/elt", "sql-manual/sql-functions/string-functions/instr", "sql-manual/sql-functions/string-functions/locate", + "sql-manual/sql-functions/string-functions/field", "sql-manual/sql-functions/string-functions/find_in_set", "sql-manual/sql-functions/string-functions/replace", "sql-manual/sql-functions/string-functions/left", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md new file mode 100644 index 0000000000..82bbdacf97 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/field.md @@ -0,0 +1,65 @@ +--- +{ + "title": "field", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## field +### description +#### Syntax + +`field(Expr e, param1, param2, param3,.....)` + +在order by子句中,可以使用自定义排序,可以将expr中的数据按照指定的param1,2,3顺序排列。 +不在param参数中的数据不会参与排序,将会放在最前面,可以使用asc,desc控制整体顺序。 +如果有NULL值,可以使用nulls first,nulls last控制null的顺序 + + +### example + +``` + +mysql> select k1,k7 from baseall where k1 in (1,2,3) order by field(k1,2,1,3); ++------+------------+ +| k1 | k7 | ++------+------------+ +| 2 | wangyu14 | +| 1 | wangjing04 | +| 3 | yuanyuan06 | ++------+------------+ +3 rows in set (0.02 sec) + +mysql> select class_name from class_test order by field(class_name,'Suzi','Ben','Henry'); ++------------+ +| class_name | ++------------+ +| Suzi | +| Suzi | +| Ben | +| Ben | +| Henry | +| Henry | ++------------+ +``` +### keywords + field diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index bf1869b277..0f1fafa10d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -599,7 +599,18 @@ public class FunctionCallExpr extends Expr { return; } - + if (fnName.getFunction().equalsIgnoreCase("field")) { + if (children.size() < 2) { + throw new AnalysisException(fnName.getFunction() + " function parameter size is less than 2."); + } else { + for (int i = 1; i < children.size(); ++i) { + if (!getChild(i).isConstant()) { + throw new AnalysisException(fnName.getFunction() + + " function except for the first argument, other parameter must be a constant."); + } + } + } + } if (fnName.getFunction().equalsIgnoreCase("lag") || fnName.getFunction().equalsIgnoreCase("lead")) { if (!isAnalyticFnCall) { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 5a99b2d453..4109125494 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2405,6 +2405,18 @@ visible_functions = [ [['version'], 'VARCHAR', [], '_ZN5doris16UtilityFunctions7versionEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['field'], 'INT', ['TINYINT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['SMALLINT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['INT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['BIGINT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['LARGEINT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['FLOAT', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['DOUBLE', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['DECIMALV2', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['DATEV2', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['DATETIMEV2', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['VARCHAR', '...'], '','', '', 'vec', ''], + [['field'], 'INT', ['STRING', '...'], '','', '', 'vec', ''], # Jsonb functions [['jsonb_parse'], 'JSONB', ['VARCHAR'], diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out index f5dcf7d0f4..e402a44cda 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out @@ -49,7 +49,7 @@ false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0. -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 -- !sql -- @@ -62,16 +62,16 @@ false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0. 1 -- !sql -- -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 -- !sql -- -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 -- !sql -- @@ -79,14 +79,38 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 -false 2 1986 1001 11011903 1243.5 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 +false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 -- !sql -- 2 +-- !sql_field1 -- +Suzi +Suzi +Ben +Ben +Henry +Henry + +-- !sql_field2 -- +Suzi +Suzi +Ben +Ben +Henry +Henry + +-- !sql_field3 -- +Henry +Henry +Ben +Suzi +Suzi +Ben + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index 1ccdf7196f..29a8c95b9e 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -75,5 +75,27 @@ suite("test_string_function_regexp") { qt_sql"select * from ${tableName} where lower(k7) not regexp'^[a-z]+[0-9]+[a-z]+\$' order by k1, k2, k3, k4" qt_sql"select * from ${tableName} where lower(k7) not regexp'^[a-o]+[0-9]+[a-z]?\$' order by k1, k2, k3, k4" qt_sql"select count(*) from ${tableName} where k1<10 and lower(k6) not regexp '^t'" + + def tbName2 = "test_string_function_field" + sql "DROP TABLE IF EXISTS ${tbName2}" + sql """ + CREATE TABLE IF NOT EXISTS ${tbName2} ( + id int, + name varchar(32) + ) + DISTRIBUTED BY HASH(name) BUCKETS 5 properties("replication_num" = "1"); + """ + sql """ + INSERT INTO ${tbName2} VALUES + (2,"Suzi"), + (9,"Ben"), + (7,"Suzi"), + (8,"Henry"), + (1,"Ben"), + (4,"Henry") + """ + qt_sql_field1 "select name from ${tbName2} order by field(name,'Suzi','Ben','Henry');" + qt_sql_field2 "select name from ${tbName2} order by field(name,'Ben','Henry');" + qt_sql_field3 "select name from ${tbName2} order by field(name,'Henry') desc,id;" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org