This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 235c105554 [feature-array](array-type) Add array function array_enumerate (#13612) 235c105554 is described below commit 235c105554d240a09cef3d21d1f3db3572c11198 Author: lihangyu <15605149...@163.com> AuthorDate: Tue Oct 25 15:12:11 2022 +0800 [feature-array](array-type) Add array function array_enumerate (#13612) Add array function array_enumerate --- be/src/vec/CMakeLists.txt | 1 + .../functions/array/function_array_enumerate.cpp | 98 ++++++++++++++++++++++ .../functions/array/function_array_register.cpp | 2 + .../array-functions/array_enumerate.md | 60 +++++++++++++ .../array-functions/array_enumerate.md | 59 +++++++++++++ gensrc/script/doris_builtins_functions.py | 17 ++++ .../array_functions/test_array_functions.out | 36 ++++++++ .../array_functions/test_array_functions.groovy | 4 + 8 files changed, 277 insertions(+) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 845a1c0d75..d3ae2849de 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -152,6 +152,7 @@ set(VEC_FILES functions/array/function_array_intersect.cpp functions/array/function_array_slice.cpp functions/array/function_array_difference.cpp + functions/array/function_array_enumerate.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/functions/array/function_array_enumerate.cpp b/be/src/vec/functions/array/function_array_enumerate.cpp new file mode 100644 index 0000000000..8f63e46ab1 --- /dev/null +++ b/be/src/vec/functions/array/function_array_enumerate.cpp @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <vec/columns/column_array.h> +#include <vec/columns/column_nullable.h> +#include <vec/columns/columns_number.h> +#include <vec/data_types/data_type_array.h> +#include <vec/data_types/data_type_number.h> +#include <vec/functions/function.h> +#include <vec/functions/function_helpers.h> +#include <vec/functions/simple_function_factory.h> + +namespace doris::vectorized { + +class FunctionArrayEnumerate : public IFunction { +public: + static constexpr auto name = "array_enumerate"; + static FunctionPtr create() { return std::make_shared<FunctionArrayEnumerate>(); } + String get_name() const override { return name; } + bool use_default_implementation_for_nulls() const override { return false; } + size_t get_number_of_arguments() const override { return 1; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + const DataTypeArray* array_type = + check_and_get_data_type<DataTypeArray>(remove_nullable(arguments[0]).get()); + if (!array_type) { + LOG(FATAL) << "First argument for function " + get_name() + + " must be an array but it has type " + arguments[0]->get_name() + + "."; + } + + auto nested_type = assert_cast<const DataTypeArray&>(*array_type).get_nested_type(); + bool is_nested_nullable = nested_type->is_nullable(); + bool is_nullable = arguments[0]->is_nullable(); + auto return_nested_type = std::make_shared<DataTypeInt64>(); + DataTypePtr return_type = std::make_shared<DataTypeArray>( + is_nested_nullable ? make_nullable(return_nested_type) : return_nested_type); + if (is_nullable) { + return_type = make_nullable(return_type); + } + return return_type; + } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto left_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const ColumnArray* array = + check_and_get_column<ColumnArray>(remove_nullable(left_column->get_ptr())); + if (!array) { + return Status::RuntimeError( + fmt::format("Illegal column {}, of first argument of function {}", + left_column->get_name(), get_name())); + } + const ColumnArray::Offsets64& offsets = array->get_offsets(); + auto res_nested = ColumnInt64::create(); + ColumnInt64::Container& res_values = res_nested->get_data(); + res_values.resize(array->get_data().size()); + ColumnArray::Offset64 prev_off = 0; + for (auto off : offsets) { + for (ColumnArray::Offset64 j = prev_off; j < off; ++j) res_values[j] = j - prev_off + 1; + prev_off = off; + } + + ColumnPtr nested_column = res_nested->get_ptr(); + if (array->get_data().is_nullable()) { + nested_column = ColumnNullable::create(nested_column, + ColumnUInt8::create(nested_column->size(), 0)); + } + ColumnPtr res_column = + ColumnArray::create(std::move(nested_column), array->get_offsets_ptr()); + if (block.get_by_position(arguments[0]).column->is_nullable()) { + const ColumnNullable* nullable = check_and_get_column<ColumnNullable>(left_column); + res_column = ColumnNullable::create( + res_column, nullable->get_null_map_column().clone_resized(nullable->size())); + } + block.replace_by_position(result, std::move(res_column)); + return Status::OK(); + } +}; + +void register_function_array_enumerate(SimpleFunctionFactory& factory) { + factory.register_function<FunctionArrayEnumerate>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index aff9e67d36..7d718ccfe2 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -35,6 +35,7 @@ void register_function_array_except(SimpleFunctionFactory&); void register_function_array_intersect(SimpleFunctionFactory&); void register_function_array_slice(SimpleFunctionFactory&); void register_function_array_difference(SimpleFunctionFactory&); +void register_function_array_enumerate(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_element(factory); @@ -50,6 +51,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_intersect(factory); register_function_array_slice(factory); register_function_array_difference(factory); + register_function_array_enumerate(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md new file mode 100644 index 0000000000..96f7617723 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_enumerate.md @@ -0,0 +1,60 @@ +--- +{ + "title": "ARRAY_ENUMERATE Function", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## ARRAY_ENUMERATE + +### Name + +ARRAY_ENUMERATE + +### description + +Returns array sub item indexes eg. [1, 2, 3, …, length (arr) ] + +### example + +```shell +mysql> create table array_type_table(k1 INT, k2 Array<STRING>) duplicate key (k1) + -> distributed by hash(k1) buckets 1 properties('replication_num' = '1'); +mysql> insert into array_type_table values (0, []), ("1", [NULL]), ("2", ["1", "2", "3"]), ("3", ["1", NULL, "3"]), ("4", NULL); +mysql> set enable_vectorized_engine = true; # enable vectorized engine +mysql> select k2, array_enumerate(k2) from array_type_table; ++------------------+-----------------------+ +| k2 | array_enumerate(`k2`) | ++------------------+-----------------------+ +| [] | [] | +| [NULL] | [1] | +| ['1', '2', '3'] | [1, 2, 3] | +| ['1', NULL, '3'] | [1, 2, 3] | +| NULL | NULL | ++------------------+-----------------------+ +5 rows in set (0.01 sec) +``` + +### keywords + +ARRAY,ENUMERATE,ARRAY_ENUMERATE + diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md new file mode 100644 index 0000000000..d7bee373e9 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_enumerate.md @@ -0,0 +1,59 @@ +--- +{ + "title": "ARRAY_ENUMERATE 函数", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## ARRAY_ENUMERATE + +### Name + +ARRAY_ENUMERATE + +### description + +返回数组下标, 例如 [1, 2, 3, …, length (arr) ] + +### example + +```shell +mysql> create table array_type_table(k1 INT, k2 Array<STRING>) duplicate key (k1) + -> distributed by hash(k1) buckets 1 properties('replication_num' = '1'); +mysql> insert into array_type_table values (0, []), ("1", [NULL]), ("2", ["1", "2", "3"]), ("3", ["1", NULL, "3"]), ("4", NULL); +mysql> set enable_vectorized_engine = true; # enable vectorized engine +mysql> select k2, array_enumerate(k2) from array_type_table; ++------------------+-----------------------+ +| k2 | array_enumerate(`k2`) | ++------------------+-----------------------+ +| [] | [] | +| [NULL] | [1] | +| ['1', '2', '3'] | [1, 2, 3] | +| ['1', NULL, '3'] | [1, 2, 3] | +| NULL | NULL | ++------------------+-----------------------+ +5 rows in set (0.01 sec) +``` + +### keywords + +ARRAY,ENUMERATE,ARRAY_ENUMERATE diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 68f1a4e48f..4715372acf 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -166,6 +166,23 @@ visible_functions = [ [['array_contains'], 'BOOLEAN', ['ARRAY_VARCHAR', 'VARCHAR'], '', '', '', 'vec', ''], [['array_contains'], 'BOOLEAN', ['ARRAY_STRING', 'STRING'], '', '', '', 'vec', ''], + + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_BOOLEAN'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_INT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_BIGINT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_LARGEINT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATETIME'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATE'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATETIMEV2'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DATEV2'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_FLOAT'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DOUBLE'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_VARCHAR'], '', '', '', 'vec', ''], + [['array_enumerate'], 'ARRAY_BIGINT', ['ARRAY_STRING'], '', '', '', 'vec', ''], + [['countequal'], 'BIGINT', ['ARRAY_BOOLEAN', 'BOOLEAN'], '', '', '', 'vec', ''], [['countequal'], 'BIGINT', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''], [['countequal'], 'BIGINT', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out index 8bcc53a962..7c3fa001a1 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out @@ -143,3 +143,39 @@ 6 \N 7 \N +-- !select -- +1 [1, 2, 3] +2 [1] +3 [] +4 [1, 2, 3, 4, 5, 6, 7, 8, 9] +5 [] +6 [1, 2, 3, 4, 5, 6, 7, 8, 9] +7 [1, 2, 3, 4, 5] + +-- !select -- +1 [1] +2 [1] +3 [1] +4 \N +5 \N +6 \N +7 \N + +-- !select -- +1 [1] +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N + +-- !select -- +1 [1] +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy index 6684f4db81..c4729e0b20 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy @@ -62,4 +62,8 @@ suite("test_array_functions") { qt_select "SELECT k1, array_contains(k5, 'hi') FROM ${tableName} ORDER BY k1" qt_select "SELECT k1, array_contains(k5, 'hi222') FROM ${tableName} ORDER BY k1" qt_select "SELECT k1, array_contains(k6, null) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_enumerate(k2) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_enumerate(k5) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_enumerate(k6) from ${tableName} ORDER BY k1" + qt_select "SELECT k1, array_enumerate(k7) from ${tableName} ORDER BY k1" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org