This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5dd052d386 [Function](array) support array_range function (#13547) 5dd052d386 is described below commit 5dd052d386d40e35a44c25d5dce1b0ff1c98f23a Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Fri Oct 28 08:40:24 2022 +0800 [Function](array) support array_range function (#13547) * array_range with 3 impl * [Function](array) support array_range function * update * update code --- be/src/vec/CMakeLists.txt | 1 + .../vec/functions/array/function_array_range.cpp | 195 +++++++++++++++++++++ .../functions/array/function_array_register.cpp | 2 + .../sql-functions/array-functions/array_range.md | 76 ++++++++ docs/sidebars.json | 1 + .../sql-functions/array-functions/array_range.md | 74 ++++++++ gensrc/script/doris_builtins_functions.py | 3 + 7 files changed, 352 insertions(+) diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 2ba617295c..612fb3fac4 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -153,6 +153,7 @@ set(VEC_FILES functions/array/function_array_slice.cpp functions/array/function_array_difference.cpp functions/array/function_array_enumerate.cpp + functions/array/function_array_range.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp new file mode 100644 index 0000000000..0e2c33f5af --- /dev/null +++ b/be/src/vec/functions/array/function_array_range.cpp @@ -0,0 +1,195 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "common/status.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_const.h" +#include "vec/columns/columns_number.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/function.h" +#include "vec/functions/function_helpers.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +template <typename Impl> +class FunctionArrayRange : public IFunction { +public: + static constexpr auto name = "array_range"; + + static FunctionPtr create() { return std::make_shared<FunctionArrayRange>(); } + + /// Get function name. 
+ String get_name() const override { return name; } + + bool is_variadic() const override { return true; } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + ColumnNumbers get_arguments_that_are_always_constant() const override { + return {get_number_of_arguments()}; + } + + size_t get_number_of_arguments() const override { + return get_variadic_argument_types_impl().size(); + } + + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + auto nested_type = make_nullable(std::make_shared<DataTypeInt32>()); + auto res = std::make_shared<DataTypeArray>(nested_type); + return make_nullable(res); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + return Impl::execute_impl(context, block, arguments, result, input_rows_count); + } +}; + +struct RangeImplUtil { + static Status range_execute(Block& block, const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + DCHECK_EQ(arguments.size(), 3); + auto return_nested_type = make_nullable(std::make_shared<DataTypeInt32>()); + auto dest_array_column_ptr = ColumnArray::create(return_nested_type->create_column(), + ColumnArray::ColumnOffsets::create()); + IColumn* dest_nested_column = &dest_array_column_ptr->get_data(); + ColumnNullable* dest_nested_nullable_col = + reinterpret_cast<ColumnNullable*>(dest_nested_column); + dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr(); + auto& dest_nested_null_map = dest_nested_nullable_col->get_null_map_column().get_data(); + + auto args_null_map = ColumnUInt8::create(input_rows_count, 0); + ColumnPtr argument_columns[3]; + for (int i = 0; i < 3; ++i) { + argument_columns[i] = + 
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + // Danger: Here must dispose the null map data first! Because + // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem + // of column nullable mem of null map + VectorizedUtils::update_null_map(args_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + auto start_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[0].get()); + auto end_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get()); + auto step_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()); + + DCHECK(dest_nested_column != nullptr); + auto& dest_offsets = dest_array_column_ptr->get_offsets(); + auto nested_column = reinterpret_cast<ColumnVector<Int32>*>(dest_nested_column); + dest_offsets.reserve(input_rows_count); + dest_nested_column->reserve(input_rows_count); + dest_nested_null_map.reserve(input_rows_count); + + vector(start_column->get_data(), end_column->get_data(), step_column->get_data(), + args_null_map->get_data(), nested_column->get_data(), dest_nested_null_map, + dest_offsets); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(dest_array_column_ptr), std::move(args_null_map)); + return Status::OK(); + } + +private: + static void vector(const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& end, + const PaddedPODArray<Int32>& step, NullMap& args_null_map, + PaddedPODArray<Int32>& nested_column, + PaddedPODArray<UInt8>& dest_nested_null_map, + ColumnArray::Offsets64& dest_offsets) { + int rows = start.size(); + for (auto row = 0; row < rows; ++row) { + if (args_null_map[row] || start[row] < 0 || end[row] < 0 || step[row] < 0) { + nested_column.push_back(0); + dest_offsets.push_back(dest_offsets.back() + 1); + 
dest_nested_null_map.push_back(1); + args_null_map[row] = 1; + } else { + int offset = dest_offsets.back(); + for (auto idx = start[row]; idx < end[row]; idx = idx + step[row]) { + nested_column.push_back(idx); + dest_nested_null_map.push_back(0); + offset++; + } + dest_offsets.push_back(offset); + } + } + } +}; + +struct RangeOneImpl { + static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt32>()}; } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto start_column = ColumnInt32::create(input_rows_count, 0); + auto step_column = ColumnInt32::create(input_rows_count, 1); + block.insert({std::move(start_column), std::make_shared<DataTypeInt32>(), "start_column"}); + block.insert({std::move(step_column), std::make_shared<DataTypeInt32>(), "step_column"}); + ColumnNumbers temp_arguments = {block.columns() - 2, arguments[0], block.columns() - 1}; + return RangeImplUtil::range_execute(block, temp_arguments, result, input_rows_count); + } +}; + +struct RangeTwoImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto step_column = ColumnInt32::create(input_rows_count, 1); + block.insert({std::move(step_column), std::make_shared<DataTypeInt32>(), "step_column"}); + ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; + return RangeImplUtil::range_execute(block, temp_arguments, result, input_rows_count); + } +}; + +struct RangeThreeImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>(), + std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& 
block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + return RangeImplUtil::range_execute(block, arguments, result, input_rows_count); + } +}; + +void register_function_array_range(SimpleFunctionFactory& factory) { + factory.register_function<FunctionArrayRange<RangeOneImpl>>(); + factory.register_function<FunctionArrayRange<RangeTwoImpl>>(); + factory.register_function<FunctionArrayRange<RangeThreeImpl>>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 7d718ccfe2..82c9103cb0 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -36,6 +36,7 @@ void register_function_array_intersect(SimpleFunctionFactory&); void register_function_array_slice(SimpleFunctionFactory&); void register_function_array_difference(SimpleFunctionFactory&); void register_function_array_enumerate(SimpleFunctionFactory&); +void register_function_array_range(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_element(factory); @@ -52,6 +53,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_slice(factory); register_function_array_difference(factory); register_function_array_enumerate(factory); + register_function_array_range(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md new file mode 100644 index 0000000000..0ceb58d755 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_range.md @@ -0,0 +1,76 @@ +--- +{ + "title": "array_range", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. 
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## array_range + +### description + +#### Syntax + +``` +ARRAY<Int> array_range(Int end) +ARRAY<Int> array_range(Int start, Int end) +ARRAY<Int> array_range(Int start, Int end, Int step) +``` +All parameters are integers: start and end must be non-negative, and step must be positive. +start defaults to 0 and step defaults to 1. +Returns an array of the integers from start to end - 1, in increments of step. 
+ + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array_range(10); ++--------------------------------+ +| array_range(10) | ++--------------------------------+ +| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] | ++--------------------------------+ + +mysql> select array_range(10,20); ++------------------------------------------+ +| array_range(10, 20) | ++------------------------------------------+ +| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] | ++------------------------------------------+ + +mysql> select array_range(0,20,2); ++-------------------------------------+ +| array_range(0, 20, 2) | ++-------------------------------------+ +| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] | ++-------------------------------------+ +``` + +### keywords + +ARRAY, RANGE, ARRAY_RANGE diff --git a/docs/sidebars.json b/docs/sidebars.json index c0360c2b5c..4cf82d482b 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -257,6 +257,7 @@ "sql-manual/sql-functions/array-functions/countequal", "sql-manual/sql-functions/array-functions/element_at", "sql-manual/sql-functions/array-functions/array_avg", + "sql-manual/sql-functions/array-functions/array_range", "sql-manual/sql-functions/array-functions/size", "sql-manual/sql-functions/array-functions/array_distinct", "sql-manual/sql-functions/array-functions/array_difference", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md new file mode 100644 index 0000000000..de656aa651 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_range.md @@ -0,0 +1,74 @@ +--- +{ + "title": "array_range", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## array_range + +### description + +#### Syntax + +``` +ARRAY<Int> array_range(Int end) +ARRAY<Int> array_range(Int start, Int end) +ARRAY<Int> array_range(Int start, Int end, Int step) +``` +参数均为整数:start 和 end 必须为非负整数,step 必须为正整数。start 默认为 0,step 默认为 1。 +最终返回一个数组,从 start 到 end - 1,步长为 step。 + +### notice + +`仅支持向量化引擎中使用` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select array_range(10); ++--------------------------------+ +| array_range(10) | ++--------------------------------+ +| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] | ++--------------------------------+ + +mysql> select array_range(10,20); ++------------------------------------------+ +| array_range(10, 20) | ++------------------------------------------+ +| [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] | ++------------------------------------------+ + +mysql> select array_range(0,20,2); ++-------------------------------------+ +| array_range(0, 20, 2) | ++-------------------------------------+ +| [0, 2, 4, 6, 8, 10, 12, 14, 16, 18] | ++-------------------------------------+ +``` + +### keywords + +ARRAY, RANGE, ARRAY_RANGE diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index ec88c654b9..8e9b447cde 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -436,6 +436,9 @@ visible_functions = [ [['array_slice', '%element_slice%'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR', 
'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], [['array_slice', '%element_slice%'], 'ARRAY_STRING', ['ARRAY_STRING', 'BIGINT', 'BIGINT'], '', '', '', 'vec', ''], + [['array_range'], 'ARRAY_INT', ['INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['array_range'], 'ARRAY_INT', ['INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['array_range'], 'ARRAY_INT', ['INT', 'INT', 'INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], # reverse function for string builtin [['reverse'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions7reverseEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org