This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 93e5d8e660 [Vectorized](function) support bitmap_from_array function (#14259) 93e5d8e660 is described below commit 93e5d8e66090903677f6d0927ee72a67cd45ee28 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Tue Nov 15 01:55:51 2022 +0800 [Vectorized](function) support bitmap_from_array function (#14259) --- be/src/vec/functions/function_bitmap.cpp | 82 ++++++++++++++++++++-- .../bitmap-functions/bitmap_from_array.md | 53 ++++++++++++++ docs/sidebars.json | 1 + .../bitmap-functions/bitmap_from_array.md | 53 ++++++++++++++ gensrc/script/doris_builtins_functions.py | 4 ++ .../array_functions/test_array_functions.out | 9 +++ .../array_functions/test_array_functions.groovy | 2 + 7 files changed, 198 insertions(+), 6 deletions(-) diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 5f4483253b..247dcab19b 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -21,9 +21,12 @@ #include "gutil/strings/numbers.h" #include "gutil/strings/split.h" #include "util/string_parser.hpp" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" #include "vec/functions/function_always_not_nullable.h" #include "vec/functions/function_bitmap_min_or_max.h" #include "vec/functions/function_const.h" @@ -175,6 +178,8 @@ struct ToBitmapWithCheck { }; struct BitmapFromString { + using ArgumentType = DataTypeString; + static constexpr auto name = "bitmap_from_string"; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, @@ -199,6 +204,42 @@ struct BitmapFromString { } }; +struct BitmapFromArray { + using ArgumentType = DataTypeArray; + static constexpr auto name = 
"bitmap_from_array"; + + template <typename ColumnType> + static Status vector(const ColumnArray::Offsets64& offset_column_data, + const IColumn& nested_column, const NullMap& nested_null_map, + std::vector<BitmapValue>& res, NullMap& null_map) { + const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data(); + auto size = offset_column_data.size(); + res.reserve(size); + std::vector<uint64_t> bits; + for (size_t i = 0; i < size; ++i) { + auto curr_offset = offset_column_data[i]; + auto prev_offset = offset_column_data[i - 1]; + for (auto j = prev_offset; j < curr_offset; ++j) { + auto data = nested_column_data[j]; + // invaild value + if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) { + res.emplace_back(); + null_map[i] = 1; + break; + } else { + bits.push_back(data); + } + } + //input is valid value + if (!null_map[i]) { + res.emplace_back(bits); + bits.clear(); + } + } + return Status::OK(); + } +}; + template <typename Impl> class FunctionBitmapAlwaysNull : public IFunction { public: @@ -227,12 +268,39 @@ public: ColumnPtr argument_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - const ColumnString* str_column = check_and_get_column<ColumnString>(argument_column.get()); - const ColumnString::Chars& data = str_column->get_chars(); - const ColumnString::Offsets& offsets = str_column->get_offsets(); - - Impl::vector(data, offsets, res, null_map); - + if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeString>) { + const auto& str_column = static_cast<const ColumnString&>(*argument_column); + const ColumnString::Chars& data = str_column.get_chars(); + const ColumnString::Offsets& offsets = str_column.get_offsets(); + Impl::vector(data, offsets, res, null_map); + } else if constexpr (std::is_same_v<typename Impl::ArgumentType, DataTypeArray>) { + auto argument_type = remove_nullable( + assert_cast<const DataTypeArray&>(*block.get_by_position(arguments[0]).type) + 
.get_nested_type()); + const auto& array_column = static_cast<const ColumnArray&>(*argument_column); + const auto& offset_column_data = array_column.get_offsets(); + const auto& nested_nullable_column = + static_cast<const ColumnNullable&>(array_column.get_data()); + const auto& nested_column = nested_nullable_column.get_nested_column(); + const auto& nested_null_map = nested_nullable_column.get_null_map_column().get_data(); + if (check_column<ColumnInt8>(nested_column)) { + Impl::template vector<ColumnInt8>(offset_column_data, nested_column, + nested_null_map, res, null_map); + } else if (check_column<ColumnInt16>(nested_column)) { + Impl::template vector<ColumnInt16>(offset_column_data, nested_column, + nested_null_map, res, null_map); + } else if (check_column<ColumnInt32>(nested_column)) { + Impl::template vector<ColumnInt32>(offset_column_data, nested_column, + nested_null_map, res, null_map); + } else if (check_column<ColumnInt64>(nested_column)) { + Impl::template vector<ColumnInt64>(offset_column_data, nested_column, + nested_null_map, res, null_map); + } + } else { + return Status::RuntimeError("Illegal column {} of argument of function {}", + block.get_by_position(arguments[0]).column->get_name(), + get_name()); + } block.get_by_position(result).column = ColumnNullable::create(std::move(res_data_column), std::move(res_null_map)); return Status::OK(); @@ -695,6 +763,7 @@ using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>; using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>; using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>; +using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>; using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>; using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>; @@ -724,6 +793,7 @@ void register_function_bitmap(SimpleFunctionFactory& factory) { factory.register_function<FunctionToBitmap>(); 
factory.register_function<FunctionToBitmapWithCheck>(); factory.register_function<FunctionBitmapFromString>(); + factory.register_function<FunctionBitmapFromArray>(); factory.register_function<FunctionBitmapHash>(); factory.register_function<FunctionBitmapHash64>(); factory.register_function<FunctionBitmapCount>(); diff --git a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md new file mode 100644 index 0000000000..d72661911f --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md @@ -0,0 +1,53 @@ +--- +{ + "title": "bitmap_from_array", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+--> + +## bitmap_from_array + +### description +#### Syntax + +`BITMAP BITMAP_FROM_ARRAY(ARRAY input)` + +Converts a TINYINT/SMALLINT/INT/BIGINT array to a BITMAP. +Returns NULL when the array contains a NULL or negative element. + +### example + +``` +mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test; ++------+-----------------------+------------------------------------------------+ +| id | c_array | bitmap_to_string(bitmap_from_array(`c_array`)) | ++------+-----------------------+------------------------------------------------+ +| 1 | [NULL] | NULL | +| 2 | [1, 2, 3, NULL] | NULL | +| 2 | [1, 2, 3, -10] | NULL | +| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7 | +| 4 | [100, 200, 300, 300] | 100,200,300 | ++------+-----------------------+------------------------------------------------+ +5 rows in set (0.02 sec) +``` + +### keywords + + BITMAP_FROM_ARRAY,BITMAP diff --git a/docs/sidebars.json b/docs/sidebars.json index 66136ee3bd..ebb6bd885c 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -463,6 +463,7 @@ "sql-manual/sql-functions/bitmap-functions/bitmap_from_string", "sql-manual/sql-functions/bitmap-functions/bitmap_to_string", "sql-manual/sql-functions/bitmap-functions/bitmap_to_array", + "sql-manual/sql-functions/bitmap-functions/bitmap_from_array", "sql-manual/sql-functions/bitmap-functions/bitmap_empty", "sql-manual/sql-functions/bitmap-functions/bitmap_or", "sql-manual/sql-functions/bitmap-functions/bitmap_and", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md new file mode 100644 index 0000000000..b794d0e852 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md @@ -0,0 +1,53 @@ +--- +{ + "title": "bitmap_from_array", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements.
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## bitmap_from_array + +### description +#### Syntax + +`BITMAP BITMAP_FROM_ARRAY(ARRAY input)` + +将一个TINYINT/SMALLINT/INT/BIGINT类型的数组转化为一个BITMAP。 +当数组中包含NULL或负数元素时,结果返回NULL。 + +### example + +``` +mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test; ++------+-----------------------+------------------------------------------------+ +| id | c_array | bitmap_to_string(bitmap_from_array(`c_array`)) | ++------+-----------------------+------------------------------------------------+ +| 1 | [NULL] | NULL | +| 2 | [1, 2, 3, NULL] | NULL | +| 2 | [1, 2, 3, -10] | NULL | +| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7 | +| 4 | [100, 200, 300, 300] | 100,200,300 | ++------+-----------------------+------------------------------------------------+ +5 rows in set (0.02 sec) +``` + +### keywords + + BITMAP_FROM_ARRAY,BITMAP diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 1c5694697d..a09ad77f31 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2610,6 +2610,10 @@ visible_functions = [ [['bitmap_from_string'], 'BITMAP', ['STRING'], '_ZN5doris15BitmapFunctions18bitmap_from_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['bitmap_from_array'], 'BITMAP',
['ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['bitmap_from_array'], 'BITMAP', ['ARRAY_SMALLINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['bitmap_from_array'], 'BITMAP', ['ARRAY_INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], + [['bitmap_from_array'], 'BITMAP', ['ARRAY_BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'], [['bitmap_contains'], 'BOOLEAN', ['BITMAP','BIGINT'], '_ZN5doris15BitmapFunctions15bitmap_containsEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValE', '', '', 'vec', ''], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out index def00a0861..5259da6587 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out @@ -290,3 +290,12 @@ 8 [] 9 [9] +-- !select -- +[1, 2, 3] 1,2,3 +[4] 4 +[] +[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5 +[] +[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5 +[8, 9, NULL, 10, NULL] \N + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy index 7f9ea92138..b3137feb1d 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy @@ -105,4 +105,6 @@ suite("test_array_functions") { qt_select "SELECT k1, array_range(k1) from ${tableName2} ORDER BY k1" qt_select "SELECT k1, array_range(k1,k2) from ${tableName2} ORDER BY k1" qt_select "SELECT k1, array_range(k1,k2,k3) from ${tableName2} ORDER BY k1" + qt_select "select k2, bitmap_to_string(bitmap_from_array(k2)) from ${tableName} order by k1;" + } --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org