This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 93e5d8e660 [Vectorized](function) support bitmap_from_array function 
(#14259)
93e5d8e660 is described below

commit 93e5d8e66090903677f6d0927ee72a67cd45ee28
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Tue Nov 15 01:55:51 2022 +0800

    [Vectorized](function) support bitmap_from_array function (#14259)
---
 be/src/vec/functions/function_bitmap.cpp           | 82 ++++++++++++++++++++--
 .../bitmap-functions/bitmap_from_array.md          | 53 ++++++++++++++
 docs/sidebars.json                                 |  1 +
 .../bitmap-functions/bitmap_from_array.md          | 53 ++++++++++++++
 gensrc/script/doris_builtins_functions.py          |  4 ++
 .../array_functions/test_array_functions.out       |  9 +++
 .../array_functions/test_array_functions.groovy    |  2 +
 7 files changed, 198 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/functions/function_bitmap.cpp 
b/be/src/vec/functions/function_bitmap.cpp
index 5f4483253b..247dcab19b 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -21,9 +21,12 @@
 #include "gutil/strings/numbers.h"
 #include "gutil/strings/split.h"
 #include "util/string_parser.hpp"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
 #include "vec/columns/columns_number.h"
 #include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
 #include "vec/functions/function_always_not_nullable.h"
 #include "vec/functions/function_bitmap_min_or_max.h"
 #include "vec/functions/function_const.h"
@@ -175,6 +178,8 @@ struct ToBitmapWithCheck {
 };
 
 struct BitmapFromString {
+    using ArgumentType = DataTypeString;
+
     static constexpr auto name = "bitmap_from_string";
 
     static Status vector(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,
@@ -199,6 +204,42 @@ struct BitmapFromString {
     }
 };
 
+/// Implementation of `bitmap_from_array`: builds one BitmapValue per array row.
+///
+/// A row is rejected as soon as it contains a NULL element or a negative value:
+/// a default-constructed BitmapValue is appended as a placeholder and the row is
+/// flagged in `null_map`. Otherwise the bitmap is built from all of the row's elements.
+struct BitmapFromArray {
+    using ArgumentType = DataTypeArray;
+    static constexpr auto name = "bitmap_from_array";
+
+    /// @tparam ColumnType          concrete column type that `nested_column` is cast to.
+    /// @param offset_column_data   end offset (exclusive) of every row in the nested data.
+    /// @param nested_column        flattened array elements.
+    /// @param nested_null_map      per-element flag, non-zero when the element is NULL.
+    /// @param res                  output; one BitmapValue is appended per row.
+    /// @param null_map             output; null_map[i] is set to 1 when row i is rejected.
+    ///                             NOTE(review): assumed to be zero-filled with one entry
+    ///                             per row on entry -- confirm against the caller.
+    /// @return Status::OK() in all cases.
+    template <typename ColumnType>
+    static Status vector(const ColumnArray::Offsets64& offset_column_data,
+                         const IColumn& nested_column, const NullMap& nested_null_map,
+                         std::vector<BitmapValue>& res, NullMap& null_map) {
+        const auto& nested_column_data = static_cast<const ColumnType&>(nested_column).get_data();
+        auto size = offset_column_data.size();
+        res.reserve(size);
+        // Scratch buffer holding the elements of the row currently being processed.
+        std::vector<uint64_t> bits;
+        for (size_t i = 0; i < size; ++i) {
+            auto curr_offset = offset_column_data[i];
+            // NOTE(review): for i == 0 this reads index -1; presumably the offsets
+            // container is left-padded so that it yields 0 -- confirm.
+            auto prev_offset = offset_column_data[i - 1];
+            for (auto j = prev_offset; j < curr_offset; ++j) {
+                auto data = nested_column_data[j];
+                // invalid value: negative or NULL element makes the whole row NULL
+                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
+                    res.emplace_back();
+                    null_map[i] = 1;
+                    break;
+                } else {
+                    bits.push_back(data);
+                }
+            }
+            // every element of the row was valid
+            if (!null_map[i]) {
+                res.emplace_back(bits);
+            }
+            // Always reset the scratch buffer. If it were cleared only for valid rows,
+            // the valid prefix of a rejected row (e.g. 1,2,3 of [1, 2, 3, NULL]) would
+            // leak into the bitmap of the next row.
+            bits.clear();
+        }
+        return Status::OK();
+    }
+};
+
 template <typename Impl>
 class FunctionBitmapAlwaysNull : public IFunction {
 public:
@@ -227,12 +268,39 @@ public:
 
         ColumnPtr argument_column =
                 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        const ColumnString* str_column = 
check_and_get_column<ColumnString>(argument_column.get());
-        const ColumnString::Chars& data = str_column->get_chars();
-        const ColumnString::Offsets& offsets = str_column->get_offsets();
-
-        Impl::vector(data, offsets, res, null_map);
-
+        if constexpr (std::is_same_v<typename Impl::ArgumentType, 
DataTypeString>) {
+            const auto& str_column = static_cast<const 
ColumnString&>(*argument_column);
+            const ColumnString::Chars& data = str_column.get_chars();
+            const ColumnString::Offsets& offsets = str_column.get_offsets();
+            Impl::vector(data, offsets, res, null_map);
+        } else if constexpr (std::is_same_v<typename Impl::ArgumentType, 
DataTypeArray>) {
+            auto argument_type = remove_nullable(
+                    assert_cast<const 
DataTypeArray&>(*block.get_by_position(arguments[0]).type)
+                            .get_nested_type());
+            const auto& array_column = static_cast<const 
ColumnArray&>(*argument_column);
+            const auto& offset_column_data = array_column.get_offsets();
+            const auto& nested_nullable_column =
+                    static_cast<const 
ColumnNullable&>(array_column.get_data());
+            const auto& nested_column = 
nested_nullable_column.get_nested_column();
+            const auto& nested_null_map = 
nested_nullable_column.get_null_map_column().get_data();
+            if (check_column<ColumnInt8>(nested_column)) {
+                Impl::template vector<ColumnInt8>(offset_column_data, 
nested_column,
+                                                  nested_null_map, res, 
null_map);
+            } else if (check_column<ColumnInt16>(nested_column)) {
+                Impl::template vector<ColumnInt16>(offset_column_data, 
nested_column,
+                                                   nested_null_map, res, 
null_map);
+            } else if (check_column<ColumnInt32>(nested_column)) {
+                Impl::template vector<ColumnInt32>(offset_column_data, 
nested_column,
+                                                   nested_null_map, res, 
null_map);
+            } else if (check_column<ColumnInt64>(nested_column)) {
+                Impl::template vector<ColumnInt64>(offset_column_data, 
nested_column,
+                                                   nested_null_map, res, 
null_map);
+            }
+        } else {
+            return Status::RuntimeError("Illegal column {} of argument of 
function {}",
+                                        
block.get_by_position(arguments[0]).column->get_name(),
+                                        get_name());
+        }
         block.get_by_position(result).column =
                 ColumnNullable::create(std::move(res_data_column), 
std::move(res_null_map));
         return Status::OK();
@@ -695,6 +763,7 @@ using FunctionToBitmap = 
FunctionAlwaysNotNullable<ToBitmap>;
 using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, 
true>;
 
 using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
+using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>;
 using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
 using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
 
@@ -724,6 +793,7 @@ void register_function_bitmap(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionToBitmap>();
     factory.register_function<FunctionToBitmapWithCheck>();
     factory.register_function<FunctionBitmapFromString>();
+    factory.register_function<FunctionBitmapFromArray>();
     factory.register_function<FunctionBitmapHash>();
     factory.register_function<FunctionBitmapHash64>();
     factory.register_function<FunctionBitmapCount>();
diff --git 
a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md 
b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
new file mode 100644
index 0000000000..d72661911f
--- /dev/null
+++ 
b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
@@ -0,0 +1,53 @@
+---
+{
+    "title": "bitmap_from_array",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_from_array
+
+### description
+#### Syntax
+
+`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
+
+Converts an array of TINYINT/SMALLINT/INT/BIGINT values to a BITMAP.
+If the array contains a NULL element or a negative value, the result is NULL.
+
+### example
+
+```
+mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
++------+-----------------------+------------------------------------------------+
+| id   | c_array               | 
bitmap_to_string(bitmap_from_array(`c_array`)) |
++------+-----------------------+------------------------------------------------+
+|    1 | [NULL]                | NULL                                          
 |
+|    2 | [1, 2, 3, NULL]       | NULL                                          
 |
+|    2 | [1, 2, 3, -10]        | NULL                                          
 |
+|    3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7                                 
 |
+|    4 | [100, 200, 300, 300]  | 100,200,300                                   
 |
++------+-----------------------+------------------------------------------------+
+5 rows in set (0.02 sec)
+```
+
+### keywords
+
+    BITMAP_FROM_ARRAY,BITMAP
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 66136ee3bd..ebb6bd885c 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -463,6 +463,7 @@
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_from_string",
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_to_string",
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_to_array",
+                                
"sql-manual/sql-functions/bitmap-functions/bitmap_from_array",
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_empty",
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_or",
                                 
"sql-manual/sql-functions/bitmap-functions/bitmap_and",
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
 
b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
new file mode 100644
index 0000000000..b794d0e852
--- /dev/null
+++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
@@ -0,0 +1,53 @@
+---
+{
+    "title": "bitmap_from_array",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_from_array
+
+### description
+#### Syntax
+
+`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
+
+将一个TINYINT/SMALLINT/INT/BIGINT类型的数组转化为一个BITMAP。
+当数组中包含NULL元素或负数时,结果返回NULL。
+
+### example
+
+```
+mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
++------+-----------------------+------------------------------------------------+
+| id   | c_array               | 
bitmap_to_string(bitmap_from_array(`c_array`)) |
++------+-----------------------+------------------------------------------------+
+|    1 | [NULL]                | NULL                                          
 |
+|    2 | [1, 2, 3, NULL]       | NULL                                          
 |
+|    2 | [1, 2, 3, -10]        | NULL                                          
 |
+|    3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7                                 
 |
+|    4 | [100, 200, 300, 300]  | 100,200,300                                   
 |
++------+-----------------------+------------------------------------------------+
+5 rows in set (0.02 sec)
+```
+
+### keywords
+
+    BITMAP_FROM_ARRAY,BITMAP
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index 1c5694697d..a09ad77f31 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2610,6 +2610,10 @@ visible_functions = [
     [['bitmap_from_string'], 'BITMAP', ['STRING'],
         
'_ZN5doris15BitmapFunctions18bitmap_from_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE',
         '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['bitmap_from_array'], 'BITMAP', ['ARRAY_TINYINT'], '', '', '', 'vec', 
'ALWAYS_NULLABLE'],
+    [['bitmap_from_array'], 'BITMAP', ['ARRAY_SMALLINT'], '', '', '', 'vec', 
'ALWAYS_NULLABLE'],
+    [['bitmap_from_array'], 'BITMAP', ['ARRAY_INT'], '', '', '', 'vec', 
'ALWAYS_NULLABLE'],
+    [['bitmap_from_array'], 'BITMAP', ['ARRAY_BIGINT'], '', '', '', 'vec', 
'ALWAYS_NULLABLE'],
     [['bitmap_contains'], 'BOOLEAN', ['BITMAP','BIGINT'],
         
'_ZN5doris15BitmapFunctions15bitmap_containsEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValE',
         '', '', 'vec', ''],
diff --git 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index def00a0861..5259da6587 100644
--- 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++ 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -290,3 +290,12 @@
 8      []
 9      [9]
 
+-- !select --
+[1, 2, 3]      1,2,3
+[4]    4
+[]     
+[1, 2, 3, 4, 5, 4, 3, 2, 1]    1,2,3,4,5
+[]     
+[1, 2, 3, 4, 5, 4, 3, 2, 1]    1,2,3,4,5
+[8, 9, NULL, 10, NULL] \N
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index 7f9ea92138..b3137feb1d 100644
--- 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -105,4 +105,6 @@ suite("test_array_functions") {
     qt_select "SELECT k1, array_range(k1) from ${tableName2} ORDER BY k1"
     qt_select "SELECT k1, array_range(k1,k2) from ${tableName2} ORDER BY k1"
     qt_select "SELECT k1, array_range(k1,k2,k3) from ${tableName2} ORDER BY k1"
+    qt_select "select k2, bitmap_to_string(bitmap_from_array(k2)) from 
${tableName} order by k1;"
+    
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to