This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2438d784e80 [Exec](performance) speed up array element at performance 
(#56880)
2438d784e80 is described below

commit 2438d784e80e0dd539d9c1fe962af889be8d42a1
Author: HappenLee <[email protected]>
AuthorDate: Tue Mar 31 15:23:37 2026 +0800

    [Exec](performance) speed up array element at performance (#56880)
    
    Query time before this change:
    ```
    MySQL [operator]> select sum([1,2][UserID % 2 +1]) from array_index_table;
    +---------------------------+
    | sum([1,2][UserID % 2 +1]) |
    +---------------------------+
    |                 150000000 |
    +---------------------------+
    1 row in set (4.231 sec)
    
    ```
    
    Query time after this change:
    ```
    MySQL [operator]> select sum([1,2][UserID % 2 +1]) from array_index_table;
    +---------------------------+
    | sum([1,2][UserID % 2 +1]) |
    +---------------------------+
    |                 150000000 |
    +---------------------------+
    1 row in set (3.851 sec)
    ```
---
 .../exprs/function/array/function_array_element.h  | 125 ++++++++++++---------
 1 file changed, 69 insertions(+), 56 deletions(-)

diff --git a/be/src/exprs/function/array/function_array_element.h 
b/be/src/exprs/function/array/function_array_element.h
index 6cc26f4e669..8449019e2a5 100644
--- a/be/src/exprs/function/array/function_array_element.h
+++ b/be/src/exprs/function/array/function_array_element.h
@@ -64,8 +64,7 @@ namespace doris {
 
 class FunctionArrayElement : public IFunction {
 public:
-    /// The count of items in the map may exceed 128(Int8).
-    using MapIndiceDataType = DataTypeInt16;
+    using MapIndiceDataType = DataTypeInt64;
 
     static constexpr auto name = "element_at";
     static FunctionPtr create() { return 
std::make_shared<FunctionArrayElement>(); }
@@ -171,24 +170,25 @@ private:
         return matched_indices;
     }
 
-    template <typename ColumnType>
+    template <typename ColumnType, typename IndexColumnType>
     ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& 
indices,
-                              const UInt8* nested_null_map, UInt8* 
dst_null_map) const {
+                              const UInt8* nested_null_map, UInt8* 
dst_null_map,
+                              const UInt8* idx_null_map, bool is_const_index) 
const {
         const auto& nested_data = reinterpret_cast<const 
ColumnType&>(nested_column).get_data();
+        const auto& index_data = assert_cast<const 
IndexColumnType&>(indices).get_data();
 
         auto dst_column = nested_column.clone_empty();
         auto& dst_data = reinterpret_cast<ColumnType&>(*dst_column).get_data();
         dst_data.resize(offsets.size());
 
-        // process
         for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = offsets[row - 1];
+            size_t off = row == 0 ? 0 : offsets[row - 1];
             size_t len = offsets[row] - off;
-            auto index = indices.get_int(row);
-            // array is nullable
+            size_t idx = index_check_const(row, is_const_index);
+            auto index =
+                    (idx_null_map && idx_null_map[idx]) ? 0 : 
static_cast<Int64>(index_data[idx]);
             bool null_flag = bool(arr_null_map && arr_null_map[row]);
-            // calc index in nested column
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -196,30 +196,25 @@ private:
             } else {
                 null_flag = true;
             }
-            // nested column nullable check
             if (!null_flag && nested_null_map && nested_null_map[index]) {
                 null_flag = true;
             }
-            // actual data copy
-            if (null_flag) {
-                dst_null_map[row] = true;
-                dst_data[row] = typename ColumnType::value_type();
-            } else {
-                DCHECK(index >= 0 && index < nested_data.size());
-                dst_null_map[row] = false;
-                dst_data[row] = nested_data[index];
-            }
+            dst_null_map[row] = null_flag;
+            dst_data[row] = !null_flag ? nested_data[index] : typename 
ColumnType::value_type();
         }
         return dst_column;
     }
 
+    template <typename IndexColumnType>
     ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& 
indices,
-                              const UInt8* nested_null_map, UInt8* 
dst_null_map) const {
+                              const UInt8* nested_null_map, UInt8* 
dst_null_map,
+                              const UInt8* idx_null_map, bool is_const_index) 
const {
         const auto& src_str_offs =
                 reinterpret_cast<const 
ColumnString&>(nested_column).get_offsets();
         const auto& src_str_chars =
                 reinterpret_cast<const 
ColumnString&>(nested_column).get_chars();
+        const auto& index_data = assert_cast<const 
IndexColumnType&>(indices).get_data();
 
         // prepare return data
         auto dst_column = ColumnString::create();
@@ -228,14 +223,13 @@ private:
         auto& dst_str_chars = dst_column->get_chars();
         dst_str_chars.reserve(src_str_chars.size());
 
-        // process
         for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = offsets[row - 1];
+            size_t off = row == 0 ? 0 : offsets[row - 1];
             size_t len = offsets[row] - off;
-            auto index = indices.get_int(row);
-            // array is nullable
+            size_t idx = index_check_const(row, is_const_index);
+            auto index =
+                    (idx_null_map && idx_null_map[idx]) ? 0 : 
static_cast<Int64>(index_data[idx]);
             bool null_flag = bool(arr_null_map && arr_null_map[row]);
-            // calc index in nested column
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -243,24 +237,21 @@ private:
             } else {
                 null_flag = true;
             }
-            // nested column nullable check
             if (!null_flag && nested_null_map && nested_null_map[index]) {
                 null_flag = true;
             }
-            // actual string copy
             if (!null_flag) {
-                DCHECK(index >= 0 && index < src_str_offs.size());
                 dst_null_map[row] = false;
                 auto element_size = src_str_offs[index] - src_str_offs[index - 
1];
-                dst_str_offs[row] = dst_str_offs[row - 1] + element_size;
+                dst_str_offs[row] = (row == 0 ? 0 : dst_str_offs[row - 1]) + 
element_size;
                 auto src_string_pos = src_str_offs[index - 1];
-                auto dst_string_pos = dst_str_offs[row - 1];
+                auto dst_string_pos = row == 0 ? 0 : dst_str_offs[row - 1];
                 dst_str_chars.resize(dst_string_pos + element_size);
                 memcpy(&dst_str_chars[dst_string_pos], 
&src_str_chars[src_string_pos],
                        element_size);
             } else {
                 dst_null_map[row] = true;
-                dst_str_offs[row] = dst_str_offs[row - 1];
+                dst_str_offs[row] = row == 0 ? 0 : dst_str_offs[row - 1];
             }
         }
         return dst_column;
@@ -290,21 +281,23 @@ private:
         return _execute_nullable(args, input_rows_count, src_null_map, 
dst_null_map);
     }
 
+    template <typename IndexColumnType>
     ColumnPtr _execute_common(const ColumnArray::Offsets64& offsets, const 
IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& 
indices,
-                              const UInt8* nested_null_map, UInt8* 
dst_null_map) const {
-        // prepare return data
+                              const UInt8* nested_null_map, UInt8* 
dst_null_map,
+                              const UInt8* idx_null_map, bool is_const_index) 
const {
+        const auto& index_data = assert_cast<const 
IndexColumnType&>(indices).get_data();
+
         auto dst_column = nested_column.clone_empty();
         dst_column->reserve(offsets.size());
 
-        // process
         for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = offsets[row - 1];
+            size_t off = row == 0 ? 0 : offsets[row - 1];
             size_t len = offsets[row] - off;
-            auto index = indices.get_int(row);
-            // array is nullable
+            size_t idx = index_check_const(row, is_const_index);
+            auto index =
+                    (idx_null_map && idx_null_map[idx]) ? 0 : 
static_cast<Int64>(index_data[idx]);
             bool null_flag = bool(arr_null_map && arr_null_map[row]);
-            // calc index in nested column
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -312,11 +305,9 @@ private:
             } else {
                 null_flag = true;
             }
-            // nested column nullable check
             if (!null_flag && nested_null_map && nested_null_map[index]) {
                 null_flag = true;
             }
-            // actual data copy
             if (!null_flag) {
                 dst_null_map[row] = false;
                 dst_column->insert_from(nested_column, index);
@@ -350,25 +341,47 @@ private:
         auto left_element_type = remove_nullable(
                 assert_cast<const 
DataTypeArray&>(*remove_nullable(arguments[0].type))
                         .get_nested_type());
-        // because we impl use_default_implementation_for_nulls
-        // we should handle array index column by-self, and array index should 
not be nullable.
-        auto idx_col = remove_nullable(arguments[1].column);
-        // we should dispatch branch according to data type rather than column 
type
-
-        auto call = [&](const auto& type) -> bool {
-            using DispatchType = std::decay_t<decltype(type)>;
-            res = _execute_number<typename DispatchType::ColumnType>(
-                    offsets, *nested_column, src_null_map, *idx_col, 
nested_null_map, dst_null_map);
+        const UInt8* idx_null_map = nullptr;
+        auto idx_col_with_const = unpack_if_const(arguments[1].column);
+        if (idx_col_with_const.first->is_nullable()) {
+            const auto& idx_null_column =
+                    reinterpret_cast<const 
ColumnNullable&>(*idx_col_with_const.first);
+            idx_null_map = 
idx_null_column.get_null_map_column().get_data().data();
+        }
+        auto idx_col_raw = remove_nullable(idx_col_with_const.first);
+        bool is_const_index = idx_col_with_const.second;
+
+        PrimitiveType idx_ptype = 
remove_nullable(arguments[1].type)->get_primitive_type();
+
+        // Outer dispatch on index column type (Int8/Int16/Int32/Int64),
+        // inner dispatch on nested data column type.
+        auto idx_dispatch = [&](const auto& idx_type) -> bool {
+            using IdxDispatchType = std::decay_t<decltype(idx_type)>;
+            using IndexColumnType = typename IdxDispatchType::ColumnType;
+
+            auto data_call = [&](const auto& data_type) -> bool {
+                using DataDispatchType = std::decay_t<decltype(data_type)>;
+                res = _execute_number<typename DataDispatchType::ColumnType, 
IndexColumnType>(
+                        offsets, *nested_column, src_null_map, *idx_col_raw, 
nested_null_map,
+                        dst_null_map, idx_null_map, is_const_index);
+                return true;
+            };
+
+            if (is_string_type(left_element_type->get_primitive_type())) {
+                res = _execute_string<IndexColumnType>(offsets, 
*nested_column, src_null_map,
+                                                       *idx_col_raw, 
nested_null_map, dst_null_map,
+                                                       idx_null_map, 
is_const_index);
+            } else if 
(!dispatch_switch_scalar(left_element_type->get_primitive_type(),
+                                               data_call)) {
+                res = _execute_common<IndexColumnType>(offsets, 
*nested_column, src_null_map,
+                                                       *idx_col_raw, 
nested_null_map, dst_null_map,
+                                                       idx_null_map, 
is_const_index);
+            }
             return true;
         };
 
-        if (is_string_type(left_element_type->get_primitive_type())) {
-            res = _execute_string(offsets, *nested_column, src_null_map, 
*idx_col, nested_null_map,
-                                  dst_null_map);
-        } else if 
(!dispatch_switch_scalar(left_element_type->get_primitive_type(), call)) {
-            res = _execute_common(offsets, *nested_column, src_null_map, 
*idx_col, nested_null_map,
-                                  dst_null_map);
-        }
+        bool dispatched = dispatch_switch_int(idx_ptype, idx_dispatch);
+        DCHECK(dispatched) << "Unsupported index column type for element_at: " 
<< idx_ptype;
         return res;
     }
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to