This is an automated email from the ASF dual-hosted git repository.

zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d2264cd6ec [improve](function) Optimize the performance of the explode function (#50011)
0d2264cd6ec is described below

commit 0d2264cd6ec8651667cdaf25f11249639876d23c
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Apr 15 15:49:51 2025 +0800

    [improve](function) Optimize the performance of the explode function (#50011)
    
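    ### What problem does this PR solve?
    Problem Summary:
    Optimize the performance of the explode function:
    - In VExplodeV2TableFunction::get_value, copy the nested null map with a
      single resize + memcpy instead of inserting one value per element.
    - In VExplodeV2TableFunction::process_init, read the child expression
      count once and reuse it.
    - In the table function operator, remove the CopyDataTime and
      RepeatDataTime timers and move the ProcessRowsTime scoped timer from
      process_next_child_row() to the row-processing loop in
      get_expanded_block().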
---
 be/src/pipeline/exec/table_function_operator.cpp |  6 +---
 be/src/pipeline/exec/table_function_operator.h   |  2 --
 be/src/vec/exprs/table_function/vexplode_v2.cpp  | 44 +++++++++++-------------
 3 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/be/src/pipeline/exec/table_function_operator.cpp 
b/be/src/pipeline/exec/table_function_operator.cpp
index 0cff402264c..f38a08a2646 100644
--- a/be/src/pipeline/exec/table_function_operator.cpp
+++ b/be/src/pipeline/exec/table_function_operator.cpp
@@ -39,9 +39,7 @@ Status TableFunctionLocalState::init(RuntimeState* state, 
LocalStateInfo& info)
     SCOPED_TIMER(_init_timer);
     _init_function_timer = ADD_TIMER(_runtime_profile, 
"InitTableFunctionTime");
     _process_rows_timer = ADD_TIMER(_runtime_profile, "ProcessRowsTime");
-    _copy_data_timer = ADD_TIMER(_runtime_profile, "CopyDataTime");
     _filter_timer = ADD_TIMER(_runtime_profile, "FilterTime");
-    _repeat_data_timer = ADD_TIMER(_runtime_profile, "RepeatDataTime");
     return Status::OK();
 }
 
@@ -77,7 +75,6 @@ void TableFunctionLocalState::_copy_output_slots(
     if (!_current_row_insert_times) {
         return;
     }
-    SCOPED_TIMER(_copy_data_timer);
     auto& p = _parent->cast<TableFunctionOperatorX>();
     for (auto index : p._output_slot_indexs) {
         auto src_column = _child_block->get_by_position(index).column;
@@ -167,7 +164,7 @@ Status 
TableFunctionLocalState::get_expanded_block(RuntimeState* state,
             _fns[i]->set_nullable();
         }
     }
-
+    SCOPED_TIMER(_process_rows_timer);
     while (columns[p._child_slots.size()]->size() < state->batch_size()) {
         RETURN_IF_CANCELLED(state);
 
@@ -230,7 +227,6 @@ Status 
TableFunctionLocalState::get_expanded_block(RuntimeState* state,
 }
 
 void TableFunctionLocalState::process_next_child_row() {
-    SCOPED_TIMER(_process_rows_timer);
     _cur_child_offset++;
 
     if (_cur_child_offset >= _child_block->rows()) {
diff --git a/be/src/pipeline/exec/table_function_operator.h 
b/be/src/pipeline/exec/table_function_operator.h
index fe4ea17187c..2d7d94e486b 100644
--- a/be/src/pipeline/exec/table_function_operator.h
+++ b/be/src/pipeline/exec/table_function_operator.h
@@ -74,9 +74,7 @@ private:
 
     RuntimeProfile::Counter* _init_function_timer = nullptr;
     RuntimeProfile::Counter* _process_rows_timer = nullptr;
-    RuntimeProfile::Counter* _copy_data_timer = nullptr;
     RuntimeProfile::Counter* _filter_timer = nullptr;
-    RuntimeProfile::Counter* _repeat_data_timer = nullptr;
 };
 
 class TableFunctionOperatorX MOCK_REMOVE(final)
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp 
b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index ccebae91f34..53b368bed86 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -74,16 +74,16 @@ Status 
VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co
 }
 
 Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState* 
state) {
-    CHECK(_expr_context->root()->children().size() >= 1)
-            << "VExplodeV2TableFunction support one or more child but has "
-            << _expr_context->root()->children().size();
+    auto expr_size = _expr_context->root()->children().size();
+    CHECK(expr_size >= 1) << "VExplodeV2TableFunction support one or more 
child but has "
+                          << expr_size;
 
     int value_column_idx = -1;
-    _multi_detail.resize(_expr_context->root()->children().size());
-    _array_offsets.resize(_expr_context->root()->children().size());
-    _array_columns.resize(_expr_context->root()->children().size());
+    _multi_detail.resize(expr_size);
+    _array_offsets.resize(expr_size);
+    _array_columns.resize(expr_size);
 
-    for (int i = 0; i < _expr_context->root()->children().size(); i++) {
+    for (int i = 0; i < expr_size; i++) {
         
RETURN_IF_ERROR(_expr_context->root()->children()[i]->execute(_expr_context.get(),
 block,
                                                                       
&value_column_idx));
         if 
(WhichDataType(remove_nullable(block->get_by_position(value_column_idx).type))
@@ -207,31 +207,27 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr& 
column, int max_step) {
                     
nullable_column->get_nested_column_ptr()->insert_range_from(*detail.nested_col,
                                                                                
 pos, max_step);
                     if (detail.nested_nullmap_data) {
-                        for (int j = 0; j < max_step; j++) {
-                            if (detail.nested_nullmap_data[pos + j]) {
-                                nullmap_column->insert_value(1);
-                            } else {
-                                nullmap_column->insert_value(0);
-                            }
-                        }
+                        size_t old_size = nullmap_column->size();
+                        nullmap_column->resize(old_size + max_step);
+                        memcpy(nullmap_column->get_data().data() + old_size,
+                               detail.nested_nullmap_data + pos, max_step * 
sizeof(UInt8));
                     } else {
                         nullmap_column->insert_many_defaults(max_step);
                     }
                 } else {
+                    auto current_insert_num = element_size - _cur_offset;
                     
nullable_column->get_nested_column_ptr()->insert_range_from(
-                            *detail.nested_col, pos, element_size - 
_cur_offset);
+                            *detail.nested_col, pos, current_insert_num);
                     if (detail.nested_nullmap_data) {
-                        for (int j = 0; j < element_size - _cur_offset; j++) {
-                            if (detail.nested_nullmap_data[pos + j]) {
-                                nullmap_column->insert_value(1);
-                            } else {
-                                nullmap_column->insert_value(0);
-                            }
-                        }
+                        size_t old_size = nullmap_column->size();
+                        nullmap_column->resize(old_size + current_insert_num);
+                        memcpy(nullmap_column->get_data().data() + old_size,
+                               detail.nested_nullmap_data + pos,
+                               current_insert_num * sizeof(UInt8));
                     } else {
-                        nullmap_column->insert_many_defaults(element_size - 
_cur_offset);
+                        
nullmap_column->insert_many_defaults(current_insert_num);
                     }
-                    nullable_column->insert_many_defaults(max_step - 
(element_size - _cur_offset));
+                    nullable_column->insert_many_defaults(max_step - 
current_insert_num);
                 }
             }
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to