This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0d2264cd6ec [improve](function) Optimize the performance of the explode function (#50011)
0d2264cd6ec is described below
commit 0d2264cd6ec8651667cdaf25f11249639876d23c
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Apr 15 15:49:51 2025 +0800
[improve](function) Optimize the performance of the explode function (#50011)
### What problem does this PR solve?
Problem Summary:
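Optimize the performance of the explode function. In
VExplodeV2TableFunction::get_value, the nested null map is now appended
with a single resize + memcpy instead of one insert_value call per
element. process_init reads the child-expression count once and reuses it.
The CopyDataTime and RepeatDataTime profile timers are removed, and
ProcessRowsTime is now scoped in get_expanded_block instead of in
process_next_child_row.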
---
be/src/pipeline/exec/table_function_operator.cpp | 6 +---
be/src/pipeline/exec/table_function_operator.h | 2 --
be/src/vec/exprs/table_function/vexplode_v2.cpp | 44 +++++++++++-------------
3 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/be/src/pipeline/exec/table_function_operator.cpp
b/be/src/pipeline/exec/table_function_operator.cpp
index 0cff402264c..f38a08a2646 100644
--- a/be/src/pipeline/exec/table_function_operator.cpp
+++ b/be/src/pipeline/exec/table_function_operator.cpp
@@ -39,9 +39,7 @@ Status TableFunctionLocalState::init(RuntimeState* state,
LocalStateInfo& info)
SCOPED_TIMER(_init_timer);
_init_function_timer = ADD_TIMER(_runtime_profile,
"InitTableFunctionTime");
_process_rows_timer = ADD_TIMER(_runtime_profile, "ProcessRowsTime");
- _copy_data_timer = ADD_TIMER(_runtime_profile, "CopyDataTime");
_filter_timer = ADD_TIMER(_runtime_profile, "FilterTime");
- _repeat_data_timer = ADD_TIMER(_runtime_profile, "RepeatDataTime");
return Status::OK();
}
@@ -77,7 +75,6 @@ void TableFunctionLocalState::_copy_output_slots(
if (!_current_row_insert_times) {
return;
}
- SCOPED_TIMER(_copy_data_timer);
auto& p = _parent->cast<TableFunctionOperatorX>();
for (auto index : p._output_slot_indexs) {
auto src_column = _child_block->get_by_position(index).column;
@@ -167,7 +164,7 @@ Status
TableFunctionLocalState::get_expanded_block(RuntimeState* state,
_fns[i]->set_nullable();
}
}
-
+ SCOPED_TIMER(_process_rows_timer);
while (columns[p._child_slots.size()]->size() < state->batch_size()) {
RETURN_IF_CANCELLED(state);
@@ -230,7 +227,6 @@ Status
TableFunctionLocalState::get_expanded_block(RuntimeState* state,
}
void TableFunctionLocalState::process_next_child_row() {
- SCOPED_TIMER(_process_rows_timer);
_cur_child_offset++;
if (_cur_child_offset >= _child_block->rows()) {
diff --git a/be/src/pipeline/exec/table_function_operator.h
b/be/src/pipeline/exec/table_function_operator.h
index fe4ea17187c..2d7d94e486b 100644
--- a/be/src/pipeline/exec/table_function_operator.h
+++ b/be/src/pipeline/exec/table_function_operator.h
@@ -74,9 +74,7 @@ private:
RuntimeProfile::Counter* _init_function_timer = nullptr;
RuntimeProfile::Counter* _process_rows_timer = nullptr;
- RuntimeProfile::Counter* _copy_data_timer = nullptr;
RuntimeProfile::Counter* _filter_timer = nullptr;
- RuntimeProfile::Counter* _repeat_data_timer = nullptr;
};
class TableFunctionOperatorX MOCK_REMOVE(final)
diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp
b/be/src/vec/exprs/table_function/vexplode_v2.cpp
index ccebae91f34..53b368bed86 100644
--- a/be/src/vec/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp
@@ -74,16 +74,16 @@ Status
VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co
}
Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState*
state) {
- CHECK(_expr_context->root()->children().size() >= 1)
- << "VExplodeV2TableFunction support one or more child but has "
- << _expr_context->root()->children().size();
+ auto expr_size = _expr_context->root()->children().size();
+ CHECK(expr_size >= 1) << "VExplodeV2TableFunction support one or more
child but has "
+ << expr_size;
int value_column_idx = -1;
- _multi_detail.resize(_expr_context->root()->children().size());
- _array_offsets.resize(_expr_context->root()->children().size());
- _array_columns.resize(_expr_context->root()->children().size());
+ _multi_detail.resize(expr_size);
+ _array_offsets.resize(expr_size);
+ _array_columns.resize(expr_size);
- for (int i = 0; i < _expr_context->root()->children().size(); i++) {
+ for (int i = 0; i < expr_size; i++) {
RETURN_IF_ERROR(_expr_context->root()->children()[i]->execute(_expr_context.get(),
block,
&value_column_idx));
if
(WhichDataType(remove_nullable(block->get_by_position(value_column_idx).type))
@@ -207,31 +207,27 @@ int VExplodeV2TableFunction::get_value(MutableColumnPtr&
column, int max_step) {
nullable_column->get_nested_column_ptr()->insert_range_from(*detail.nested_col,
pos, max_step);
if (detail.nested_nullmap_data) {
- for (int j = 0; j < max_step; j++) {
- if (detail.nested_nullmap_data[pos + j]) {
- nullmap_column->insert_value(1);
- } else {
- nullmap_column->insert_value(0);
- }
- }
+ size_t old_size = nullmap_column->size();
+ nullmap_column->resize(old_size + max_step);
+ memcpy(nullmap_column->get_data().data() + old_size,
+ detail.nested_nullmap_data + pos, max_step *
sizeof(UInt8));
} else {
nullmap_column->insert_many_defaults(max_step);
}
} else {
+ auto current_insert_num = element_size - _cur_offset;
nullable_column->get_nested_column_ptr()->insert_range_from(
- *detail.nested_col, pos, element_size -
_cur_offset);
+ *detail.nested_col, pos, current_insert_num);
if (detail.nested_nullmap_data) {
- for (int j = 0; j < element_size - _cur_offset; j++) {
- if (detail.nested_nullmap_data[pos + j]) {
- nullmap_column->insert_value(1);
- } else {
- nullmap_column->insert_value(0);
- }
- }
+ size_t old_size = nullmap_column->size();
+ nullmap_column->resize(old_size + current_insert_num);
+ memcpy(nullmap_column->get_data().data() + old_size,
+ detail.nested_nullmap_data + pos,
+ current_insert_num * sizeof(UInt8));
} else {
- nullmap_column->insert_many_defaults(element_size -
_cur_offset);
+
nullmap_column->insert_many_defaults(current_insert_num);
}
- nullable_column->insert_many_defaults(max_step -
(element_size - _cur_offset));
+ nullable_column->insert_many_defaults(max_step -
current_insert_num);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]