This is an automated email from the ASF dual-hosted git repository. zhangstar333 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new fc5b87e6203 [improve](join) reuse the join block to reduce malloc memory (#43738) fc5b87e6203 is described below commit fc5b87e6203635c3f23d45a0316a97bbcd9fb2c6 Author: zhangstar333 <zhangs...@selectdb.com> AuthorDate: Fri Nov 15 10:32:54 2024 +0800 [improve](join) reuse the join block to reduce malloc memory (#43738) ### What problem does this PR solve? Problem Summary: Previously, the pull function used tmp_block to reference the data and swap it with output_block, and then created empty columns in join_block, so the next time data was inserted into join_block, memory had to be malloc'ed again. After the fix: ``` mysql [ssb]>set parallel_pipeline_task_num = 0; mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (0.32 sec) mysql [ssb]>set parallel_pipeline_task_num = 1; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (5.61 sec) ``` Before the fix: ``` mysql [ssb]>set parallel_pipeline_task_num = 0; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (2.79 sec) mysql [ssb]>set parallel_pipeline_task_num = 1; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (10.21 sec) ``` --- .../pipeline/exec/nested_loop_join_probe_operator.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git 
a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp index afa1a2e59b7..f4f4ef21ece 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp @@ -516,23 +516,20 @@ Status NestedLoopJoinProbeOperatorX::pull(RuntimeState* state, vectorized::Block local_state._matched_rows_done : local_state._matched_rows_done); + size_t join_block_column_size = local_state._join_block.columns(); { - vectorized::Block tmp_block = local_state._join_block; - - // Here make _join_block release the columns' ptr - local_state._join_block.set_columns(local_state._join_block.clone_empty_columns()); - - local_state.add_tuple_is_null_column(&tmp_block); + local_state.add_tuple_is_null_column(&local_state._join_block); { SCOPED_TIMER(local_state._join_filter_timer); RETURN_IF_ERROR(vectorized::VExprContext::filter_block( - local_state._conjuncts, &tmp_block, tmp_block.columns())); + local_state._conjuncts, &local_state._join_block, + local_state._join_block.columns())); } - RETURN_IF_ERROR(local_state._build_output_block(&tmp_block, block, false)); + RETURN_IF_ERROR( + local_state._build_output_block(&local_state._join_block, block, false)); local_state._reset_tuple_is_null_column(); } - local_state._join_block.clear_column_data(); - + local_state._join_block.clear_column_data(join_block_column_size); if (!(*eos) and !local_state._need_more_input_data) { auto func = [&](auto&& join_op_variants, auto set_build_side_flag, auto set_probe_side_flag) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org