This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 8e730faec52 [Exec](expr) Opt the compound pred performance (#45414) (#46232) 8e730faec52 is described below commit 8e730faec5293ad828102adafdd2d26f55ff60f6 Author: HappenLee <happen...@selectdb.com> AuthorDate: Wed Jan 1 16:19:42 2025 +0800 [Exec](expr) Opt the compound pred performance (#45414) (#46232) cherry-pick #45414 before: ``` mysqlslap -hd3 -uroot -P9130 --create-schema=test_db2 -c 10 -i 500 -q "SELECT count(k) FROM sbtest1_dup WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;" Benchmark Average number of seconds to run all queries: 0.041 seconds Minimum number of seconds to run all queries: 0.037 seconds Maximum number of seconds to run all queries: 0.115 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` after: ``` mysqlslap -hd3 -uroot -P9030 --create-schema=test_db -c 10 -i 500 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;" Benchmark Average number of seconds to run all queries: 0.029 seconds Minimum number of seconds to run all queries: 0.027 seconds Maximum number of seconds to run all queries: 0.034 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. 
<!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into --> --- be/src/vec/exprs/vcompound_pred.h | 86 +++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 26 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 9e39533a7ae..d1d7739e541 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -153,12 +153,15 @@ public: if (fast_execute(context, block, result_column_id)) { return Status::OK(); } - if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { + if (children().size() == 1 || _has_const_child()) { return VectorizedFnCall::execute(context, block, result_column_id); } int lhs_id = -1; int rhs_id = -1; + bool lhs_mem_can_reuse = _children[0]->is_compound_predicate(); + bool rhs_mem_can_reuse = _children[1]->is_compound_predicate(); + RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id)); ColumnPtr lhs_column = block->get_by_position(lhs_id).column->convert_to_full_column_if_const(); @@ -205,13 +208,22 @@ public: return Status::OK(); }; - auto return_result_column_id = [&](ColumnPtr res_column, int res_id) -> int { + auto return_result_column_id = [&](ColumnPtr res_column, int res_id, + bool mem_reuse) -> int { + if (!mem_reuse) { + res_column = res_column->clone_resized(size); + } + if (result_is_nullable && !res_column->is_nullable()) { auto result_column = ColumnNullable::create(res_column, ColumnUInt8::create(size, 0)); res_id = block->columns(); block->insert({std::move(result_column), _data_type, _expr_name}); + } else if (!mem_reuse) { + res_id = block->columns(); + block->insert({std::move(res_column), _data_type, _expr_name}); } + return res_id; }; @@ -226,6 +238,33 @@ public: return 
null_map_data; }; + auto vector_vector = [&]<bool is_and_op>() { + if (lhs_mem_can_reuse) { + *result_column_id = lhs_id; + } else if (rhs_mem_can_reuse) { + *result_column_id = rhs_id; + + auto tmp_column = rhs_data_column; + rhs_data_column = lhs_data_column; + lhs_data_column = tmp_column; + } else { + *result_column_id = block->columns(); + + auto col_res = lhs_column->clone_resized(size); + lhs_data_column = assert_cast<ColumnUInt8*>(col_res.get())->get_data().data(); + block->insert({std::move(col_res), _data_type, _expr_name}); + } + + if constexpr (is_and_op) { + for (size_t i = 0; i < size; ++i) { + lhs_data_column[i] &= rhs_data_column[i]; + } + } else { + for (size_t i = 0; i < size; ++i) { + lhs_data_column[i] |= rhs_data_column[i]; + } + } + }; auto vector_vector_null = [&]<bool is_and_op>() { auto col_res = ColumnUInt8::create(size); auto col_nulls = ColumnUInt8::create(size); @@ -261,28 +300,28 @@ public: //2. nullable column: null map all is not null if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false and any = false, return lhs - *result_column_id = return_result_column_id(lhs_column, lhs_id); + *result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse); } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_true && !lhs_is_nullable) || //not null column (lhs_all_true && lhs_all_is_not_null)) { //nullable column // true and any = any, return rhs - *result_column_id = return_result_column_id(rhs_column, rhs_id); + *result_column_id = + return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse); } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any and false = false, return rhs - *result_column_id = return_result_column_id(rhs_column, rhs_id); + *result_column_id = + return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse); } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any and true = 
any, return lhs - *result_column_id = return_result_column_id(lhs_column, lhs_id); + *result_column_id = + return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse); } else { if (!result_is_nullable) { - *result_column_id = lhs_id; - for (size_t i = 0; i < size; i++) { - lhs_data_column[i] &= rhs_data_column[i]; - } + vector_vector.template operator()<true>(); } else { vector_vector_null.template operator()<true>(); } @@ -293,26 +332,26 @@ public: // false or NULL ----> NULL if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { // true or any = true, return lhs - *result_column_id = return_result_column_id(lhs_column, lhs_id); + *result_column_id = return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse); } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false or any = any, return rhs - *result_column_id = return_result_column_id(rhs_column, rhs_id); + *result_column_id = + return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse); } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any or true = true, return rhs - *result_column_id = return_result_column_id(rhs_column, rhs_id); + *result_column_id = + return_result_column_id(rhs_column, rhs_id, rhs_mem_can_reuse); } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any or false = any, return lhs - *result_column_id = return_result_column_id(lhs_column, lhs_id); + *result_column_id = + return_result_column_id(lhs_column, lhs_id, lhs_mem_can_reuse); } else { if (!result_is_nullable) { - *result_column_id = lhs_id; - for (size_t i = 0; i < size; i++) { - lhs_data_column[i] |= rhs_data_column[i]; - } + vector_vector.template operator()<false>(); } else { vector_vector_null.template operator()<false>(); } @@ -337,14 +376,9 @@ private: return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null ^ b)); } 
- bool _all_child_is_compound_and_not_const() const { - for (auto child : _children) { - // we can make sure non const compound predicate's return column is allow modifyied locally. - if (child->is_constant() || !child->is_compound_predicate()) { - return false; - } - } - return true; + bool _has_const_child() const { + return std::ranges::any_of(_children, + [](const VExprSPtr& arg) -> bool { return arg->is_constant(); }); } std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org