This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 8e730faec52 [Exec](expr) Opt the compound pred performance (#45414) (#46232)
8e730faec52 is described below

commit 8e730faec5293ad828102adafdd2d26f55ff60f6
Author: HappenLee <happen...@selectdb.com>
AuthorDate: Wed Jan 1 16:19:42 2025 +0800

    [Exec](expr) Opt the compound pred performance (#45414) (#46232)
    
    cherry-pick #45414
    
    before:
    ```
    mysqlslap -hd3 -uroot -P9130  --create-schema=test_db2 -c 10 -i 500 -q "SELECT count(k) FROM sbtest1_dup WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
    Benchmark
            Average number of seconds to run all queries: 0.041 seconds
            Minimum number of seconds to run all queries: 0.037 seconds
            Maximum number of seconds to run all queries: 0.115 seconds
            Number of clients running queries: 10
            Average number of queries per client: 1
    ```
    
    after:
    ```
    mysqlslap -hd3 -uroot -P9030  --create-schema=test_db -c 10 -i 500 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN 8776291 AND 29749077;"
    Benchmark
            Average number of seconds to run all queries: 0.029 seconds
            Minimum number of seconds to run all queries: 0.027 seconds
            Maximum number of seconds to run all queries: 0.034 seconds
            Number of clients running queries: 10
            Average number of queries per client: 1
    ```
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
---
 be/src/vec/exprs/vcompound_pred.h | 86 +++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 26 deletions(-)

diff --git a/be/src/vec/exprs/vcompound_pred.h 
b/be/src/vec/exprs/vcompound_pred.h
index 9e39533a7ae..d1d7739e541 100644
--- a/be/src/vec/exprs/vcompound_pred.h
+++ b/be/src/vec/exprs/vcompound_pred.h
@@ -153,12 +153,15 @@ public:
         if (fast_execute(context, block, result_column_id)) {
             return Status::OK();
         }
-        if (children().size() == 1 || !_all_child_is_compound_and_not_const()) 
{
+        if (children().size() == 1 || _has_const_child()) {
             return VectorizedFnCall::execute(context, block, result_column_id);
         }
 
         int lhs_id = -1;
         int rhs_id = -1;
+        bool lhs_mem_can_reuse = _children[0]->is_compound_predicate();
+        bool rhs_mem_can_reuse = _children[1]->is_compound_predicate();
+
         RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
         ColumnPtr lhs_column =
                 
block->get_by_position(lhs_id).column->convert_to_full_column_if_const();
@@ -205,13 +208,22 @@ public:
             return Status::OK();
         };
 
-        auto return_result_column_id = [&](ColumnPtr res_column, int res_id) 
-> int {
+        auto return_result_column_id = [&](ColumnPtr res_column, int res_id,
+                                           bool mem_reuse) -> int {
+            if (!mem_reuse) {
+                res_column = res_column->clone_resized(size);
+            }
+
             if (result_is_nullable && !res_column->is_nullable()) {
                 auto result_column =
                         ColumnNullable::create(res_column, 
ColumnUInt8::create(size, 0));
                 res_id = block->columns();
                 block->insert({std::move(result_column), _data_type, 
_expr_name});
+            } else if (!mem_reuse) {
+                res_id = block->columns();
+                block->insert({std::move(res_column), _data_type, _expr_name});
             }
+
             return res_id;
         };
 
@@ -226,6 +238,33 @@ public:
             return null_map_data;
         };
 
+        auto vector_vector = [&]<bool is_and_op>() {
+            if (lhs_mem_can_reuse) {
+                *result_column_id = lhs_id;
+            } else if (rhs_mem_can_reuse) {
+                *result_column_id = rhs_id;
+
+                auto tmp_column = rhs_data_column;
+                rhs_data_column = lhs_data_column;
+                lhs_data_column = tmp_column;
+            } else {
+                *result_column_id = block->columns();
+
+                auto col_res = lhs_column->clone_resized(size);
+                lhs_data_column = 
assert_cast<ColumnUInt8*>(col_res.get())->get_data().data();
+                block->insert({std::move(col_res), _data_type, _expr_name});
+            }
+
+            if constexpr (is_and_op) {
+                for (size_t i = 0; i < size; ++i) {
+                    lhs_data_column[i] &= rhs_data_column[i];
+                }
+            } else {
+                for (size_t i = 0; i < size; ++i) {
+                    lhs_data_column[i] |= rhs_data_column[i];
+                }
+            }
+        };
         auto vector_vector_null = [&]<bool is_and_op>() {
             auto col_res = ColumnUInt8::create(size);
             auto col_nulls = ColumnUInt8::create(size);
@@ -261,28 +300,28 @@ public:
             //2. nullable column: null map all is not null
             if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && 
lhs_all_is_not_null)) {
                 // false and any = false, return lhs
-                *result_column_id = return_result_column_id(lhs_column, 
lhs_id);
+                *result_column_id = return_result_column_id(lhs_column, 
lhs_id, lhs_mem_can_reuse);
             } else {
                 RETURN_IF_ERROR(get_rhs_colum());
 
                 if ((lhs_all_true && !lhs_is_nullable) ||    //not null column
                     (lhs_all_true && lhs_all_is_not_null)) { //nullable column
                     // true and any = any, return rhs
-                    *result_column_id = return_result_column_id(rhs_column, 
rhs_id);
+                    *result_column_id =
+                            return_result_column_id(rhs_column, rhs_id, 
rhs_mem_can_reuse);
                 } else if ((rhs_all_false && !rhs_is_nullable) ||
                            (rhs_all_false && rhs_all_is_not_null)) {
                     // any and false = false, return rhs
-                    *result_column_id = return_result_column_id(rhs_column, 
rhs_id);
+                    *result_column_id =
+                            return_result_column_id(rhs_column, rhs_id, 
rhs_mem_can_reuse);
                 } else if ((rhs_all_true && !rhs_is_nullable) ||
                            (rhs_all_true && rhs_all_is_not_null)) {
                     // any and true = any, return lhs
-                    *result_column_id = return_result_column_id(lhs_column, 
lhs_id);
+                    *result_column_id =
+                            return_result_column_id(lhs_column, lhs_id, 
lhs_mem_can_reuse);
                 } else {
                     if (!result_is_nullable) {
-                        *result_column_id = lhs_id;
-                        for (size_t i = 0; i < size; i++) {
-                            lhs_data_column[i] &= rhs_data_column[i];
-                        }
+                        vector_vector.template operator()<true>();
                     } else {
                         vector_vector_null.template operator()<true>();
                     }
@@ -293,26 +332,26 @@ public:
             // false or NULL ----> NULL
             if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && 
lhs_all_is_not_null)) {
                 // true or any = true, return lhs
-                *result_column_id = return_result_column_id(lhs_column, 
lhs_id);
+                *result_column_id = return_result_column_id(lhs_column, 
lhs_id, lhs_mem_can_reuse);
             } else {
                 RETURN_IF_ERROR(get_rhs_colum());
                 if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && 
lhs_all_is_not_null)) {
                     // false or any = any, return rhs
-                    *result_column_id = return_result_column_id(rhs_column, 
rhs_id);
+                    *result_column_id =
+                            return_result_column_id(rhs_column, rhs_id, 
rhs_mem_can_reuse);
                 } else if ((rhs_all_true && !rhs_is_nullable) ||
                            (rhs_all_true && rhs_all_is_not_null)) {
                     // any or true = true, return rhs
-                    *result_column_id = return_result_column_id(rhs_column, 
rhs_id);
+                    *result_column_id =
+                            return_result_column_id(rhs_column, rhs_id, 
rhs_mem_can_reuse);
                 } else if ((rhs_all_false && !rhs_is_nullable) ||
                            (rhs_all_false && rhs_all_is_not_null)) {
                     // any or false = any, return lhs
-                    *result_column_id = return_result_column_id(lhs_column, 
lhs_id);
+                    *result_column_id =
+                            return_result_column_id(lhs_column, lhs_id, 
lhs_mem_can_reuse);
                 } else {
                     if (!result_is_nullable) {
-                        *result_column_id = lhs_id;
-                        for (size_t i = 0; i < size; i++) {
-                            lhs_data_column[i] |= rhs_data_column[i];
-                        }
+                        vector_vector.template operator()<false>();
                     } else {
                         vector_vector_null.template operator()<false>();
                     }
@@ -337,14 +376,9 @@ private:
         return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null 
^ b));
     }
 
-    bool _all_child_is_compound_and_not_const() const {
-        for (auto child : _children) {
-            // we can make sure non const compound predicate's return column 
is allow modifyied locally.
-            if (child->is_constant() || !child->is_compound_predicate()) {
-                return false;
-            }
-        }
-        return true;
+    bool _has_const_child() const {
+        return std::ranges::any_of(_children,
+                                   [](const VExprSPtr& arg) -> bool { return 
arg->is_constant(); });
     }
 
     std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to