This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1523b6a9beb [opt] Optimization for short circuit of CompoundPred 
(#45422)
1523b6a9beb is described below

commit 1523b6a9beb7a4686629d7873db441ce42c37783
Author: zhiqiang <hezhiqi...@selectdb.com>
AuthorDate: Mon Dec 16 01:26:34 2024 +0800

    [opt] Optimization for short circuit of CompoundPred (#45422)
    
    ### What problem does this PR solve?
    
    Related PR: https://github.com/apache/doris/pull/45414
    
    Problem Summary:
    
    https://github.com/apache/doris/pull/45414 optimized the short circuit of
    CompoundPred. But in some cases it introduces a performance regression,
    because the AND/OR processing of CompoundPred is not as efficient as
    Function AND/OR.
    
    Previous
    ```sql
    mysqlslap -hd3 -uroot -P9130  --create-schema=test_db -c 10 -i 100 -q 
"SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k 
BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 
28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 
AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 
15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;"
    Benchmark
        Average number of seconds to run all queries: 0.315 seconds
        Minimum number of seconds to run all queries: 0.307 seconds
        Maximum number of seconds to run all queries: 0.328 seconds
        Number of clients running queries: 10
        Average number of queries per client: 1
    ```
    Now
    ```sql
    mysqlslap -hd3 -uroot -P9130  --create-schema=test_db -c 10 -i 100 -q 
"SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k 
BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 
28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 
AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 
15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;"
    Benchmark
            Average number of seconds to run all queries: 0.222 seconds
            Minimum number of seconds to run all queries: 0.215 seconds
            Maximum number of seconds to run all queries: 0.263 seconds
            Number of clients running queries: 10
            Average number of queries per client: 1
    ```
    BTW, the performance regression only occurs when the compound predicate
    can be short-circuited in most cases.
    Data distribution
    ```sql
    mysql> desc sbtest1;
    +-------+--------------+------+-------+---------+-------+
    | Field | Type         | Null | Key   | Default | Extra |
    +-------+--------------+------+-------+---------+-------+
    | id    | int          | No   | true  | NULL    |       |
    | k     | int          | Yes  | false | NULL    | NONE  |
    | c     | varchar(360) | Yes  | false | NULL    | NONE  |
    | pad   | varchar(180) | Yes  | false | NULL    | NONE  |
    +-------+--------------+------+-------+---------+-------+
    4 rows in set (0.00 sec)
    
    mysql> select count(*) from sbtest1;
    +----------+
    | count(*) |
    +----------+
    |  5069205 |
    +----------+
    1 row in set (0.02 sec)
    
    mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183;
    +----------+
    | count(k) |
    +----------+
    |  4726682 |
    +----------+
    1 row in set (0.02 sec)
    
    mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 
OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k 
BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 
17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 
AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 
8454295;
    +----------+
    | count(k) |
    +----------+
    |  5066731 |
    +----------+
    1 row in set (0.06 sec)
    ```
    
    TODO: Investigate why the code is not vectorized.
---
 be/src/vec/exprs/vcompound_pred.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/exprs/vcompound_pred.h 
b/be/src/vec/exprs/vcompound_pred.h
index 8c65e6c8adb..88f3e474b58 100644
--- a/be/src/vec/exprs/vcompound_pred.h
+++ b/be/src/vec/exprs/vcompound_pred.h
@@ -234,24 +234,27 @@ public:
         auto vector_vector_null = [&]<bool is_and_op>() {
             auto col_res = ColumnUInt8::create(size);
             auto col_nulls = ColumnUInt8::create(size);
+
             auto* __restrict res_datas = 
assert_cast<ColumnUInt8*>(col_res)->get_data().data();
             auto* __restrict res_nulls = 
assert_cast<ColumnUInt8*>(col_nulls)->get_data().data();
             ColumnPtr temp_null_map = nullptr;
             // maybe both children are nullable / or one of children is 
nullable
-            lhs_null_map = create_null_map_column(temp_null_map, lhs_null_map);
-            rhs_null_map = create_null_map_column(temp_null_map, rhs_null_map);
+            auto* __restrict lhs_null_map_tmp = 
create_null_map_column(temp_null_map, lhs_null_map);
+            auto* __restrict rhs_null_map_tmp = 
create_null_map_column(temp_null_map, rhs_null_map);
+            auto* __restrict lhs_data_column_tmp = lhs_data_column;
+            auto* __restrict rhs_data_column_tmp = rhs_data_column;
 
             if constexpr (is_and_op) {
                 for (size_t i = 0; i < size; ++i) {
-                    res_nulls[i] = apply_and_null(lhs_data_column[i], 
lhs_null_map[i],
-                                                  rhs_data_column[i], 
rhs_null_map[i]);
-                    res_datas[i] = lhs_data_column[i] & rhs_data_column[i];
+                    res_nulls[i] = apply_and_null(lhs_data_column_tmp[i], 
lhs_null_map_tmp[i],
+                                                  rhs_data_column_tmp[i], 
rhs_null_map_tmp[i]);
+                    res_datas[i] = lhs_data_column_tmp[i] & 
rhs_data_column_tmp[i];
                 }
             } else {
                 for (size_t i = 0; i < size; ++i) {
-                    res_nulls[i] = apply_or_null(lhs_data_column[i], 
lhs_null_map[i],
-                                                 rhs_data_column[i], 
rhs_null_map[i]);
-                    res_datas[i] = lhs_data_column[i] | rhs_data_column[i];
+                    res_nulls[i] = apply_or_null(lhs_data_column_tmp[i], 
lhs_null_map_tmp[i],
+                                                 rhs_data_column_tmp[i], 
rhs_null_map_tmp[i]);
+                    res_datas[i] = lhs_data_column_tmp[i] | 
rhs_data_column_tmp[i];
                 }
             }
             auto result_column = ColumnNullable::create(std::move(col_res), 
std::move(col_nulls));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to