This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1523b6a9beb [opt] Optimization for short circuit of CompoundPred (#45422) 1523b6a9beb is described below commit 1523b6a9beb7a4686629d7873db441ce42c37783 Author: zhiqiang <hezhiqi...@selectdb.com> AuthorDate: Mon Dec 16 01:26:34 2024 +0800 [opt] Optimization for short circuit of CompoundPred (#45422) ### What problem does this PR solve? Related PR: https://github.com/apache/doris/pull/45414 Problem Summary: https://github.com/apache/doris/pull/45414 optimized the short-circuit evaluation of CompoundPred. However, in some cases it introduces a performance regression, because the AND/OR processing of CompoundPred is not as efficient as Function AND/OR. Previous ```sql mysqlslap -hd3 -uroot -P9130 --create-schema=test_db -c 10 -i 100 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;" Benchmark Average number of seconds to run all queries: 0.315 seconds Minimum number of seconds to run all queries: 0.307 seconds Maximum number of seconds to run all queries: 0.328 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` Now ```sql mysqlslap -hd3 -uroot -P9130 --create-schema=test_db -c 10 -i 100 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;" Benchmark Average number of seconds to run all queries: 0.222 seconds Minimum number of seconds to 
run all queries: 0.215 seconds Maximum number of seconds to run all queries: 0.263 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` BTW, the performance regression only occurs when the compound predicate can be short-circuited in most cases. Data distribution ```sql mysql> desc sbtest1; +-------+--------------+------+-------+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-------+--------------+------+-------+---------+-------+ | id | int | No | true | NULL | | | k | int | Yes | false | NULL | NONE | | c | varchar(360) | Yes | false | NULL | NONE | | pad | varchar(180) | Yes | false | NULL | NONE | +-------+--------------+------+-------+---------+-------+ 4 rows in set (0.00 sec) mysql> select count(*) from sbtest1; +----------+ | count(*) | +----------+ | 5069205 | +----------+ 1 row in set (0.02 sec) mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183; +----------+ | count(k) | +----------+ | 4726682 | +----------+ 1 row in set (0.02 sec) mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295; +----------+ | count(k) | +----------+ | 5066731 | +----------+ 1 row in set (0.06 sec) ``` TODO: Investigate why the code is not vectorized. 
--- be/src/vec/exprs/vcompound_pred.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 8c65e6c8adb..88f3e474b58 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -234,24 +234,27 @@ public: auto vector_vector_null = [&]<bool is_and_op>() { auto col_res = ColumnUInt8::create(size); auto col_nulls = ColumnUInt8::create(size); + auto* __restrict res_datas = assert_cast<ColumnUInt8*>(col_res)->get_data().data(); auto* __restrict res_nulls = assert_cast<ColumnUInt8*>(col_nulls)->get_data().data(); ColumnPtr temp_null_map = nullptr; // maybe both children are nullable / or one of children is nullable - lhs_null_map = create_null_map_column(temp_null_map, lhs_null_map); - rhs_null_map = create_null_map_column(temp_null_map, rhs_null_map); + auto* __restrict lhs_null_map_tmp = create_null_map_column(temp_null_map, lhs_null_map); + auto* __restrict rhs_null_map_tmp = create_null_map_column(temp_null_map, rhs_null_map); + auto* __restrict lhs_data_column_tmp = lhs_data_column; + auto* __restrict rhs_data_column_tmp = rhs_data_column; if constexpr (is_and_op) { for (size_t i = 0; i < size; ++i) { - res_nulls[i] = apply_and_null(lhs_data_column[i], lhs_null_map[i], - rhs_data_column[i], rhs_null_map[i]); - res_datas[i] = lhs_data_column[i] & rhs_data_column[i]; + res_nulls[i] = apply_and_null(lhs_data_column_tmp[i], lhs_null_map_tmp[i], + rhs_data_column_tmp[i], rhs_null_map_tmp[i]); + res_datas[i] = lhs_data_column_tmp[i] & rhs_data_column_tmp[i]; } } else { for (size_t i = 0; i < size; ++i) { - res_nulls[i] = apply_or_null(lhs_data_column[i], lhs_null_map[i], - rhs_data_column[i], rhs_null_map[i]); - res_datas[i] = lhs_data_column[i] | rhs_data_column[i]; + res_nulls[i] = apply_or_null(lhs_data_column_tmp[i], lhs_null_map_tmp[i], + rhs_data_column_tmp[i], rhs_null_map_tmp[i]); + res_datas[i] = lhs_data_column_tmp[i] | 
rhs_data_column_tmp[i]; } } auto result_column = ColumnNullable::create(std::move(col_res), std::move(col_nulls)); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org