This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9c38061d559 [Exec](expr) Opt the compound pred performance (#45414)
(#46254)
9c38061d559 is described below
commit 9c38061d5590d54eb2c852c2b59fdabbea83f9d1
Author: HappenLee <[email protected]>
AuthorDate: Thu Jan 2 12:30:11 2025 +0800
[Exec](expr) Opt the compound pred performance (#45414) (#46254)
before:
```
mysqlslap -hd3 -uroot -P9130 --create-schema=test_db2 -c 10 -i 500 -q
"SELECT count(k) FROM sbtest1_dup WHERE k BETWEEN 4850578 AND 8454295 OR k
BETWEEN 8776291 AND 29749077;"
Benchmark
Average number of seconds to run all queries: 0.041 seconds
Minimum number of seconds to run all queries: 0.037 seconds
Maximum number of seconds to run all queries: 0.115 seconds
Number of clients running queries: 10
Average number of queries per client: 1
```
after:
```
mysqlslap -hd3 -uroot -P9030 --create-schema=test_db -c 10 -i 500 -q
"SELECT count(k) FROM sbtest1 WHERE k BETWEEN 4850578 AND 8454295 OR k BETWEEN
8776291 AND 29749077;"
Benchmark
Average number of seconds to run all queries: 0.029 seconds
Minimum number of seconds to run all queries: 0.027 seconds
Maximum number of seconds to run all queries: 0.034 seconds
Number of clients running queries: 10
Average number of queries per client: 1
```
---
be/src/vec/exprs/vcompound_pred.h | 86 +++++++++++++++++++++++++++------------
1 file changed, 60 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/exprs/vcompound_pred.h
b/be/src/vec/exprs/vcompound_pred.h
index 86aa9ee07d7..f008d1e413a 100644
--- a/be/src/vec/exprs/vcompound_pred.h
+++ b/be/src/vec/exprs/vcompound_pred.h
@@ -153,12 +153,15 @@ public:
if (fast_execute(context, block, result_column_id)) {
return Status::OK();
}
- if (children().size() == 1 || !_all_child_is_compound_and_not_const())
{
+ if (get_num_children() == 1 || _has_const_child()) {
return VectorizedFnCall::execute(context, block, result_column_id);
}
int lhs_id = -1;
int rhs_id = -1;
+ bool lhs_mem_can_reuse = _children[0]->is_compound_predicate();
+ bool rhs_mem_can_reuse = _children[1]->is_compound_predicate();
+
RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
ColumnPtr lhs_column =
block->get_by_position(lhs_id).column->convert_to_full_column_if_const();
@@ -205,13 +208,22 @@ public:
return Status::OK();
};
- auto return_result_column_id = [&](ColumnPtr res_column, int res_id)
-> int {
+ auto return_result_column_id = [&](ColumnPtr res_column, int res_id,
+ bool mem_reuse) -> int {
+ if (!mem_reuse) {
+ res_column = res_column->clone_resized(size);
+ }
+
if (result_is_nullable && !res_column->is_nullable()) {
auto result_column =
ColumnNullable::create(res_column,
ColumnUInt8::create(size, 0));
res_id = block->columns();
block->insert({std::move(result_column), _data_type,
_expr_name});
+ } else if (!mem_reuse) {
+ res_id = block->columns();
+ block->insert({std::move(res_column), _data_type, _expr_name});
}
+
return res_id;
};
@@ -226,6 +238,33 @@ public:
return null_map_data;
};
+ auto vector_vector = [&]<bool is_and_op>() {
+ if (lhs_mem_can_reuse) {
+ *result_column_id = lhs_id;
+ } else if (rhs_mem_can_reuse) {
+ *result_column_id = rhs_id;
+
+ auto tmp_column = rhs_data_column;
+ rhs_data_column = lhs_data_column;
+ lhs_data_column = tmp_column;
+ } else {
+ *result_column_id = block->columns();
+
+ auto col_res = lhs_column->clone_resized(size);
+ lhs_data_column =
assert_cast<ColumnUInt8*>(col_res.get())->get_data().data();
+ block->insert({std::move(col_res), _data_type, _expr_name});
+ }
+
+ if constexpr (is_and_op) {
+ for (size_t i = 0; i < size; ++i) {
+ lhs_data_column[i] &= rhs_data_column[i];
+ }
+ } else {
+ for (size_t i = 0; i < size; ++i) {
+ lhs_data_column[i] |= rhs_data_column[i];
+ }
+ }
+ };
auto vector_vector_null = [&]<bool is_and_op>() {
auto col_res = ColumnUInt8::create(size);
auto col_nulls = ColumnUInt8::create(size);
@@ -264,28 +303,28 @@ public:
//2. nullable column: null map all is not null
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
// false and any = false, return lhs
- *result_column_id = return_result_column_id(lhs_column,
lhs_id);
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());
if ((lhs_all_true && !lhs_is_nullable) || //not null column
(lhs_all_true && lhs_all_is_not_null)) { //nullable column
// true and any = any, return rhs
- *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ *result_column_id =
+ return_result_column_id(rhs_column, rhs_id,
rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any and false = false, return rhs
- *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ *result_column_id =
+ return_result_column_id(rhs_column, rhs_id,
rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any and true = any, return lhs
- *result_column_id = return_result_column_id(lhs_column,
lhs_id);
+ *result_column_id =
+ return_result_column_id(lhs_column, lhs_id,
lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
- *result_column_id = lhs_id;
- for (size_t i = 0; i < size; i++) {
- lhs_data_column[i] &= rhs_data_column[i];
- }
+ vector_vector.template operator()<true>();
} else {
vector_vector_null.template operator()<true>();
}
@@ -296,26 +335,26 @@ public:
// false or NULL ----> NULL
if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
// true or any = true, return lhs
- *result_column_id = return_result_column_id(lhs_column,
lhs_id);
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id, lhs_mem_can_reuse);
} else {
RETURN_IF_ERROR(get_rhs_colum());
if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
// false or any = any, return rhs
- *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ *result_column_id =
+ return_result_column_id(rhs_column, rhs_id,
rhs_mem_can_reuse);
} else if ((rhs_all_true && !rhs_is_nullable) ||
(rhs_all_true && rhs_all_is_not_null)) {
// any or true = true, return rhs
- *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ *result_column_id =
+ return_result_column_id(rhs_column, rhs_id,
rhs_mem_can_reuse);
} else if ((rhs_all_false && !rhs_is_nullable) ||
(rhs_all_false && rhs_all_is_not_null)) {
// any or false = any, return lhs
- *result_column_id = return_result_column_id(lhs_column,
lhs_id);
+ *result_column_id =
+ return_result_column_id(lhs_column, lhs_id,
lhs_mem_can_reuse);
} else {
if (!result_is_nullable) {
- *result_column_id = lhs_id;
- for (size_t i = 0; i < size; i++) {
- lhs_data_column[i] |= rhs_data_column[i];
- }
+ vector_vector.template operator()<false>();
} else {
vector_vector_null.template operator()<false>();
}
@@ -340,14 +379,9 @@ private:
return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null
^ b));
}
- bool _all_child_is_compound_and_not_const() const {
- for (auto child : _children) {
- // we can make sure non const compound predicate's return column
is allow modifyied locally.
- if (child->is_constant() || !child->is_compound_predicate()) {
- return false;
- }
- }
- return true;
+ bool _has_const_child() const {
+ return std::ranges::any_of(_children,
+ [](const VExprSPtr& arg) -> bool { return
arg->is_constant(); });
}
std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]