This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new da806a9543f branch-4.0: [Improvement](join) lazy calculate all_match_one #58729 (#58901)
da806a9543f is described below
commit da806a9543f14c3457362789d5511230553d82fc
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 11 18:51:21 2025 +0800
branch-4.0: [Improvement](join) lazy calculate all_match_one #58729 (#58901)
Cherry-picked from #58729
Co-authored-by: Pxl <[email protected]>
---
be/src/pipeline/exec/join/process_hash_table_probe.h | 3 ++-
.../pipeline/exec/join/process_hash_table_probe_impl.h | 17 +++++++++++++----
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h b/be/src/pipeline/exec/join/process_hash_table_probe.h
index 68cbf4e4819..b83ac8709e2 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe.h
@@ -111,7 +111,8 @@ struct ProcessHashTableProbe {
const std::vector<bool>& _right_output_slot_flags;
// nullable column but not has null except first row
std::vector<bool> _build_column_has_null;
- bool _need_calculate_build_index_has_zero = true;
+
+ bool _need_calculate_all_match_one = false;
RuntimeProfile::Counter* _search_hashtable_timer = nullptr;
RuntimeProfile::Counter* _init_probe_side_timer = nullptr;
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 6753052f61c..1253afabdbd 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -81,7 +81,17 @@
ProcessHashTableProbe<JoinOpType>::ProcessHashTableProbe(HashJoinProbeLocalState
_probe_side_output_timer(parent->_probe_side_output_timer),
_finish_probe_phase_timer(parent->_finish_probe_phase_timer),
_right_col_idx(_parent_operator->_right_col_idx),
- _right_col_len(_parent_operator->_right_table_data_types.size()) {}
+ _right_col_len(_parent_operator->_right_table_data_types.size()) {
+ constexpr int CALCULATE_ALL_MATCH_ONE_THRESHOLD = 2;
+ int probe_output_non_lazy_materialized_count = 0;
+ for (int i = 0; i < _left_output_slot_flags.size(); i++) {
+ if (_left_output_slot_flags[i] &&
!_parent_operator->is_lazy_materialized_column(i)) {
+ probe_output_non_lazy_materialized_count++;
+ }
+ }
+ _need_calculate_all_match_one =
+ probe_output_non_lazy_materialized_count >=
CALCULATE_ALL_MATCH_ONE_THRESHOLD;
+}
template <int JoinOpType>
void
ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::MutableColumns&
mcol,
@@ -98,7 +108,6 @@ void
ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::Mut
}
if (!build_index_has_zero && _build_column_has_null.empty()) {
- _need_calculate_build_index_has_zero = false;
_build_column_has_null.resize(_right_output_slot_flags.size());
for (int i = 0; i < _right_col_len; i++) {
const auto& column = *_build_block->safe_get_by_position(i).column;
@@ -107,7 +116,6 @@ void
ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::Mut
const auto& nullable = assert_cast<const
vectorized::ColumnNullable&>(column);
_build_column_has_null[i] = !simd::contain_byte(
nullable.get_null_map_data().data() + 1,
nullable.size() - 1, 1);
- _need_calculate_build_index_has_zero |=
_build_column_has_null[i];
}
}
}
@@ -147,7 +155,8 @@ template <int JoinOpType>
void
ProcessHashTableProbe<JoinOpType>::probe_side_output_column(vectorized::MutableColumns&
mcol) {
SCOPED_TIMER(_probe_side_output_timer);
auto& probe_block = _parent->_probe_block;
- bool all_match_one = check_all_match_one(_probe_indexs.get_data());
+ bool all_match_one =
+ _need_calculate_all_match_one ?
check_all_match_one(_probe_indexs.get_data()) : false;
for (int i = 0; i < _left_output_slot_flags.size(); ++i) {
if (_left_output_slot_flags[i]) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]