This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new da806a9543f branch-4.0: [Improvement](join) lazy calculate all_match_one #58729 (#58901)
da806a9543f is described below

commit da806a9543f14c3457362789d5511230553d82fc
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Dec 11 18:51:21 2025 +0800

    branch-4.0: [Improvement](join) lazy calculate all_match_one #58729 (#58901)
    
    Cherry-picked from #58729
    
    Co-authored-by: Pxl <[email protected]>
---
 be/src/pipeline/exec/join/process_hash_table_probe.h    |  3 ++-
 .../pipeline/exec/join/process_hash_table_probe_impl.h  | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/be/src/pipeline/exec/join/process_hash_table_probe.h b/be/src/pipeline/exec/join/process_hash_table_probe.h
index 68cbf4e4819..b83ac8709e2 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe.h
@@ -111,7 +111,8 @@ struct ProcessHashTableProbe {
     const std::vector<bool>& _right_output_slot_flags;
     // nullable column but not has null except first row
     std::vector<bool> _build_column_has_null;
-    bool _need_calculate_build_index_has_zero = true;
+
+    bool _need_calculate_all_match_one = false;
 
     RuntimeProfile::Counter* _search_hashtable_timer = nullptr;
     RuntimeProfile::Counter* _init_probe_side_timer = nullptr;
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 6753052f61c..1253afabdbd 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -81,7 +81,17 @@ ProcessHashTableProbe<JoinOpType>::ProcessHashTableProbe(HashJoinProbeLocalState
           _probe_side_output_timer(parent->_probe_side_output_timer),
           _finish_probe_phase_timer(parent->_finish_probe_phase_timer),
           _right_col_idx(_parent_operator->_right_col_idx),
-          _right_col_len(_parent_operator->_right_table_data_types.size()) {}
+          _right_col_len(_parent_operator->_right_table_data_types.size()) {
+    constexpr int CALCULATE_ALL_MATCH_ONE_THRESHOLD = 2;
+    int probe_output_non_lazy_materialized_count = 0;
+    for (int i = 0; i < _left_output_slot_flags.size(); i++) {
+        if (_left_output_slot_flags[i] && !_parent_operator->is_lazy_materialized_column(i)) {
+            probe_output_non_lazy_materialized_count++;
+        }
+    }
+    _need_calculate_all_match_one =
+            probe_output_non_lazy_materialized_count >= CALCULATE_ALL_MATCH_ONE_THRESHOLD;
+}
 
 template <int JoinOpType>
 void ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::MutableColumns& mcol,
@@ -98,7 +108,6 @@ void ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::Mut
     }
 
     if (!build_index_has_zero && _build_column_has_null.empty()) {
-        _need_calculate_build_index_has_zero = false;
         _build_column_has_null.resize(_right_output_slot_flags.size());
         for (int i = 0; i < _right_col_len; i++) {
             const auto& column = *_build_block->safe_get_by_position(i).column;
@@ -107,7 +116,6 @@ void ProcessHashTableProbe<JoinOpType>::build_side_output_column(vectorized::Mut
                 const auto& nullable = assert_cast<const vectorized::ColumnNullable&>(column);
                 _build_column_has_null[i] = !simd::contain_byte(
                         nullable.get_null_map_data().data() + 1, nullable.size() - 1, 1);
-                _need_calculate_build_index_has_zero |= _build_column_has_null[i];
             }
         }
     }
@@ -147,7 +155,8 @@ template <int JoinOpType>
 void ProcessHashTableProbe<JoinOpType>::probe_side_output_column(vectorized::MutableColumns& mcol) {
     SCOPED_TIMER(_probe_side_output_timer);
     auto& probe_block = _parent->_probe_block;
-    bool all_match_one = check_all_match_one(_probe_indexs.get_data());
+    bool all_match_one =
+            _need_calculate_all_match_one ? check_all_match_one(_probe_indexs.get_data()) : false;
 
     for (int i = 0; i < _left_output_slot_flags.size(); ++i) {
         if (_left_output_slot_flags[i]) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to