This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch tpcds
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/tpcds by this push:
     new 4305c6f9d92 support rf change bf size by build exactly (#30324)
4305c6f9d92 is described below

commit 4305c6f9d923049c0b4da6e2c08a1d6ecf971c4a
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Wed Jan 24 18:04:21 2024 +0800

    support rf change bf size by build exactly (#30324)
---
 be/src/exprs/bloom_filter_func.h             | 12 +++---------
 be/src/exprs/runtime_filter.cpp              |  5 ++++-
 be/src/pipeline/exec/hashjoin_build_sink.cpp |  7 -------
 be/src/vec/exec/join/vhash_join_node.cpp     |  5 -----
 4 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index 71dc3f6e663..fa3a8e49d1a 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -92,12 +92,7 @@ public:
 
     void set_build_bf_exactly(bool build_bf_exactly) { _build_bf_exactly = build_bf_exactly; }
 
-    Status init_with_fixed_length() {
-        if (_build_bf_exactly) {
-            return Status::OK();
-        }
-        return init_with_fixed_length(_bloom_filter_length);
-    }
+    Status init_with_fixed_length() { return init_with_fixed_length(_bloom_filter_length); }
 
     Status init_with_cardinality(const size_t build_bf_cardinality) {
         if (_build_bf_exactly) {
@@ -109,10 +104,9 @@ public:
 
             // Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
             int log_filter_size = std::max(0, (int)(std::ceil(std::log(m / 8) / std::log(2))));
-            _bloom_filter_length = std::min(((int64_t)1) << log_filter_size, _bloom_filter_length);
-            return init_with_fixed_length(_bloom_filter_length);
+            _bloom_filter_length = (((int64_t)1) << log_filter_size);
         }
-        return Status::OK();
+        return init_with_fixed_length(_bloom_filter_length);
     }
 
     Status init_with_fixed_length(int64_t bloom_filter_length) {
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index 3215b842afb..9b15f3dce2b 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -1198,7 +1198,10 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue
     // 1. Only 1 join key
     // 2. Do not have remote target (e.g. do not need to merge)
     // 3. Bloom filter
-    params.build_bf_exactly = build_bf_exactly && !_has_remote_target &&
+    // 4. FE do not use ndv stat to predict the bf size, only the row count. BE have more
+    // exactly row count stat
+    params.build_bf_exactly = build_bf_exactly && !desc->bloom_filter_size_calculated_by_ndv &&
+                              (!_has_remote_target || _is_broadcast_join) &&
                               (_runtime_filter_type == RuntimeFilterType::BLOOM_FILTER ||
                                _runtime_filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER);
     if (desc->__isset.bloom_filter_size_bytes) {
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp
index f02e203c783..b9e63e63449 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.cpp
+++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp
@@ -116,13 +116,6 @@ Status HashJoinBuildSinkLocalState::open(RuntimeState* state) {
     SCOPED_TIMER(exec_time_counter());
     SCOPED_TIMER(_open_timer);
     RETURN_IF_ERROR(JoinBuildSinkLocalState::open(state));
-    auto& p = _parent->cast<HashJoinBuildSinkOperatorX>();
-
-    for (size_t i = 0; i < p._runtime_filter_descs.size(); i++) {
-        if (auto* bf = _runtime_filters[i]->get_bloomfilter()) {
-            RETURN_IF_ERROR(bf->init_with_fixed_length());
-        }
-    }
     return Status::OK();
 }
 
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 94cb5be876f..1f1448ad19d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -661,11 +661,6 @@ Status HashJoinNode::alloc_resource(doris::RuntimeState* state) {
     SCOPED_TIMER(_exec_timer);
     SCOPED_TIMER(_allocate_resource_timer);
     RETURN_IF_ERROR(VJoinNodeBase::alloc_resource(state));
-    for (size_t i = 0; i < _runtime_filter_descs.size(); i++) {
-        if (auto* bf = _runtime_filters[i]->get_bloomfilter()) {
-            RETURN_IF_ERROR(bf->init_with_fixed_length());
-        }
-    }
     RETURN_IF_ERROR(VExpr::open(_build_expr_ctxs, state));
     RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state));
     for (auto& conjunct : _other_join_conjuncts) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to