This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1e253cd04a4 [Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends #38972 (#42321)
1e253cd04a4 is described below

commit 1e253cd04a4d091ef5bbb926d1292ff44ea2fc07
Author: Pxl <x...@selectdb.com>
AuthorDate: Thu Nov 21 14:46:36 2024 +0800

    [Improvement](runtime-filter) send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends (#38972) (#42321)
    
    send RUNTIME_BLOOM_FILTER_MAX_SIZE to backends
    
    ## Proposed changes
    pick from #38972
---
 be/src/exprs/bloom_filter_func.h                   | 31 +++++++++++++---------
 be/src/exprs/runtime_filter.cpp                    |  3 +++
 be/src/exprs/runtime_filter.h                      |  1 +
 .../java/org/apache/doris/qe/SessionVariable.java  |  1 +
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h
index ae206615bd7..ad43b78cc6a 100644
--- a/be/src/exprs/bloom_filter_func.h
+++ b/be/src/exprs/bloom_filter_func.h
@@ -100,14 +100,14 @@ public:
     virtual ~BloomFilterFuncBase() = default;
 
     void init_params(const RuntimeFilterParams* params) {
-        _bloom_filter_length =
-                params->runtime_bloom_filter_min_size > 0
-                        ? std::max(params->bloom_filter_size, 
params->runtime_bloom_filter_min_size)
-                        : params->bloom_filter_size;
+        _bloom_filter_length = params->bloom_filter_size;
+
         _build_bf_exactly = params->build_bf_exactly;
         _runtime_bloom_filter_min_size = params->runtime_bloom_filter_min_size;
+        _runtime_bloom_filter_max_size = params->runtime_bloom_filter_max_size;
         _null_aware = params->null_aware;
         _bloom_filter_size_calculated_by_ndv = 
params->bloom_filter_size_calculated_by_ndv;
+        _limit_length();
     }
 
     Status init_with_fixed_length() { return 
init_with_fixed_length(_bloom_filter_length); }
@@ -128,17 +128,11 @@ public:
             // if FE do use ndv stat to predict the bf size, BE only use the 
row count. FE have more
             // exactly row count stat. which one is min is more correctly.
             if (_bloom_filter_size_calculated_by_ndv) {
-                _bloom_filter_length =
-                        _runtime_bloom_filter_min_size > 0
-                                ? std::max(_runtime_bloom_filter_min_size,
-                                           std::min(be_calculate_size, 
_bloom_filter_length))
-                                : std::min(be_calculate_size, 
_bloom_filter_length);
+                _bloom_filter_length = std::min(be_calculate_size, 
_bloom_filter_length);
             } else {
-                _bloom_filter_length =
-                        _runtime_bloom_filter_min_size > 0
-                                ? std::max(_runtime_bloom_filter_min_size, 
be_calculate_size)
-                                : be_calculate_size;
+                _bloom_filter_length = be_calculate_size;
             }
+            _limit_length();
         }
         return init_with_fixed_length(_bloom_filter_length);
     }
@@ -229,6 +223,16 @@ public:
                                                 uint16_t* offsets, int number,
                                                 bool is_parse_column) = 0;
 
+private:
+    void _limit_length() {
+        if (_runtime_bloom_filter_min_size > 0) {
+            _bloom_filter_length = std::max(_bloom_filter_length, 
_runtime_bloom_filter_min_size);
+        }
+        if (_runtime_bloom_filter_max_size > 0) {
+            _bloom_filter_length = std::min(_bloom_filter_length, 
_runtime_bloom_filter_max_size);
+        }
+    }
+
 protected:
     // bloom filter size
     int32_t _bloom_filter_alloced;
@@ -236,6 +240,7 @@ protected:
     bool _inited = false;
     int64_t _bloom_filter_length;
     int64_t _runtime_bloom_filter_min_size;
+    int64_t _runtime_bloom_filter_max_size;
     bool _build_bf_exactly = false;
     bool _bloom_filter_size_calculated_by_ndv = false;
 };
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index b7af2561fe0..f8f6b001982 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -1337,6 +1337,9 @@ Status IRuntimeFilter::init_with_desc(const 
TRuntimeFilterDesc* desc, const TQue
     params.runtime_bloom_filter_min_size = 
options->__isset.runtime_bloom_filter_min_size
                                                    ? 
options->runtime_bloom_filter_min_size
                                                    : 0;
+    params.runtime_bloom_filter_max_size = 
options->__isset.runtime_bloom_filter_max_size
+                                                   ? 
options->runtime_bloom_filter_max_size
+                                                   : 0;
     // We build runtime filter by exact distinct count iff three conditions 
are met:
     // 1. Only 1 join key
     // 2. Do not have remote target (e.g. do not need to merge), or broadcast 
join
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 6632c5dc872..584d3d4e535 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -128,6 +128,7 @@ struct RuntimeFilterParams {
     int64_t bloom_filter_size;
     int32_t max_in_num;
     int64_t runtime_bloom_filter_min_size;
+    int64_t runtime_bloom_filter_max_size;
     int32_t filter_id;
     bool bitmap_filter_not_in;
     bool build_bf_exactly;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index c6b7757e2c8..8b1442b7464 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -3842,6 +3842,7 @@ public class SessionVariable implements Serializable, 
Writable {
         tResult.setRuntimeFilterWaitTimeMs(runtimeFilterWaitTimeMs);
         tResult.setRuntimeFilterMaxInNum(runtimeFilterMaxInNum);
         tResult.setRuntimeBloomFilterMinSize(runtimeBloomFilterMinSize);
+        tResult.setRuntimeBloomFilterMaxSize(runtimeBloomFilterMaxSize);
         tResult.setRuntimeFilterWaitInfinitely(runtimeFilterWaitInfinitely);
 
         if (cpuResourceLimit > 0) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to