924060929 commented on a change in pull request #7547:
URL: https://github.com/apache/incubator-doris/pull/7547#discussion_r779321380



##########
File path: be/src/exprs/runtime_filter_slots.h
##########
@@ -62,16 +69,42 @@ class RuntimeFilterSlotsBase {
             DCHECK(runtime_filter->expr_order() < _probe_expr_context.size());
 
             // do not create 'in filter' when hash_table size over limit
-            bool over_max_in_num = (hash_table_size >= 
state->runtime_filter_max_in_num());
+            auto max_in_num = state->runtime_filter_max_in_num();
+            bool over_max_in_num = (hash_table_size >= max_in_num);
 
             bool is_in_filter = (runtime_filter->type() == 
RuntimeFilterType::IN_FILTER);
 
-            // do not create 'bloom filter' and 'minmax filter' when 'in 
filter' has created
-            bool pass_not_in = (has_in_filter[runtime_filter->expr_order()] &&
-                                !runtime_filter->has_remote_target());
-
-            if (over_max_in_num == is_in_filter && (is_in_filter || 
pass_not_in)) {
-                ignore_filter(filter_desc.filter_id);
+            // Note:
+            // In the case that exist *remote target* and in filter and other 
filter,
+            // we must merge other filter whatever in filter is over the max 
num in current node,
+            // because:
+            // case 1: (in filter >= max num) in current node, so in filter 
will be ignored,
+            //         and then other filter can be used
+            // case 2: (in filter < max num) in current node, we don't know 
whether the in filter
+            //         will be ignored in merge node, so we must transfer 
other filter to merge node
+            if (!runtime_filter->has_remote_target()) {
+                bool exists_in_filter = 
has_in_filter[runtime_filter->expr_order()];
+                if (is_in_filter && over_max_in_num) {
+                    LOG(INFO) << "fragment instance " << 
print_id(state->fragment_instance_id())
+                              << " ignore runtime filter(in filter id " << 
filter_desc.filter_id
+                              << ") because: in_num(" << hash_table_size
+                              << ") >= max_in_num(" << max_in_num << ")";
+                    ignore_local_filter(filter_desc.filter_id);
+                    continue;
+                } else if (!is_in_filter && exists_in_filter) {

Review comment:
       So the else-if condition `!is_in_filter && exists_in_filter` means: 
the current filter is not an in filter, and the in filter has not been ignored, 
so we can ignore the non-in filter.

##########
File path: be/src/exprs/runtime_filter_slots.h
##########
@@ -62,16 +69,42 @@ class RuntimeFilterSlotsBase {
             DCHECK(runtime_filter->expr_order() < _probe_expr_context.size());
 
             // do not create 'in filter' when hash_table size over limit
-            bool over_max_in_num = (hash_table_size >= 
state->runtime_filter_max_in_num());
+            auto max_in_num = state->runtime_filter_max_in_num();
+            bool over_max_in_num = (hash_table_size >= max_in_num);
 
             bool is_in_filter = (runtime_filter->type() == 
RuntimeFilterType::IN_FILTER);
 
-            // do not create 'bloom filter' and 'minmax filter' when 'in 
filter' has created
-            bool pass_not_in = (has_in_filter[runtime_filter->expr_order()] &&
-                                !runtime_filter->has_remote_target());
-
-            if (over_max_in_num == is_in_filter && (is_in_filter || 
pass_not_in)) {
-                ignore_filter(filter_desc.filter_id);
+            // Note:
+            // In the case that exist *remote target* and in filter and other 
filter,
+            // we must merge other filter whatever in filter is over the max 
num in current node,
+            // because:
+            // case 1: (in filter >= max num) in current node, so in filter 
will be ignored,
+            //         and then other filter can be used
+            // case 2: (in filter < max num) in current node, we don't know 
whether the in filter
+            //         will be ignored in merge node, so we must transfer 
other filter to merge node
+            if (!runtime_filter->has_remote_target()) {
+                bool exists_in_filter = 
has_in_filter[runtime_filter->expr_order()];
+                if (is_in_filter && over_max_in_num) {
+                    LOG(INFO) << "fragment instance " << 
print_id(state->fragment_instance_id())
+                              << " ignore runtime filter(in filter id " << 
filter_desc.filter_id
+                              << ") because: in_num(" << hash_table_size
+                              << ") >= max_in_num(" << max_in_num << ")";
+                    ignore_local_filter(filter_desc.filter_id);
+                    continue;
+                } else if (!is_in_filter && exists_in_filter) {

Review comment:
       This code is inside the loop over the filters.
   In the first loop, the in filter is checked against max_in_num: if in num < 
max_in_num, then `has_in_filter[xxx] = true` is set.
   In the second loop, the other filter (e.g. bloom filter) checks 
has_in_filter; if has_in_filter[xxx] is true, the in num of the in filter must 
be < max_in_num.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to