This is an automated email from the ASF dual-hosted git repository.

luozenglin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 00727e8c11 [fix](in-bitmap) fix result may be wrong if the left side 
of the in bitmap predicate is a constant (#17570)
00727e8c11 is described below

commit 00727e8c113575252f9b5b1f6503966cb8c334c0
Author: luozenglin <37725793+luozeng...@users.noreply.github.com>
AuthorDate: Thu Mar 9 10:59:05 2023 +0800

    [fix](in-bitmap) fix result may be wrong if the left side of the in bitmap 
predicate is a constant (#17570)
---
 be/src/vec/exec/join/vnested_loop_join_node.cpp    |  8 ++++-
 .../org/apache/doris/analysis/StmtRewriter.java    |  3 ++
 .../java/org/apache/doris/analysis/TableRef.java   |  9 +++++
 .../apache/doris/planner/NestedLoopJoinNode.java   | 14 +++++++-
 .../apache/doris/planner/SingleNodePlanner.java    |  1 +
 .../data/query_p0/join/test_bitmap_filter.out      | 40 ++++++++++++++++++++++
 .../suites/query_p0/join/test_bitmap_filter.groovy |  8 +++++
 7 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp 
b/be/src/vec/exec/join/vnested_loop_join_node.cpp
index 2555cea209..3a26ef165c 100644
--- a/be/src/vec/exec/join/vnested_loop_join_node.cpp
+++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp
@@ -96,7 +96,6 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, 
RuntimeState* state) {
     }
     RETURN_IF_ERROR(
             vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, 
&_filter_src_expr_ctxs));
-    DCHECK(!filter_src_exprs.empty() == _is_output_left_side_only);
     return Status::OK();
 }
 
@@ -189,6 +188,13 @@ Status VNestedLoopJoinNode::sink(doris::RuntimeState* 
state, vectorized::Block*
     if (eos) {
         COUNTER_UPDATE(_build_rows_counter, _build_rows);
         RuntimeFilterBuild(this)(state);
+
+        // optimize `in bitmap`, see 
https://github.com/apache/doris/issues/14338
+        if (_is_output_left_side_only &&
+            ((_join_op == TJoinOp::type::LEFT_SEMI_JOIN && 
_build_blocks.empty()) ||
+             (_join_op == TJoinOp::type::LEFT_ANTI_JOIN && 
!_build_blocks.empty()))) {
+            _left_side_eos = true;
+        }
     }
 
     return Status::OK();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
index 5b4fce1f0f..0184bf1b64 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java
@@ -816,6 +816,7 @@ public class StmtRewriter {
             break;
         }
 
+        boolean isInBitmap = false;
         if (!hasEqJoinPred && !inlineView.isCorrelated()) {
             // Join with InPredicate is actually an equal join, so we choose 
HashJoin.
             if (expr instanceof ExistsPredicate) {
@@ -825,6 +826,7 @@ public class StmtRewriter {
                     && (((FunctionCallExpr) 
joinConjunct).getFnName().getFunction()
                     .equalsIgnoreCase(BITMAP_CONTAINS))) {
                 joinOp = ((InPredicate) expr).isNotIn() ? 
JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN;
+                isInBitmap = true;
             } else {
                 joinOp = JoinOperator.CROSS_JOIN;
                 // We can equal the aggregate subquery using a cross join. All 
conjuncts
@@ -843,6 +845,7 @@ public class StmtRewriter {
 
             inlineView.setMark(markTuple);
             inlineView.setJoinOp(joinOp);
+            inlineView.setInBitmap(isInBitmap);
             if (joinOp != JoinOperator.CROSS_JOIN) {
                 inlineView.setOnClause(onClausePredicate);
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
index e081540ec9..90d3460e85 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
@@ -94,6 +94,7 @@ public class TableRef implements ParseNode, Writable {
     // Indicates whether this table ref is given an explicit alias,
     protected boolean hasExplicitAlias;
     protected JoinOperator joinOp;
+    protected boolean isInBitmap;
     // for mark join
     protected boolean isMark;
     // we must record mark tuple name for re-analyze
@@ -280,6 +281,14 @@ public class TableRef implements ParseNode, Writable {
         this.joinOp = op;
     }
 
+    public boolean isInBitmap() {
+        return isInBitmap;
+    }
+
+    public void setInBitmap(boolean inBitmap) {
+        isInBitmap = inBitmap;
+    }
+
     public boolean isMark() {
         return isMark;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
index 33611ff64e..fffadd7f58 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java
@@ -49,6 +49,18 @@ import java.util.List;
 public class NestedLoopJoinNode extends JoinNodeBase {
     private static final Logger LOG = 
LogManager.getLogger(NestedLoopJoinNode.class);
 
+    // If isOutputLeftSideOnly=true, the data from the left table is returned 
directly without a join operation.
+    // This is used to optimize `in bitmap`, because bitmap will make a lot of 
copies when doing Nested Loop Join,
+    // which is very resource intensive.
+    // `in bitmap` has two cases:
+    // 1. select * from tbl1 where k1 in (select bitmap_col from tbl2);
+    //   This will generate a bitmap runtime filter to filter the left table, 
because the bitmap is an exact filter
+    //   and does not need to be filtered again in the NestedLoopJoinNode, so 
it returns the left table data directly.
+    // 2. select * from tbl1 where 1 in (select bitmap_col from tbl2);
+    //    This sql will be rewritten to
+    //    "select * from tbl1 left semi join tbl2 where 
bitmap_contains(tbl2.bitmap_col, 1);"
+    //    return all data in the left table to parent node when there is data 
on the build side, and return empty when
+    //    there is no data on the build side.
     private boolean isOutputLeftSideOnly = false;
 
     private List<Expr> runtimeFilterExpr = Lists.newArrayList();
@@ -245,8 +257,8 @@ public class NestedLoopJoinNode extends JoinNodeBase {
         if (!runtimeFilters.isEmpty()) {
             output.append(detailPrefix).append("runtime filters: ");
             output.append(getRuntimeFilterExplainString(true));
-            output.append(detailPrefix).append("is output left side only: 
").append(isOutputLeftSideOnly).append("\n");
         }
+        output.append(detailPrefix).append("is output left side only: 
").append(isOutputLeftSideOnly).append("\n");
         output.append(detailPrefix).append(String.format("cardinality=%,d", 
cardinality)).append("\n");
         // todo unify in plan node
         if (vOutputTupleDesc != null) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index cab354b0be..a9d8cdc603 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -2115,6 +2115,7 @@ public class SingleNodePlanner {
             result.setJoinConjuncts(joinConjuncts);
             result.addConjuncts(analyzer.getMarkConjuncts(innerRef));
             result.init(analyzer);
+            result.setOutputLeftSideOnly(innerRef.isInBitmap() && 
joinConjuncts.isEmpty());
             return result;
         }
 
diff --git a/regression-test/data/query_p0/join/test_bitmap_filter.out 
b/regression-test/data/query_p0/join/test_bitmap_filter.out
index f44900b0e0..1ad7849124 100644
--- a/regression-test/data/query_p0/join/test_bitmap_filter.out
+++ b/regression-test/data/query_p0/join/test_bitmap_filter.out
@@ -98,3 +98,43 @@
 1      1989
 10     1991
 
+-- !sql15 --
+\N     \N
+1      1989
+2      1986
+3      1989
+4      1991
+5      1985
+6      32767
+7      -32767
+8      255
+9      1991
+10     1991
+11     1989
+12     32767
+13     -32767
+14     255
+15     1992
+
+-- !sql16 --
+
+-- !sql17 --
+
+-- !sql18 --
+\N     \N
+1      1989
+2      1986
+3      1989
+4      1991
+5      1985
+6      32767
+7      -32767
+8      255
+9      1991
+10     1991
+11     1989
+12     32767
+13     -32767
+14     255
+15     1992
+
diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy 
b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
index 022307f9e0..fe18451978 100644
--- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
+++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy
@@ -69,6 +69,14 @@ suite("test_bitmap_filter", "query_p0") {
 
     qt_sql14 "select k1, k2 from ${tbl1} where k1 in (select 
bitmap_from_string('1,10')) order by 1, 2"
 
+    qt_sql15 "select k1, k2 from ${tbl1} t where 11 in (select k2 from 
${tbl2}) order by 1, 2;"
+
+    qt_sql16 "select k1, k2 from ${tbl1} t where 100 in (select k2 from 
${tbl2}) order by 1, 2;"
+
+    qt_sql17 "select k1, k2 from ${tbl1} t where 10 not in (select k2 from 
${tbl2}) order by 1, 2;"
+
+    qt_sql18 "select k1, k2 from ${tbl1} t where 100 not in (select k2 from 
${tbl2}) order by 1, 2;"
+
     test {
         sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2} 
b2 where b1.k2 = b2.k1) order by k1;"
         exception "In bitmap does not support correlated subquery"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to