This is an automated email from the ASF dual-hosted git repository. luozenglin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 00727e8c11 [fix](in-bitmap) fix result may be wrong if the left side of the in bitmap predicate is a constant (#17570) 00727e8c11 is described below commit 00727e8c113575252f9b5b1f6503966cb8c334c0 Author: luozenglin <37725793+luozeng...@users.noreply.github.com> AuthorDate: Thu Mar 9 10:59:05 2023 +0800 [fix](in-bitmap) fix result may be wrong if the left side of the in bitmap predicate is a constant (#17570) --- be/src/vec/exec/join/vnested_loop_join_node.cpp | 8 ++++- .../org/apache/doris/analysis/StmtRewriter.java | 3 ++ .../java/org/apache/doris/analysis/TableRef.java | 9 +++++ .../apache/doris/planner/NestedLoopJoinNode.java | 14 +++++++- .../apache/doris/planner/SingleNodePlanner.java | 1 + .../data/query_p0/join/test_bitmap_filter.out | 40 ++++++++++++++++++++++ .../suites/query_p0/join/test_bitmap_filter.groovy | 8 +++++ 7 files changed, 81 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 2555cea209..3a26ef165c 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -96,7 +96,6 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { } RETURN_IF_ERROR( vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, &_filter_src_expr_ctxs)); - DCHECK(!filter_src_exprs.empty() == _is_output_left_side_only); return Status::OK(); } @@ -189,6 +188,13 @@ Status VNestedLoopJoinNode::sink(doris::RuntimeState* state, vectorized::Block* if (eos) { COUNTER_UPDATE(_build_rows_counter, _build_rows); RuntimeFilterBuild(this)(state); + + // optimize `in bitmap`, see https://github.com/apache/doris/issues/14338 + if (_is_output_left_side_only && + ((_join_op == TJoinOp::type::LEFT_SEMI_JOIN && _build_blocks.empty()) || + (_join_op == TJoinOp::type::LEFT_ANTI_JOIN && !_build_blocks.empty()))) { + _left_side_eos = true; + } } return Status::OK(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java index 5b4fce1f0f..0184bf1b64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java @@ -816,6 +816,7 @@ public class StmtRewriter { break; } + boolean isInBitmap = false; if (!hasEqJoinPred && !inlineView.isCorrelated()) { // Join with InPredicate is actually an equal join, so we choose HashJoin. if (expr instanceof ExistsPredicate) { @@ -825,6 +826,7 @@ public class StmtRewriter { && (((FunctionCallExpr) joinConjunct).getFnName().getFunction() .equalsIgnoreCase(BITMAP_CONTAINS))) { joinOp = ((InPredicate) expr).isNotIn() ? JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN; + isInBitmap = true; } else { joinOp = JoinOperator.CROSS_JOIN; // We can equal the aggregate subquery using a cross join. All conjuncts @@ -843,6 +845,7 @@ public class StmtRewriter { inlineView.setMark(markTuple); inlineView.setJoinOp(joinOp); + inlineView.setInBitmap(isInBitmap); if (joinOp != JoinOperator.CROSS_JOIN) { inlineView.setOnClause(onClausePredicate); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java index e081540ec9..90d3460e85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java @@ -94,6 +94,7 @@ public class TableRef implements ParseNode, Writable { // Indicates whether this table ref is given an explicit alias, protected boolean hasExplicitAlias; protected JoinOperator joinOp; + protected boolean isInBitmap; // for mark join protected boolean isMark; // we must record mark tuple name for re-analyze @@ -280,6 +281,14 @@ public class TableRef implements ParseNode, Writable { this.joinOp = op; } + public boolean isInBitmap() { + return isInBitmap; + } + + public void setInBitmap(boolean inBitmap) { + isInBitmap = inBitmap; + } + public boolean isMark() { return isMark; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java index 33611ff64e..fffadd7f58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java @@ -49,6 +49,18 @@ import java.util.List; public class NestedLoopJoinNode extends JoinNodeBase { private static final Logger LOG = LogManager.getLogger(NestedLoopJoinNode.class); + // If isOutputLeftSideOnly=true, the data from the left table is returned directly without a join operation. + // This is used to optimize `in bitmap`, because bitmap will make a lot of copies when doing Nested Loop Join, + // which is very resource intensive. + // `in bitmap` has two cases: + // 1. select * from tbl1 where k1 in (select bitmap_col from tbl2); + // This will generate a bitmap runtime filter to filter the left table, because the bitmap is an exact filter + // and does not need to be filtered again in the NestedLoopJoinNode, so it returns the left table data directly. + // 2. select * from tbl1 where 1 in (select bitmap_col from tbl2); + // This sql will be rewritten to + // "select * from tbl1 left semi join tbl2 where bitmap_contains(tbl2.bitmap_col, 1);" + // return all data in the left table to parent node when there is data on the build side, and return empty when + // there is no data on the build side. private boolean isOutputLeftSideOnly = false; private List<Expr> runtimeFilterExpr = Lists.newArrayList(); @@ -245,8 +257,8 @@ public class NestedLoopJoinNode extends JoinNodeBase { if (!runtimeFilters.isEmpty()) { output.append(detailPrefix).append("runtime filters: "); output.append(getRuntimeFilterExplainString(true)); - output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n"); } + output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n"); output.append(detailPrefix).append(String.format("cardinality=%,d", cardinality)).append("\n"); // todo unify in plan node if (vOutputTupleDesc != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index cab354b0be..a9d8cdc603 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -2115,6 +2115,7 @@ public class SingleNodePlanner { result.setJoinConjuncts(joinConjuncts); result.addConjuncts(analyzer.getMarkConjuncts(innerRef)); result.init(analyzer); + result.setOutputLeftSideOnly(innerRef.isInBitmap() && joinConjuncts.isEmpty()); return result; } diff --git a/regression-test/data/query_p0/join/test_bitmap_filter.out b/regression-test/data/query_p0/join/test_bitmap_filter.out index f44900b0e0..1ad7849124 100644 --- a/regression-test/data/query_p0/join/test_bitmap_filter.out +++ b/regression-test/data/query_p0/join/test_bitmap_filter.out @@ -98,3 +98,43 @@ 1 1989 10 1991 +-- !sql15 -- +\N \N +1 1989 +2 1986 +3 1989 +4 1991 +5 1985 +6 32767 +7 -32767 +8 255 +9 1991 +10 1991 +11 1989 +12 32767 +13 -32767 +14 255 +15 1992 + +-- !sql16 -- + +-- !sql17 -- + +-- !sql18 -- +\N \N +1 1989 +2 1986 +3 1989 +4 1991 +5 1985 +6 32767 +7 -32767 +8 255 +9 1991 +10 1991 +11 1989 +12 32767 +13 -32767 +14 255 +15 1992 + diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy index 022307f9e0..fe18451978 100644 --- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy +++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy @@ -69,6 +69,14 @@ suite("test_bitmap_filter", "query_p0") { qt_sql14 "select k1, k2 from ${tbl1} where k1 in (select bitmap_from_string('1,10')) order by 1, 2" + qt_sql15 "select k1, k2 from ${tbl1} t where 11 in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql16 "select k1, k2 from ${tbl1} t where 100 in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql17 "select k1, k2 from ${tbl1} t where 10 not in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql18 "select k1, k2 from ${tbl1} t where 100 not in (select k2 from ${tbl2}) order by 1, 2;" + test { sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2} b2 where b1.k2 = b2.k1) order by k1;" exception "In bitmap does not support correlated subquery" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org