This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new baf76c40ac [fix](in-bitmap) fix result may be wrong if the left side of the in bitmap predicate is a constant (#17570) (#17643) baf76c40ac is described below commit baf76c40ac23ae94daaf4341289022239b28a354 Author: luozenglin <37725793+luozeng...@users.noreply.github.com> AuthorDate: Sat Mar 11 14:49:15 2023 +0800 [fix](in-bitmap) fix result may be wrong if the left side of the in bitmap predicate is a constant (#17570) (#17643) cherry pick: #17570 --- be/src/vec/exec/join/vnested_loop_join_node.cpp | 12 +++++-- .../org/apache/doris/analysis/StmtRewriter.java | 3 ++ .../java/org/apache/doris/analysis/TableRef.java | 9 +++++ .../apache/doris/planner/NestedLoopJoinNode.java | 14 +++++++- .../apache/doris/planner/SingleNodePlanner.java | 1 + .../data/query_p0/join/test_bitmap_filter.out | 40 ++++++++++++++++++++++ .../suites/query_p0/join/test_bitmap_filter.groovy | 8 +++++ 7 files changed, 84 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index ce6ff568d2..a83ef25e70 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -97,7 +97,6 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { } RETURN_IF_ERROR( vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, &_filter_src_expr_ctxs)); - DCHECK(!filter_src_exprs.empty() == _is_output_left_side_only); return Status::OK(); } @@ -181,6 +180,13 @@ Status VNestedLoopJoinNode::_materialize_build_side(RuntimeState* state) { RuntimeFilterBuild processRuntimeFilterBuild {this}; processRuntimeFilterBuild(state); + // optimize `in bitmap`, see https://github.com/apache/doris/issues/14338 + if (_is_output_left_side_only && + ((_join_op == TJoinOp::type::LEFT_SEMI_JOIN && _build_blocks.empty()) || + (_join_op == TJoinOp::type::LEFT_ANTI_JOIN && 
!_build_blocks.empty()))) { + _left_side_eos = true; + } + return Status::OK(); } @@ -207,7 +213,9 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo RETURN_IF_CANCELLED(state); if (_is_output_left_side_only) { - RETURN_IF_ERROR(get_left_side(state, &_left_block)); + if (!_left_side_eos) { + RETURN_IF_ERROR(get_left_side(state, &_left_block)); + } RETURN_IF_ERROR(_build_output_block(&_left_block, block)); *eos = _left_side_eos; reached_limit(block, eos); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java index fd8a0710f7..12725fbd1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java @@ -802,6 +802,7 @@ public class StmtRewriter { break; } + boolean isInBitmap = false; if (!hasEqJoinPred && !inlineView.isCorrelated()) { // Join with InPredicate is actually an equal join, so we choose HashJoin. if (expr instanceof ExistsPredicate) { @@ -811,6 +812,7 @@ public class StmtRewriter { && (((FunctionCallExpr) joinConjunct).getFnName().getFunction() .equalsIgnoreCase(BITMAP_CONTAINS))) { joinOp = ((InPredicate) expr).isNotIn() ? JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN; + isInBitmap = true; } else { joinOp = JoinOperator.CROSS_JOIN; // We can equal the aggregate subquery using a cross join. 
All conjuncts @@ -829,6 +831,7 @@ public class StmtRewriter { inlineView.setMark(markTuple); inlineView.setJoinOp(joinOp); + inlineView.setInBitmap(isInBitmap); if (joinOp != JoinOperator.CROSS_JOIN) { inlineView.setOnClause(onClausePredicate); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java index cc36f45ec5..1caa0bba97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java @@ -94,6 +94,7 @@ public class TableRef implements ParseNode, Writable { // Indicates whether this table ref is given an explicit alias, protected boolean hasExplicitAlias; protected JoinOperator joinOp; + protected boolean isInBitmap; // for mark join protected boolean isMark; // we must record mark tuple name for re-analyze @@ -280,6 +281,14 @@ public class TableRef implements ParseNode, Writable { this.joinOp = op; } + public boolean isInBitmap() { + return isInBitmap; + } + + public void setInBitmap(boolean inBitmap) { + isInBitmap = inBitmap; + } + public boolean isMark() { return isMark; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java index 89c83df1fa..4f4f0dfded 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/NestedLoopJoinNode.java @@ -49,6 +49,18 @@ import java.util.List; public class NestedLoopJoinNode extends JoinNodeBase { private static final Logger LOG = LogManager.getLogger(NestedLoopJoinNode.class); + // If isOutputLeftSideOnly=true, the data from the left table is returned directly without a join operation. + // This is used to optimize `in bitmap`, because bitmap will make a lot of copies when doing Nested Loop Join, + // which is very resource intensive. 
+ // `in bitmap` has two cases: + // 1. select * from tbl1 where k1 in (select bitmap_col from tbl2); + // This will generate a bitmap runtime filter to filter the left table, because the bitmap is an exact filter + // and does not need to be filtered again in the NestedLoopJoinNode, so it returns the left table data directly. + // 2. select * from tbl1 where 1 in (select bitmap_col from tbl2); + // This sql will be rewritten to + // "select * from tbl1 left semi join tbl2 where bitmap_contains(tbl2.bitmap_col, 1);" + // return all data in the left table to parent node when there is data on the build side, and return empty when + // there is no data on the build side. private boolean isOutputLeftSideOnly = false; private List<Expr> runtimeFilterExpr = Lists.newArrayList(); @@ -246,8 +258,8 @@ public class NestedLoopJoinNode extends JoinNodeBase { if (!runtimeFilters.isEmpty()) { output.append(detailPrefix).append("runtime filters: "); output.append(getRuntimeFilterExplainString(true)); - output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n"); } + output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n"); output.append(detailPrefix).append(String.format("cardinality=%,d", cardinality)).append("\n"); // todo unify in plan node if (vOutputTupleDesc != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index b9f69ec915..5db89f6e33 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -2096,6 +2096,7 @@ public class SingleNodePlanner { result.setJoinConjuncts(joinConjuncts); result.addConjuncts(analyzer.getMarkConjuncts(innerRef)); result.init(analyzer); + result.setOutputLeftSideOnly(innerRef.isInBitmap() && joinConjuncts.isEmpty()); return 
result; } diff --git a/regression-test/data/query_p0/join/test_bitmap_filter.out b/regression-test/data/query_p0/join/test_bitmap_filter.out index f44900b0e0..1ad7849124 100644 --- a/regression-test/data/query_p0/join/test_bitmap_filter.out +++ b/regression-test/data/query_p0/join/test_bitmap_filter.out @@ -98,3 +98,43 @@ 1 1989 10 1991 +-- !sql15 -- +\N \N +1 1989 +2 1986 +3 1989 +4 1991 +5 1985 +6 32767 +7 -32767 +8 255 +9 1991 +10 1991 +11 1989 +12 32767 +13 -32767 +14 255 +15 1992 + +-- !sql16 -- + +-- !sql17 -- + +-- !sql18 -- +\N \N +1 1989 +2 1986 +3 1989 +4 1991 +5 1985 +6 32767 +7 -32767 +8 255 +9 1991 +10 1991 +11 1989 +12 32767 +13 -32767 +14 255 +15 1992 + diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy index 53522289c9..61d9f74f92 100644 --- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy +++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy @@ -70,6 +70,14 @@ suite("test_bitmap_filter", "query_p0") { qt_sql14 "select k1, k2 from ${tbl1} where k1 in (select bitmap_from_string('1,10')) order by 1, 2" + qt_sql15 "select k1, k2 from ${tbl1} t where 11 in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql16 "select k1, k2 from ${tbl1} t where 100 in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql17 "select k1, k2 from ${tbl1} t where 10 not in (select k2 from ${tbl2}) order by 1, 2;" + + qt_sql18 "select k1, k2 from ${tbl1} t where 100 not in (select k2 from ${tbl2}) order by 1, 2;" + test { sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2} b2 where b1.k2 = b2.k1) order by k1;" exception "In bitmap does not support correlated subquery" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org