This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch dev-1.1.2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push: new 6dff66f099 [fix](analyzer) InferFilterRule bug: equations in on clause of outer/anti join are not inferable. (#11515) 6dff66f099 is described below commit 6dff66f099e402df9e3c35a7027f9c561ff185df Author: minghong <minghong.z...@163.com> AuthorDate: Thu Aug 11 09:36:43 2022 +0800 [fix](analyzer) InferFilterRule bug: equations in on clause of outer/anti join are not inferable. (#11515) --- .../java/org/apache/doris/analysis/TableRef.java | 3 +- .../org/apache/doris/rewrite/ExprRewriter.java | 43 ++++++++++++- .../org/apache/doris/rewrite/InferFiltersRule.java | 70 +++++++++++----------- .../apache/doris/rewrite/InferFiltersRuleTest.java | 26 ++++++-- .../test_forbidden_infer_filter_rule.out | 5 ++ .../test_forbidden_infer_filter_rule.groovy | 44 ++++++++++++++ 6 files changed, 149 insertions(+), 42 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java index fea56df87f..3f2a279197 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java @@ -27,6 +27,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.rewrite.ExprRewriter; +import org.apache.doris.rewrite.ExprRewriter.ClauseType; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -573,7 +574,7 @@ public class TableRef implements ParseNode, Writable { Preconditions.checkState(isAnalyzed); if (onClause != null) { Expr expr = onClause.clone(); - onClause = rewriter.rewrite(onClause, analyzer, ExprRewriter.ClauseType.ON_CLAUSE); + onClause = rewriter.rewrite(onClause, analyzer, ClauseType.fromJoinType(joinOp)); if (joinOp.isOuterJoin() || joinOp.isSemiAntiJoin()) { if (onClause instanceof BoolLiteral && !((BoolLiteral) onClause).getValue()) { onClause = expr; diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java index eb211807bc..632bc3948f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java @@ -19,6 +19,7 @@ package org.apache.doris.rewrite; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.JoinOperator; import org.apache.doris.common.AnalysisException; import com.google.common.collect.Lists; @@ -50,9 +51,47 @@ public class ExprRewriter { // The type of clause that executes the rule. // This type is only used in InferFiltersRule, RewriteDateLiteralRule, other rules are not used public enum ClauseType { - ON_CLAUSE, + INNER_JOIN_CLAUSE, + LEFT_OUTER_JOIN_CLAUSE, + RIGHT_OUTER_JOIN_CLAUSE, + FULL_OUTER_JOIN_CLAUSE, + LEFT_SEMI_JOIN_CLAUSE, + RIGHT_SEMI_JOIN_CLAUSE, + LEFT_ANTI_JOIN_CLAUSE, + RIGHT_ANTI_JOIN_CLAUSE, + CROSS_JOIN_CLAUSE, WHERE_CLAUSE, - OTHER_CLAUSE, // All other clauses that are not on and not where + OTHER_CLAUSE; // All other clauses that are not on and not where + + public static ClauseType fromJoinType(JoinOperator joinOp) { + switch (joinOp) { + case INNER_JOIN: return INNER_JOIN_CLAUSE; + case LEFT_OUTER_JOIN: return LEFT_OUTER_JOIN_CLAUSE; + case RIGHT_OUTER_JOIN: return RIGHT_OUTER_JOIN_CLAUSE; + case FULL_OUTER_JOIN: return FULL_OUTER_JOIN_CLAUSE; + case LEFT_SEMI_JOIN: return LEFT_SEMI_JOIN_CLAUSE; + case RIGHT_SEMI_JOIN: return RIGHT_SEMI_JOIN_CLAUSE; + case LEFT_ANTI_JOIN: return LEFT_ANTI_JOIN_CLAUSE; + case RIGHT_ANTI_JOIN: return RIGHT_ANTI_JOIN_CLAUSE; + case CROSS_JOIN: return CROSS_JOIN_CLAUSE; + default: return OTHER_CLAUSE; + } + } + + public boolean isInferable() { + if (this == INNER_JOIN_CLAUSE + || this == LEFT_SEMI_JOIN_CLAUSE + || this == RIGHT_SEMI_JOIN_CLAUSE + || this == WHERE_CLAUSE + || this == OTHER_CLAUSE) { + return true; + } + return false; + } + + public boolean isOnClause() { + return this.compareTo(WHERE_CLAUSE) < 0; + } } // Once-only Rules diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java index c83d3b94c8..bfa2d4b3fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java @@ -72,19 +72,23 @@ public class InferFiltersRule implements ExprRewriteRule { return expr; } + if (!clauseType.isInferable()) { + return expr; + } + // slotEqSlotExpr: Record existing and infer equivalent connections List<Expr> slotEqSlotExpr = analyzer.getOnSlotEqSlotExpr(); // slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr - Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = - (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet(); + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = (clauseType.isOnClause()) + ? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet(); // slotToLiteralExpr: Record existing and infer expr which slot and literal are equal List<Expr> slotToLiteralExpr = analyzer.getOnSlotToLiteralExpr(); // slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr - Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = - (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ? analyzer.getOnSlotToLiteralDeDuplication() : Sets.newHashSet(); + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = (clauseType.isOnClause()) + ? analyzer.getOnSlotToLiteralDeDuplication() : Sets.newHashSet(); // newExprWithState: just record infer expr which slot and literal are equal and which is not null predicate // false : Unexecutable intermediate results will be produced during the derivation process. @@ -95,15 +99,15 @@ public class InferFiltersRule implements ExprRewriteRule { List<Expr> isNullExpr = analyzer.getOnIsNullExpr(); // isNullDeDuplication: De-Duplication for isNullExpr - Set<Expr> isNullDeDuplication = - (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet(); + Set<Expr> isNullDeDuplication = (clauseType.isOnClause()) + ? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet(); // inExpr: Record existing and infer in predicate List<Expr> inExpr = analyzer.getInExpr(); // inDeDuplication: De-Duplication for inExpr Set<Expr> inDeDuplication = - (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) ? analyzer.getInDeDuplication() : Sets.newHashSet(); + (clauseType.isOnClause()) ? analyzer.getInDeDuplication() : Sets.newHashSet(); // exprToWarshallArraySubscript/warshallArraySubscriptToExpr: function is easy to build warshall and newExprWithState Map<Expr, Integer> exprToWarshallArraySubscript = new HashMap<>(); @@ -180,7 +184,7 @@ public class InferFiltersRule implements ExprRewriteRule { if (!slotToLiteralDeDuplication.contains(pair)) { slotToLiteralDeDuplication.add(pair); slotToLiteralExpr.add(conjunct); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerOnSlotToLiteralDeDuplication(pair); analyzer.registerOnSlotToLiteralExpr(conjunct); } @@ -197,7 +201,7 @@ public class InferFiltersRule implements ExprRewriteRule { && !slotEqSlotDeDuplication.contains(eqPair)) { slotEqSlotDeDuplication.add(pair); slotEqSlotExpr.add(conjunct); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerOnSlotEqSlotDeDuplication(pair); analyzer.registerOnSlotEqSlotExpr(conjunct); } @@ -210,7 +214,7 @@ public class InferFiltersRule implements ExprRewriteRule { && ((IsNullPredicate) conjunct).isNotNull()) { isNullDeDuplication.add(conjunct.getChild(0).unwrapSlotRef()); isNullExpr.add(conjunct); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerOnIsNullDeDuplication(conjunct.getChild(0).unwrapSlotRef()); analyzer.registerOnIsNullExpr(conjunct); } @@ -221,7 +225,7 @@ public class InferFiltersRule implements ExprRewriteRule { if (!inDeDuplication.contains(conjunct.getChild(0).unwrapSlotRef())) { inDeDuplication.add(conjunct.getChild(0).unwrapSlotRef()); inExpr.add(conjunct); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerInExpr(conjunct); analyzer.registerInDeDuplication(conjunct.getChild(0).unwrapSlotRef()); } @@ -349,26 +353,22 @@ public class InferFiltersRule implements ExprRewriteRule { Analyzer analyzer, ExprRewriter.ClauseType clauseType) { for (Pair<Integer, Integer> slotPair : newSlots) { - Pair<Expr, Expr> pair = new Pair<>( - warshallArraySubscriptToExpr.get(slotPair.first), warshallArraySubscriptToExpr.get(slotPair.second)); - Pair<Expr, Expr> eqPair = new Pair<>( - warshallArraySubscriptToExpr.get(slotPair.second), warshallArraySubscriptToExpr.get(slotPair.first)); - if (!slotEqSlotDeDuplication.contains(pair) - && !slotEqSlotDeDuplication.contains(eqPair)) { - slotEqSlotDeDuplication.add(pair); - slotEqSlotExpr.add( - new BinaryPredicate(BinaryPredicate.Operator.EQ, - warshallArraySubscriptToExpr.get(slotPair.first), - warshallArraySubscriptToExpr.get(slotPair.second))); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { - analyzer.registerOnSlotEqSlotDeDuplication(pair); - analyzer.registerOnSlotEqSlotExpr( - new BinaryPredicate(BinaryPredicate.Operator.EQ, - warshallArraySubscriptToExpr.get(slotPair.first), - warshallArraySubscriptToExpr.get(slotPair.second)) - ); - } - } + Pair<Expr, Expr> pair = new Pair<>(warshallArraySubscriptToExpr.get(slotPair.first), + warshallArraySubscriptToExpr.get(slotPair.second)); + Pair<Expr, Expr> eqPair = new Pair<>(warshallArraySubscriptToExpr.get(slotPair.second), + warshallArraySubscriptToExpr.get(slotPair.first)); + if (!slotEqSlotDeDuplication.contains(pair) && !slotEqSlotDeDuplication.contains(eqPair)) { + slotEqSlotDeDuplication.add(pair); + slotEqSlotExpr.add(new BinaryPredicate(BinaryPredicate.Operator.EQ, + warshallArraySubscriptToExpr.get(slotPair.first), + warshallArraySubscriptToExpr.get(slotPair.second))); + if (clauseType.isOnClause()) { + analyzer.registerOnSlotEqSlotDeDuplication(pair); + analyzer.registerOnSlotEqSlotExpr(new BinaryPredicate(BinaryPredicate.Operator.EQ, + warshallArraySubscriptToExpr.get(slotPair.first), + warshallArraySubscriptToExpr.get(slotPair.second))); + } + } } } @@ -450,7 +450,7 @@ public class InferFiltersRule implements ExprRewriteRule { */ private boolean checkNeedInfer(JoinOperator joinOperator, boolean needChange, ExprRewriter.ClauseType clauseType) { boolean ret = false; - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { if (joinOperator.isInnerJoin() || (joinOperator == JoinOperator.LEFT_SEMI_JOIN) || (!needChange && joinOperator == JoinOperator.RIGHT_OUTER_JOIN) @@ -496,7 +496,7 @@ public class InferFiltersRule implements ExprRewriteRule { slotToLiteralDeDuplication.add(pair); Pair<Expr, Boolean> newBPWithBool = new Pair<>(newBP, needAddnewExprWithState); newExprWithState.add(newBPWithBool); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerOnSlotToLiteralDeDuplication(pair); analyzer.registerOnSlotToLiteralExpr(newBP); } @@ -575,7 +575,7 @@ public class InferFiltersRule implements ExprRewriteRule { isNullDeDuplication.add(newExpr.getChild(0)); Pair<Expr, Boolean> newExprWithBoolean = new Pair<>(newExpr, true); newExprWithState.add(newExprWithBoolean); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerOnIsNullExpr(newExpr); analyzer.registerOnIsNullDeDuplication(newExpr); } @@ -660,7 +660,7 @@ public class InferFiltersRule implements ExprRewriteRule { inDeDuplication.add(newIP); Pair<Expr, Boolean> newBPWithBool = new Pair<>(newIP, needAddnewExprWithState); newExprWithState.add(newBPWithBool); - if (clauseType == ExprRewriter.ClauseType.ON_CLAUSE) { + if (clauseType.isOnClause()) { analyzer.registerInDeDuplication(newIP); analyzer.registerInExpr(newIP); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java index c5c5be25a6..56a2b9d4be 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/InferFiltersRuleTest.java @@ -142,15 +142,24 @@ public class InferFiltersRuleTest { Assert.assertFalse(planString.contains("`tb1`.`k1` = 1")); } @Test - public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception { + public void testOn2TablesLeftJoinNotInferable() throws Exception { SessionVariable sessionVariable = dorisAssert.getSessionVariable(); sessionVariable.setEnableInferPredicate(true); Assert.assertTrue(sessionVariable.isEnableInferPredicate()); - String query = "select * from tb1 left anti join tb2 on tb1.k1 = tb2.k1 and tb1.k1 = 1"; + String query = "select * from tb1 left join tb2 on tb1.k1 = tb2.k1 and tb2.k1 = 1"; String planString = dorisAssert.query(query).explainQuery(); - Assert.assertTrue(planString.contains("`tb2`.`k1` = 1")); + Assert.assertFalse(planString.contains("`tb1`.`k1` = 1")); } + /* + the following 3 test case is valid. But we cannot tell them from other incorrect inferences. + In origin design we made a mistake: we assume inference is symmetrical. + For example, t1.x=t2.x and t1.x=1 => t2.x=1 + this is not always true. + if this is left join, t1 is left, t2.x=1 is not valid. + However, in inferFilterRule, we do not know whether t1.x is from left or right table. + And hence, we have to skip inference for outer/anti join for quick fix. + @Test public void testOn3Tables1stInner2ndRightJoinEqLiteralAt2nd() throws Exception { SessionVariable sessionVariable = dorisAssert.getSessionVariable(); @@ -174,7 +183,16 @@ public class InferFiltersRuleTest { Assert.assertTrue(planString.contains("`tb2`.`k1` = 1")); Assert.assertTrue(planString.contains("`tb1`.`k1` = 1")); } - + @Test + public void testOn2TablesLeftAntiJoinEqLiteralAt1st() throws Exception { + SessionVariable sessionVariable = dorisAssert.getSessionVariable(); + sessionVariable.setEnableInferPredicate(true); + Assert.assertTrue(sessionVariable.isEnableInferPredicate()); + String query = "select * from tb1 left anti join tb2 on tb1.k1 = tb2.k1 and tb1.k1 = 1"; + String planString = dorisAssert.query(query).explainQuery(); + Assert.assertTrue(planString.contains("`tb2`.`k1` = 1")); + } + */ @Test public void testOnIsNotNullPredicate() throws Exception { SessionVariable sessionVariable = dorisAssert.getSessionVariable(); diff --git a/regression-test/data/correctness/test_forbidden_infer_filter_rule.out b/regression-test/data/correctness/test_forbidden_infer_filter_rule.out new file mode 100644 index 0000000000..2cfc6b4ac3 --- /dev/null +++ b/regression-test/data/correctness/test_forbidden_infer_filter_rule.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +-1 \N +1 1 + diff --git a/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy b/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy new file mode 100644 index 0000000000..fe6ad1ec3e --- /dev/null +++ b/regression-test/suites/correctness/test_forbidden_infer_filter_rule.groovy @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_forbidden_infer_filter_rule") { + sql """ DROP TABLE IF EXISTS A """ + sql """ + CREATE TABLE IF NOT EXISTS A + ( + a_id int + ) + DISTRIBUTED BY HASH(a_id) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + + sql """ DROP TABLE IF EXISTS B """ + sql """ + CREATE TABLE IF NOT EXISTS B + ( + b_id int + ) + DISTRIBUTED BY HASH(b_id) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + + sql " INSERT INTO A values (1), (-1);" + + sql " INSERT INTO B values (1);" + + qt_sql "select * from A left join B on a_id=b_id and b_id in (1,2) where a_id < 100 order by a_id;" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org