wangshuo128 commented on a change in pull request #7357: URL: https://github.com/apache/incubator-doris/pull/7357#discussion_r775045875
########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java ########## @@ -0,0 +1,523 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.CompoundPredicate; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.IsNullPredicate; +import org.apache.doris.analysis.JoinOperator; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.TupleId; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; + +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Set; +import java.util.Map; + +/** + * The function of this rule is to derive a new predicate based on the current predicate. + * eg. + * t1.id = t2.id and t2.id = t3.id and t3.id = 100; + * --> + * t1.id = 100 and t2.id = 100 and t3.id = 100; + * + * 1. Register a new rule InferFiltersRule and add it to GlobalState. + * 2. Traverse Conjunct to construct on/where equivalence connection, numerical connection and isNullPredicate. + * 3. Use Warshall to infer all equivalence connections + * 4. Construct additional numerical connections and isNullPredicate + */ +public class InferFiltersRule implements ExprRewriteRule { + private final static Logger LOG = LogManager.getLogger(InferFiltersRule.class); + public static InferFiltersRule INSTANCE = new InferFiltersRule(); + + @Override + public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { + if (expr == null) { + return null; + } + + if (!analyzer.enableInferPredicate() || clauseType == ExprRewriter.ClauseType.OTHERCLAUSE) { + return expr; + } + + // slotEqSlotExpr: Record existing and infer equivalent connections + List<Expr> slotEqSlotExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotEqSlotExpr() : new ArrayList<>(); + + // slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet(); + + // slotToLiteralExpr: Record existing and infer expr which slot and literal are equal + List<Expr> slotToLiteralExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotToLiteralExpr() : new ArrayList<>(); + + // slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotToLiteralDeDuplication() : Sets.newHashSet(); + + // newExprWithState: just record infer expr which slot and literal are equal and which is not null predicate + // false : Unexecutable intermediate results will be produced during the derivation process. + // true : The new expr will be add to expr. + List<Pair<Expr, Boolean>> newExprWithState = new ArrayList<>(); + + // isNullExpr: Record existing and infer not null predicate + List<Expr> isNullExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnIsNullExpr() : new ArrayList<>(); + + //isNullDeDuplication: De-Duplication for isNullExpr + Set<Expr> isNullDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet(); + + + // exprToInteger/integerToExpr: function is easy to build warshall and newExprWithState + Map<Expr, Integer> exprToInteger = new HashMap<>(); + Map<Integer, Expr> integerToExpr = new HashMap<>(); + + + initAllStructure(expr, slotEqSlotExpr, slotEqSlotDeDuplication, + slotToLiteralExpr, slotToLiteralDeDuplication, + isNullExpr, isNullDeDuplication, analyzer, clauseType); + + + genNewSlotEqSlotPredicate(slotEqSlotExpr, slotEqSlotDeDuplication, exprToInteger, + integerToExpr, analyzer, clauseType); + + inferSlotToLiteralPredicates(slotEqSlotExpr, slotToLiteralDeDuplication, + slotToLiteralExpr, newExprWithState, analyzer, clauseType); + + inferIsNotNullPredicates(slotEqSlotExpr, isNullExpr, + isNullDeDuplication, newExprWithState, analyzer, clauseType); + + if (!newExprWithState.isEmpty()) { + Expr rewriteExpr = expr; + for (int index = 0; index < newExprWithState.size(); index++) { + if (newExprWithState.get(index).second) { + rewriteExpr = new CompoundPredicate(CompoundPredicate.Operator.AND, + rewriteExpr, newExprWithState.get(index).first); + } + } + return rewriteExpr; + } + + return expr; + } + + /** + * Initialize all data structures, count and connect compound predicates. + * Recursively traverse the compoundPredicate that is and, and add it to different structures according to the type + * @param slotEqSlotExpr: Expr is BinaryPredicate. Left expr is slot and right expr is slot; + * @param slotToLiteralExpr: Expr is BinaryPredicate. Left expr is slot and right expr is Literal; + * @param isNullExpr: Expr is isNullPredicate; + */ + private void initAllStructure(Expr conjunct, + List<Expr> slotEqSlotExpr, + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication, + List<Expr> slotToLiteralExpr, + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication, + List<Expr> isNullExpr, + Set<Expr> isNullDeDuplication, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + if (conjunct instanceof CompoundPredicate + && ((CompoundPredicate) conjunct).getOp() == CompoundPredicate.Operator.AND) { + for (int index = 0; index < conjunct.getChildren().size(); ++index) { + initAllStructure(conjunct.getChild(index), slotEqSlotExpr, + slotEqSlotDeDuplication, slotToLiteralExpr, + slotToLiteralDeDuplication, isNullExpr, + isNullDeDuplication, analyzer, clauseType); + } + } + + if (conjunct instanceof BinaryPredicate) { + if (conjunct.getChild(0).unwrapSlotRef() instanceof SlotRef + && conjunct.getChild(1) instanceof LiteralExpr) { + Pair<Expr, Expr> pair = new Pair<>(conjunct.getChild(0).unwrapSlotRef(), conjunct.getChild(1)); + if (!slotToLiteralDeDuplication.contains(pair)) { + slotToLiteralDeDuplication.add(pair); + slotToLiteralExpr.add(conjunct); + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + analyzer.registerOnSlotToLiteralDeDuplication(pair); + analyzer.registerOnSlotToLiteralExpr(conjunct); + } + } + } else if (conjunct.getChild(0).unwrapSlotRef() instanceof SlotRef + && conjunct.getChild(1).unwrapSlotRef() instanceof SlotRef) { + Pair<Expr, Expr> pair = new Pair<>(conjunct.getChild(0).unwrapSlotRef(), + conjunct.getChild(1).unwrapSlotRef()); + Pair<Expr, Expr> eqPair = new Pair<>(conjunct.getChild(1).unwrapSlotRef(), + conjunct.getChild(0).unwrapSlotRef()); + if (!slotEqSlotDeDuplication.contains(pair) + && !slotEqSlotDeDuplication.contains(eqPair)) { + slotEqSlotDeDuplication.add(pair); + slotEqSlotExpr.add(conjunct); + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + analyzer.registerOnSlotEqSlotDeDuplication(pair); + analyzer.registerOnSlotEqSlotExpr(conjunct); + } + } + } + } else if (conjunct instanceof IsNullPredicate) { + if (!isNullDeDuplication.contains(conjunct.getChild(0).unwrapSlotRef()) + && ((IsNullPredicate) conjunct).isNotNull()) { + isNullDeDuplication.add(conjunct.getChild(0).unwrapSlotRef()); + isNullExpr.add(conjunct); + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + analyzer.registerOnIsNullDeDuplication(conjunct.getChild(0).unwrapSlotRef()); + analyzer.registerOnIsNullExpr(conjunct); + } + } + } + } + + /** + * According to the current slotEqSlotExpr infer all slotEqSlotPredicate. + * Use warshall algorithm to generate new slotEqSlotExpr + * eg: + * old expr:t1.id = t2.id and t2.id = t3.id and t3.id = t4.id + * new expr:t1.id = t2.id and t2.id = t3.id and t3.id = t4.id and t1.id = t3.id and t1.id = t4.id and t2.id = t4.id + * + * @param slotEqSlotExpr + * @param slotEqSlotDeDuplication + * @param exprToInteger: A Map the key is Expr, the value is int + * @param integerToExpr: A Map the key is int, the value is exper + */ + private void genNewSlotEqSlotPredicate(List<Expr> slotEqSlotExpr, + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication, + Map<Expr, Integer> exprToInteger, + Map<Integer, Expr> integerToExpr, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + int arrayMaxSize = slotEqSlotExpr.size() * 2; + int warshall[][] = new int[arrayMaxSize][arrayMaxSize]; + for (int index = 0; index < arrayMaxSize; index++) { + warshall[index] = new int[arrayMaxSize]; + Arrays.fill(warshall[index], 0); + } + boolean needGenWarshallArray = initWarshallArray(warshall, arrayMaxSize, + slotEqSlotExpr, exprToInteger, integerToExpr); + if (needGenWarshallArray) { + List<Pair<Integer, Integer>> newSlotArray = new ArrayList<>(); + genWarshallArray(warshall, arrayMaxSize, newSlotArray); + buildNewSlotEqSlotPredicate(newSlotArray, integerToExpr, slotEqSlotExpr, + slotEqSlotDeDuplication, analyzer, clauseType); + } + } + + /** + * Initialize warshall array. + * Specify a corresponding array_id for each slot, and add the two slots in slotEqSlotExpr to the array in rows and columns + * + * @param warshall: Two-dimensional array + * @param arrayMaxSize: slotEqSlotExpr.size() * 2 + * @param slotEqSlotExpr + * @param exprToInteger + * @param integerToExpr + * @return needGenWarshallArray. True:needGen; False:don't needGen + */ + private boolean initWarshallArray(int warshall[][], + int arrayMaxSize, + List<Expr> slotEqSlotExpr, + Map<Expr, Integer> exprToInteger, + Map<Integer, Expr> integerToExpr) { + boolean needGenWarshallArray = false; + int index = 0; + for (Expr slotEqSlot : slotEqSlotExpr) { + int row = 0; + int column = 0; + if (!exprToInteger.containsKey(slotEqSlot.getChild(0))) { + exprToInteger.put(slotEqSlot.getChild(0), index); + integerToExpr.put(index, slotEqSlot.getChild(0)); + row = index; + index++; + } else { + row = exprToInteger.get(slotEqSlot.getChild(0)); + } + + if (!exprToInteger.containsKey(slotEqSlot.getChild(1))) { + exprToInteger.put(slotEqSlot.getChild(1), index); + integerToExpr.put(index, slotEqSlot.getChild(1)); + column = index; + index++; + } else { + column = exprToInteger.get(slotEqSlot.getChild(1)); + } + + if (row >= arrayMaxSize + || column >= arrayMaxSize) { + LOG.debug("Error row or column", row, column, arrayMaxSize); + needGenWarshallArray = false; + break; + } else { + needGenWarshallArray = true; + warshall[row][column] = 1; + warshall[column][row] = 1; + } + } + return needGenWarshallArray; + } + + private void genWarshallArray(int warshall[][], int arrayMaxSize, List<Pair<Integer, Integer>> newSlotsArray) { + for (int k = 0; k < arrayMaxSize; k++) { + for (int i = 0; i < arrayMaxSize; i++) { + if (warshall[i][k] == 0) { + continue; + } + for (int j = 0; j < arrayMaxSize; j++) { + if (warshall[i][k] == 1 + && warshall[k][j] == 1) { + if (i == j) { + continue; + } + warshall[i][j] = 1; + Pair<Integer, Integer> pair = new Pair<>(i, j); + newSlotsArray.add(pair); + } + } + } + } + } + + /** + * Construct a new SlotEqSLot based on the results of warshall. + * Build new BinaryPredicate and add it into structures. + */ + private void buildNewSlotEqSlotPredicate(List<Pair<Integer, Integer>> newSlots, + Map<Integer, Expr> integerToExpr, + List<Expr> slotEqSlotExpr, + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + for (Pair<Integer, Integer> slotPair : newSlots) { + Pair<Expr, Expr> pair = new Pair<>(integerToExpr.get(slotPair.first), integerToExpr.get(slotPair.second)); + Pair<Expr, Expr> eqPair = new Pair<>(integerToExpr.get(slotPair.second), integerToExpr.get(slotPair.first)); + if (!slotEqSlotDeDuplication.contains(pair) + && !slotEqSlotDeDuplication.contains(eqPair)) { + slotEqSlotDeDuplication.add(pair); + slotEqSlotExpr.add( + new BinaryPredicate(BinaryPredicate.Operator.EQ, integerToExpr.get(slotPair.first), integerToExpr.get(slotPair.second))); + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + analyzer.registerOnSlotEqSlotDeDuplication(pair); + analyzer.registerOnSlotEqSlotExpr( + new BinaryPredicate(BinaryPredicate.Operator.EQ, integerToExpr.get(slotPair.first), integerToExpr.get(slotPair.second)) + ); + } + } + } + } + + private void inferSlotToLiteralPredicates(List<Expr> slotEqSlotExpr, + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication, + List<Expr> slotToLiteralExpr, + List<Pair<Expr, Boolean>> newExprWithState, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + for (Expr slotToLiteral : slotToLiteralExpr) { + buildNewBinaryPredicate(slotToLiteral, slotEqSlotExpr, + slotToLiteralDeDuplication, newExprWithState, analyzer, clauseType); + } + } + + /** + * Traverse slotEqSlot to extract slots with equal expressions and construct a new slotToLiteral. + */ + private void buildNewBinaryPredicate(Expr slotToLiteral, + List<Expr> slotEqSlotExpr, + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication, + List<Pair<Expr, Boolean>> newExprWithState, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + SlotRef checkSlot = slotToLiteral.getChild(0).unwrapSlotRef(); + for (Expr conjunct : slotEqSlotExpr) { + SlotRef leftSlot = conjunct.getChild(0).unwrapSlotRef(); + SlotRef rightSlot = conjunct.getChild(1).unwrapSlotRef(); + + if (checkSlot.notCheckDescIdEquals(leftSlot)) { + addNewBinaryPredicate(genNewBinaryPredicate(slotToLiteral, rightSlot), + slotToLiteralDeDuplication, newExprWithState, + isNeedInfer(rightSlot, leftSlot, analyzer, clauseType), + analyzer, clauseType); + } else if (checkSlot.notCheckDescIdEquals(rightSlot)) { + addNewBinaryPredicate(genNewBinaryPredicate(slotToLiteral, leftSlot), + slotToLiteralDeDuplication, newExprWithState, + isNeedInfer(leftSlot, rightSlot, analyzer, clauseType), + analyzer, clauseType); + } + } + } + + /** + * To determine whether it needs to be extended. + * eg:t1.id = t2.id and t2.id = 1; + * + * @param newSlot: t1.id + * @param checkSlot: t2.id + * @return needInfer. True: needInfer. False: not needInfer + */ + private boolean isNeedInfer(SlotRef newSlot, SlotRef checkSlot, Analyzer analyzer, ExprRewriter.ClauseType clauseType) { + boolean ret = false; + TupleId newTid = newSlot.getDesc().getParent().getRef().getId(); + TupleId checkTid = checkSlot.getDesc().getParent().getRef().getId(); + boolean needChange = false; + Pair<TupleId, TupleId> tids = new Pair<>(newTid, checkTid); + if (analyzer.isContainTupleIds(tids)) { + JoinOperator joinOperator = analyzer.getAnyTwoTablesJoinOp(tids); + ret = checkNeedInfer(joinOperator, needChange, clauseType); + } else { + Pair<TupleId, TupleId> changeTids = new Pair<>(checkTid, newTid); + if (analyzer.isContainTupleIds(changeTids)) { + needChange = true; + JoinOperator joinOperator = analyzer.getAnyTwoTablesJoinOp(changeTids); + ret = checkNeedInfer(joinOperator, needChange, clauseType); + } + } + return ret; + } + + private boolean checkNeedInfer(JoinOperator joinOperator, boolean needChange, ExprRewriter.ClauseType clauseType) { + boolean ret = false; + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + if (joinOperator.isInnerJoin() + || (joinOperator == JoinOperator.LEFT_SEMI_JOIN) + || (!needChange && joinOperator == JoinOperator.RIGHT_OUTER_JOIN) + || (needChange && (joinOperator == JoinOperator.LEFT_OUTER_JOIN || joinOperator == JoinOperator.LEFT_ANTI_JOIN))) { + ret = true; + } + } else if (clauseType == ExprRewriter.ClauseType.WHERECLAUSE) { + if (joinOperator.isInnerJoin() + || (joinOperator == JoinOperator.LEFT_SEMI_JOIN + || (needChange && joinOperator == JoinOperator.RIGHT_OUTER_JOIN)) + || (!needChange && (joinOperator == JoinOperator.LEFT_OUTER_JOIN || joinOperator == JoinOperator.LEFT_ANTI_JOIN))) { + ret = true; + } + } + return ret; + } + + /** + * generate a new BinaryPredicate. + * @return new BinaryPredicate. + */ + private Expr genNewBinaryPredicate(Expr oldExpr, Expr newSlot) { + if (oldExpr instanceof BinaryPredicate) { + BinaryPredicate oldBP = (BinaryPredicate) oldExpr; + BinaryPredicate newBP = new BinaryPredicate(oldBP.getOp(), newSlot, oldBP.getChild(1)); + return newBP; + } + return oldExpr; + } + + /** + * add the new BinaryPredicate to slotToLiteralDeDuplication and newExprWithState and simultaneous deduplication + */ + private void addNewBinaryPredicate(Expr expr, + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication, + List<Pair<Expr, Boolean> > newExprWithState, + boolean needAddnewExprWithState, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + if (expr instanceof BinaryPredicate) { + BinaryPredicate newBP = (BinaryPredicate) expr; + Pair<Expr, Expr> pair = new Pair<>(newBP.getChild(0), newBP.getChild(1)); + if (!slotToLiteralDeDuplication.contains(pair)) { + slotToLiteralDeDuplication.add(pair); + Pair<Expr, Boolean> newBPWithBool = new Pair<>(newBP, needAddnewExprWithState); + newExprWithState.add(newBPWithBool); + if (clauseType == ExprRewriter.ClauseType.ONCLAUSE) { + analyzer.registerOnSlotToLiteralDeDuplication(pair); + analyzer.registerOnSlotToLiteralExpr(newBP); + } + } + } + } + + private void inferIsNotNullPredicates(List<Expr> slotEqSlotExpr, + List<Expr> isNullExpr, + Set<Expr> isNullDeDuplication, + List<Pair<Expr, Boolean>> newExprWithState, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + for (Expr isNullPredicate : isNullExpr) { + buildNewIsNotNullPredicate(isNullPredicate, slotEqSlotExpr, isNullDeDuplication, + newExprWithState, analyzer, clauseType); + } + } + + private void buildNewIsNotNullPredicate(Expr expr, + List<Expr> slotEqSlotExpr, + Set<Expr> isNullDeDuplication, + List<Pair<Expr, Boolean>> newExprWithState, + Analyzer analyzer, + ExprRewriter.ClauseType clauseType) { + if (expr instanceof IsNullPredicate) { Review comment: We should check `IsNullPredicate.isSlotRefChildren` here. Otherwise `checkSlot` could be null. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org