zhengshiJ commented on a change in pull request #7357: URL: https://github.com/apache/incubator-doris/pull/7357#discussion_r775407111
########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java ########## @@ -0,0 +1,411 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.CompoundPredicate; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.IsNullPredicate; +import org.apache.doris.analysis.JoinOperator; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; + +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Set; +import java.util.Map; + +/** + * The function of this rule is to derive a new predicate based on the current predicate. + * eg. + * t1.id = t2.id and t2.id = t3.id and t3.id = 100; + * --> + * t1.id = 100 and t2.id = 100 and t3.id = 100; + * + * 1. Register a new rule InferFiltersRule and add it to GlobalState. + * 2. 
Traverse Conjunct to construct on/where equivalence connection, numerical connection and isNullPredicate. + * 3. Use Warshall to infer all equivalence connections + * 4. Construct additional numerical connections and isNullPredicate + */ +public class InferFiltersRule implements ExprRewriteRule { + private final static Logger LOG = LogManager.getLogger(InferFiltersRule.class); + public static InferFiltersRule INSTANCE = new InferFiltersRule(); + private static int indexNum = 0; + + @Override + public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException { + clearIndexNum(); + if (expr == null) { + return null; + } else if (!analyzer.enableInferPredicate()) { + return expr; + } else { + ArrayList<Expr> slotEqSlotPredicatesList = new ArrayList<>(); + Set<Pair<Expr, Expr>> slotEqSlotPredicatesSet = Sets.newHashSet(); + ArrayList<Expr> slotToLiteralPredicatesList = new ArrayList<>(); + Set<Pair<Expr, Expr>> slotToLiteralPredicatesSet = Sets.newHashSet(); + ArrayList<Pair<Expr, Boolean>> newPredicatesList = new ArrayList<>(); + ArrayList<Expr> isNullPredicatesList = new ArrayList<>(); + Set<Expr> isNullPredicatesSet = Sets.newHashSet(); + + Map<Expr, Integer> exprIntegerMap = new HashMap<>(); + Map<Integer, Expr> integerExprMap = new HashMap<>(); + + initAllStructure(expr, slotEqSlotPredicatesList, slotEqSlotPredicatesSet, + slotToLiteralPredicatesList, slotToLiteralPredicatesSet, + isNullPredicatesList, isNullPredicatesSet); + + genNewSlotEqSlotArray(slotEqSlotPredicatesList, slotEqSlotPredicatesSet, exprIntegerMap, integerExprMap); + + inferSlotPredicates(slotEqSlotPredicatesList, slotToLiteralPredicatesSet, + slotToLiteralPredicatesList, newPredicatesList); + + inferIsNullPredicates(slotEqSlotPredicatesList, isNullPredicatesList, + isNullPredicatesSet, newPredicatesList); + + if (!newPredicatesList.isEmpty()) { + Expr rewriteExpr = expr; + for (int index = 0; index < newPredicatesList.size(); index++) { + if (newPredicatesList.get(index).second) { + 
rewriteExpr = new CompoundPredicate(CompoundPredicate.Operator.AND, + rewriteExpr, newPredicatesList.get(index).first); + } + } + return rewriteExpr; + } + } + return expr; + } + + private void clearIndexNum() { indexNum = 0; } + private void addIndexNum() { indexNum++; } + private int getIndexNum() { return indexNum; } + + /** + * Initialize all data structures, count and connect compound predicates. + * @param slotEqSlotPredicatesList: Expr is BinaryPredicate. Left expr is slot and right expr is slot; + * @param slotToLiteralPredicatesList: Expr is BinaryPredicate. Left expr is slot and right expr is Literal; + * @param isNullPredicatesList: Expr is isNullPredicate; + */ + private void initAllStructure(Expr conjunct, + ArrayList<Expr> slotEqSlotPredicatesList, + Set<Pair<Expr, Expr>> slotEqSlotPredicatesSet, + ArrayList<Expr> slotToLiteralPredicatesList, + Set<Pair<Expr, Expr>> slotToLiteralPredicatesSet, + ArrayList<Expr> isNullPredicatesList, + Set<Expr> isNullPredicatesSet) { + if (conjunct instanceof CompoundPredicate + && ((CompoundPredicate) conjunct).getOp() == CompoundPredicate.Operator.AND) { + for (int index = 0; index < conjunct.getChildren().size(); ++index) { + initAllStructure(conjunct.getChild(index), slotEqSlotPredicatesList, + slotEqSlotPredicatesSet, slotToLiteralPredicatesList, + slotToLiteralPredicatesSet, isNullPredicatesList, isNullPredicatesSet); + } + } + + if (conjunct instanceof BinaryPredicate) { + if (conjunct.getChild(0).unwrapSlotRef() instanceof SlotRef + && conjunct.getChild(1) instanceof LiteralExpr) { + Pair<Expr, Expr> pair = new Pair<>(conjunct.getChild(0).unwrapSlotRef(), conjunct.getChild(1)); + if (!slotToLiteralPredicatesSet.contains(pair)) { + slotToLiteralPredicatesSet.add(pair); + slotToLiteralPredicatesList.add(conjunct); + } + } else if (conjunct.getChild(0).unwrapSlotRef() instanceof SlotRef + && conjunct.getChild(1).unwrapSlotRef() instanceof SlotRef) { + Pair<Expr, Expr> pair = new 
Pair<>(conjunct.getChild(0).unwrapSlotRef(), + conjunct.getChild(1).unwrapSlotRef()); + Pair<Expr, Expr> eqPair = new Pair<>(conjunct.getChild(1).unwrapSlotRef(), + conjunct.getChild(0).unwrapSlotRef()); + if (!slotEqSlotPredicatesSet.contains(pair) + && !slotEqSlotPredicatesSet.contains(eqPair)) { + slotEqSlotPredicatesSet.add(pair); + slotEqSlotPredicatesList.add(conjunct); + } + } + } else if (conjunct instanceof IsNullPredicate) { + if (!isNullPredicatesSet.contains(conjunct.getChild(0).unwrapSlotRef())) { + isNullPredicatesSet.add(conjunct.getChild(0).unwrapSlotRef()); + isNullPredicatesList.add(conjunct); + } + } + } + + /** + * According to the current slotEqSlotPredicates infer all slotEqSlotPredicate. + * @param slotEqSlotPredicatesList + * @param slotEqSlotPredicatesSet + * @param exprIntegerMap: A Map the key is Expr, the value is int + * @param integerExprMap: A Map the key is int, the value is exper + */ + private void genNewSlotEqSlotArray(ArrayList<Expr> slotEqSlotPredicatesList, + Set<Pair<Expr, Expr>> slotEqSlotPredicatesSet, + Map<Expr, Integer> exprIntegerMap, + Map<Integer, Expr> integerExprMap) { + int arrayMaxSize = slotEqSlotPredicatesList.size() * 2; + int warshall[][] = new int[arrayMaxSize][arrayMaxSize]; + for (int index = 0; index < arrayMaxSize; index++) { + warshall[index] = new int[arrayMaxSize]; + Arrays.fill(warshall[index], 0); + } + boolean needGenWarshallArray = initWarshallArray(warshall, arrayMaxSize, + slotEqSlotPredicatesList, exprIntegerMap, integerExprMap); + if (needGenWarshallArray) { + ArrayList<Pair<Integer, Integer>> newSlotArray = new ArrayList<>(); + genWarshallArray(warshall, arrayMaxSize, newSlotArray); + buildNewSlotEqSlotArray(newSlotArray, integerExprMap, slotEqSlotPredicatesList, slotEqSlotPredicatesSet); + } + } + + /** + * Initialize warshall array. 
+ * @param warshall: Two-dimensional array + * @param arrayMaxSize: slotEqSlotPredicatesList.size() * 2 + * @param slotEqSlotPredicatesList + * @param exprIntegerMap + * @param integerExprMap + * @return needGenWarshallArray. True:needGen; False:don't needGen + */ + private boolean initWarshallArray(int warshall[][], + int arrayMaxSize, + ArrayList<Expr> slotEqSlotPredicatesList, + Map<Expr, Integer> exprIntegerMap, + Map<Integer, Expr> integerExprMap) { + boolean needGenWarshallArray = false; + for (Expr slotEqSlot : slotEqSlotPredicatesList) { + int row = 0; + int column = 0; + if (!exprIntegerMap.containsKey(slotEqSlot.getChild(0))) { + exprIntegerMap.put(slotEqSlot.getChild(0), getIndexNum()); + integerExprMap.put(getIndexNum(), slotEqSlot.getChild(0)); + row = getIndexNum(); + addIndexNum(); + } else { + row = exprIntegerMap.get(slotEqSlot.getChild(0)); + } + + if (!exprIntegerMap.containsKey(slotEqSlot.getChild(1))) { + exprIntegerMap.put(slotEqSlot.getChild(1), getIndexNum()); + integerExprMap.put(getIndexNum(), slotEqSlot.getChild(1)); + column = getIndexNum(); + addIndexNum(); + } else { + column = exprIntegerMap.get(slotEqSlot.getChild(1)); + } + + if (row >= arrayMaxSize + || column >= arrayMaxSize) { + LOG.debug("Error row or column", row, column, arrayMaxSize); + needGenWarshallArray = false; + break; + } else { + needGenWarshallArray = true; + warshall[row][column] = 1; + warshall[column][row] = 1; + } + } + return needGenWarshallArray; + } + + private void genWarshallArray(int warshall[][], int arrayMaxSize, ArrayList<Pair<Integer, Integer>> newSlotsArray) { + for (int k = 0; k < arrayMaxSize; k++) { + for (int i = 0; i < arrayMaxSize; i++) { + if (warshall[i][k] == 0) { + continue; + } + for (int j = 0; j < arrayMaxSize; j++) { + if (warshall[i][k] == 1 + && warshall[k][j] == 1) { + warshall[i][j] = 1; + Pair<Integer, Integer> pair = new Pair<>(i, j); + newSlotsArray.add(pair); + } + } + } + } + } + + /** + * Construct a new SlotEqSLot based on the 
results of warshall. + */ + private void buildNewSlotEqSlotArray(ArrayList<Pair<Integer, Integer>> newSlotsArray, + Map<Integer, Expr> integerExprMap, + ArrayList<Expr> slotEqSlotPredicatesList, + Set<Pair<Expr, Expr>> slotEqSlotPredicatesSet) { + for (Pair<Integer, Integer> slotPair : newSlotsArray) { + Pair<Expr, Expr> pair = new Pair<>(integerExprMap.get(slotPair.first), integerExprMap.get(slotPair.second)); + Pair<Expr, Expr> eqPair = new Pair<>(integerExprMap.get(slotPair.second), integerExprMap.get(slotPair.first)); + if (!slotEqSlotPredicatesSet.contains(pair) + && !slotEqSlotPredicatesSet.contains(eqPair)) { + slotEqSlotPredicatesSet.add(pair); + slotEqSlotPredicatesList.add( + new BinaryPredicate(BinaryPredicate.Operator.EQ, integerExprMap.get(slotPair.first), integerExprMap.get(slotPair.second))); + } + } + } + + private void inferSlotPredicates(ArrayList<Expr> slotEqSlotPredicatesList, + Set<Pair<Expr, Expr>> slotToLiteralPredicatesSet, + ArrayList<Expr> slotToLiteralPredicatesList, + ArrayList<Pair<Expr, Boolean>> newPredicatesList) { + for (Expr slotToLiteral : slotToLiteralPredicatesList) { + buildNewBinaryPredicate(slotToLiteral, slotEqSlotPredicatesList, + slotToLiteralPredicatesSet, newPredicatesList); + } + } + + /** + * Traverse slotEqSlot to extract slots with equal expressions and construct a new slotToLiteral. 
+ */ + private void buildNewBinaryPredicate(Expr slotToLiteral, + ArrayList<Expr> slotEqSlotPredicatesList, + Set<Pair<Expr, Expr>> slotToLiteralPredicatesSet, + ArrayList<Pair<Expr, Boolean>> newPredicatesList) { + SlotRef checkSlot = slotToLiteral.getChild(0).unwrapSlotRef(); + for (Expr conjunct : slotEqSlotPredicatesList) { + SlotRef leftSlot = conjunct.getChild(0).unwrapSlotRef(); + SlotRef rightSlot = conjunct.getChild(1).unwrapSlotRef(); + + if (checkSlot.simple_equals(leftSlot)) { + addNewBinaryPredicate(genNewBinaryPredicate(slotToLiteral, rightSlot), + slotToLiteralPredicatesSet, newPredicatesList, + isNeedInfer(rightSlot, leftSlot)); + } else if (checkSlot.simple_equals(rightSlot)) { + addNewBinaryPredicate(genNewBinaryPredicate(slotToLiteral, leftSlot), + slotToLiteralPredicatesSet, newPredicatesList, + isNeedInfer(leftSlot, rightSlot)); + } + } + } + + /** + * To determine whether it needs to be extended. + * eg:t1.id = t2.id and t2.id = 1; + * @param newSlot: t1.id + * @param checkSlot: t2.id + * @return needInfer. True: needInfer. False: not needInfer + */ + private boolean isNeedInfer(SlotRef newSlot, SlotRef checkSlot) { Review comment: done ########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/InferFiltersRule.java ########## @@ -0,0 +1,523 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.CompoundPredicate; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.IsNullPredicate; +import org.apache.doris.analysis.JoinOperator; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.TupleId; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; + +import com.google.common.collect.Sets; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Set; +import java.util.Map; + +/** + * The function of this rule is to derive a new predicate based on the current predicate. + * eg. + * t1.id = t2.id and t2.id = t3.id and t3.id = 100; + * --> + * t1.id = 100 and t2.id = 100 and t3.id = 100; + * + * 1. Register a new rule InferFiltersRule and add it to GlobalState. + * 2. Traverse Conjunct to construct on/where equivalence connection, numerical connection and isNullPredicate. + * 3. Use Warshall to infer all equivalence connections + * 4. 
Construct additional numerical connections and isNullPredicate + */ +public class InferFiltersRule implements ExprRewriteRule { + private final static Logger LOG = LogManager.getLogger(InferFiltersRule.class); + public static InferFiltersRule INSTANCE = new InferFiltersRule(); + + @Override + public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { + if (expr == null) { + return null; + } + + if (!analyzer.enableInferPredicate() || clauseType == ExprRewriter.ClauseType.OTHERCLAUSE) { + return expr; + } + + // slotEqSlotExpr: Record existing and infer equivalent connections + List<Expr> slotEqSlotExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotEqSlotExpr() : new ArrayList<>(); + + // slotEqSlotDeDuplication: De-Duplication for slotEqSlotExpr + Set<Pair<Expr, Expr>> slotEqSlotDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotEqSlotDeDuplication() : Sets.newHashSet(); + + // slotToLiteralExpr: Record existing and infer expr which slot and literal are equal + List<Expr> slotToLiteralExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotToLiteralExpr() : new ArrayList<>(); + + // slotToLiteralDeDuplication: De-Duplication for slotToLiteralExpr + Set<Pair<Expr, Expr>> slotToLiteralDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnSlotToLiteralDeDuplication() : Sets.newHashSet(); + + // newExprWithState: just record infer expr which slot and literal are equal and which is not null predicate + // false : Unexecutable intermediate results will be produced during the derivation process. + // true : The new expr will be add to expr. + List<Pair<Expr, Boolean>> newExprWithState = new ArrayList<>(); + + // isNullExpr: Record existing and infer not null predicate + List<Expr> isNullExpr = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? 
analyzer.getOnIsNullExpr() : new ArrayList<>(); + + //isNullDeDuplication: De-Duplication for isNullExpr + Set<Expr> isNullDeDuplication = + (clauseType == ExprRewriter.ClauseType.ONCLAUSE) ? analyzer.getOnIsNullDeDuplication() : Sets.newHashSet(); + + + // exprToInteger/integerToExpr: function is easy to build warshall and newExprWithState + Map<Expr, Integer> exprToInteger = new HashMap<>(); Review comment: already edited ########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java ########## @@ -46,6 +46,15 @@ public class ExprRewriter { private int numChanges_ = 0; private final List<ExprRewriteRule> rules_; + + // The type of clause that executes the rule. + // This type is only used in InferFiltersRule, other rules are not used + public enum ClauseType { + ONCLAUSE, Review comment: done ########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java ########## @@ -46,6 +46,15 @@ public class ExprRewriter { private int numChanges_ = 0; private final List<ExprRewriteRule> rules_; + + // The type of clause that executes the rule. + // This type is only used in InferFiltersRule, other rules are not used + public enum ClauseType { + ONCLAUSE, + WHERECLAUSE, Review comment: done ########## File path: fe/fe-core/src/main/java/org/apache/doris/rewrite/ExprRewriter.java ########## @@ -46,6 +46,15 @@ public class ExprRewriter { private int numChanges_ = 0; private final List<ExprRewriteRule> rules_; + + // The type of clause that executes the rule. + // This type is only used in InferFiltersRule, other rules are not used + public enum ClauseType { + ONCLAUSE, + WHERECLAUSE, + OTHERCLAUSE, // All other clauses that are not on and not where Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org