This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 9bc5f9a39ea [Fix](Nereids) fix leading with brace can not generate correct plan (#36193) (#36549) 9bc5f9a39ea is described below commit 9bc5f9a39eafffab47d08db072efd71dd64f6178 Author: LiBinfeng <46676950+libinfeng...@users.noreply.github.com> AuthorDate: Thu Jun 27 16:19:07 2024 +0800 [Fix](Nereids) fix leading with brace can not generate correct plan (#36193) (#36549) cherry-pick from master #36193 Problem: when using leading like: leading(t1 {t2 t3} {t4 t5} t6) it would not generate correct plan because levellist can not express enough message of braces Solved: remove levellist express of leading levels and use reverse polish expression Algorithm: leading(t1 {t2 t3} {t4 t5} t6) ==> stack top to down(t1 t2 t3 join join t4 t5 join t6 join) when generate leading join, we can pop items in stack, when it's a table, make logicalscan when it's a join operator, make logical join and push back to stack --- .../org/apache/doris/nereids/hint/LeadingHint.java | 235 ++++++++++----------- .../nereids/trees/plans/logical/LogicalJoin.java | 14 -- .../data/nereids_p0/hint/fix_leading.out | 8 +- .../data/nereids_p0/hint/test_leading.out | 92 ++++---- 4 files changed, 160 insertions(+), 189 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/hint/LeadingHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/hint/LeadingHint.java index 92ff5c8bfc2..e139fa4e09c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/hint/LeadingHint.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/hint/LeadingHint.java @@ -50,8 +50,10 @@ import java.util.Stack; */ public class LeadingHint extends Hint { private String originalString = ""; + + private List<String> addJoinParameters; + private List<String> normalizedParameters; private final List<String> tablelist = new ArrayList<>(); - private final List<Integer> levelList = new ArrayList<>(); private final Map<RelationId, LogicalPlan> relationIdToScanMap = Maps.newLinkedHashMap(); @@ -81,68 +83,77 @@ public class LeadingHint extends Hint { public LeadingHint(String hintName, List<String> parameters, String originalString) { super(hintName); this.originalString = originalString; - int level = 0; - Stack<Boolean> brace = new Stack<>(); - String lastParameter = ""; - for (String parameter : parameters) { - if (parameter.equals("{")) { - if (lastParameter.equals("}")) { - level += 2; - brace.push(true); - } else { - ++level; - brace.push(false); - } - } else if (parameter.equals("}")) { - if (brace.pop().equals(true)) { - level -= 2; - } else { - level--; - } - } else { - tablelist.add(parameter); - levelList.add(level); - } - lastParameter = parameter; - } - normalizeLevelList(); + addJoinParameters = insertJoinIntoParameters(parameters); + normalizedParameters = parseIntoReversePolishNotation(addJoinParameters); } - private void removeGap(int left, int right, int gap) { - for (int i = left; i <= right; i++) { - levelList.set(i, levelList.get(i) - (gap - 1)); + /** + * insert join string into leading string + * @param list of sql input leading string + * @return list of string adding joins into tables + */ + public static List<String> insertJoinIntoParameters(List<String> list) { + List<String> output = new ArrayList<>(); + + for (String item : list) { + if (item.equals("shuffle") || item.equals("broadcast")) { + output.remove(output.size() - 1); + output.add(item); + continue; + } else if (item.equals("{")) { + output.add(item); + continue; + } else if (item.equals("}")) { + output.remove(output.size() - 1); + output.add(item); + } else { + output.add(item); + } + output.add("join"); } + output.remove(output.size() - 1); + return output; } - // when we write leading like: leading(t1 {{t2 t3} {t4 t5}} t6) - // levelList would like 0 2 2 3 3 0, it could be reduced to 0 1 1 2 2 0 like leading(t1 {t2 t3 {t4 t5}} t6) - // gap is like 0 to 2 or 3 to 0 in upper example, and this function is to remove gap when we use a lot of braces - private void normalizeLevelList() { - int leftIndex = 0; - // at lease two tables were needed - for (int i = 1; i < levelList.size(); i++) { - if ((levelList.get(i) - levelList.get(leftIndex)) > 1) { - int rightIndex = i; - for (int j = i; j < levelList.size(); j++) { - if ((levelList.get(rightIndex) - levelList.get(j)) > 1) { - removeGap(i, rightIndex, Math.min(levelList.get(i) - levelList.get(leftIndex), - levelList.get(rightIndex) - levelList.get(j))); - } - rightIndex = j; + /** + * parse list string of original leading string with join string to Reverse Polish notation + * @param list of leading with join string + * @return Reverse Polish notation which can be used directly changed into logical join + */ + public List<String> parseIntoReversePolishNotation(List<String> list) { + Stack<String> s1 = new Stack<>(); + List<String> s2 = new ArrayList<>(); + + for (String item : list) { + if (!(item.equals("shuffle") || item.equals("broadcast") || item.equals("{") + || item.equals("}") || item.equals("join"))) { + tablelist.add(item); + s2.add(item); + } else if (item.equals("{")) { + s1.push(item); + } else if (item.equals("}")) { + while (!s1.peek().equals("{")) { + String pop = s1.pop(); + s2.add(pop); } + s1.pop(); + } else { + while (s1.size() != 0 && !s1.peek().equals("{")) { + s2.add(s1.pop()); + } + s1.push(item); } - leftIndex = i; } + while (s1.size() > 0) { + s2.add(s1.pop()); + } + return s2; } public List<String> getTablelist() { return tablelist; } - public List<Integer> getLevelList() { - return levelList; - } - public Map<RelationId, LogicalPlan> getRelationIdToScanMap() { return relationIdToScanMap; } @@ -150,8 +161,16 @@ public class LeadingHint extends Hint { @Override public String getExplainString() { StringBuilder out = new StringBuilder(); - out.append(originalString); - return out.toString(); + for (String parameter : addJoinParameters) { + if (parameter.equals("{") || parameter.equals("}") || parameter.equals("[") || parameter.equals("]")) { + out.append(parameter + " "); + } else if (parameter.equals("join")) { + continue; + } else { + out.append(parameter + " "); + } + } + return "leading(" + out.toString() + ")"; } /** @@ -465,92 +484,58 @@ public class LeadingHint extends Hint { return JoinType.INNER_JOIN; } + private LogicalPlan makeJoinPlan(LogicalPlan leftChild, LogicalPlan rightChild, String distributeJoinType) { + List<Expression> conditions = getJoinConditions( + getFilters(), leftChild, rightChild); + Pair<List<Expression>, List<Expression>> pair = JoinUtils.extractExpressionForHashTable( + leftChild.getOutput(), rightChild.getOutput(), conditions); + // leading hint would set status inside if not success + JoinType joinType = computeJoinType(getBitmap(leftChild), + getBitmap(rightChild), conditions); + if (joinType == null) { + this.setStatus(HintStatus.SYNTAX_ERROR); + this.setErrorMessage("JoinType can not be null"); + } else if (!isConditionJoinTypeMatched(conditions, joinType)) { + this.setStatus(HintStatus.UNUSED); + this.setErrorMessage("condition does not matched joinType"); + } + if (!this.isSuccess()) { + return null; + } + // get joinType + LogicalJoin logicalJoin = new LogicalJoin<>(joinType, pair.first, + pair.second, + JoinHint.NONE, + Optional.empty(), + leftChild, + rightChild); + logicalJoin.setBitmap(LongBitmap.or(getBitmap(leftChild), getBitmap(rightChild))); + return logicalJoin; + } + /** * using leading to generate plan, it could be failed, if failed set leading status to unused or syntax error * @return plan */ public Plan generateLeadingJoinPlan() { - Stack<Pair<Integer, LogicalPlan>> stack = new Stack<>(); - int index = 0; - LogicalPlan logicalPlan = getLogicalPlanByName(getTablelist().get(index)); - if (logicalPlan == null) { - return null; - } - logicalPlan = makeFilterPlanIfExist(getFilters(), logicalPlan); - assert (logicalPlan != null); - stack.push(Pair.of(getLevelList().get(index), logicalPlan)); - int stackTopLevel = getLevelList().get(index++); - while (index < getTablelist().size()) { - int currentLevel = getLevelList().get(index); - if (currentLevel == stackTopLevel) { - // should return error if can not found table - logicalPlan = getLogicalPlanByName(getTablelist().get(index++)); - if (logicalPlan == null) { + Stack<LogicalPlan> stack = new Stack<>(); + for (String item : normalizedParameters) { + if (item.equals("join") || item.equals("shuffle") || item.equals("broadcast")) { + LogicalPlan rightChild = stack.pop(); + LogicalPlan leftChild = stack.pop(); + LogicalPlan joinPlan = makeJoinPlan(leftChild, rightChild, item); + if (joinPlan == null) { return null; } - logicalPlan = makeFilterPlanIfExist(getFilters(), logicalPlan); - Pair<Integer, LogicalPlan> newStackTop = stack.peek(); - while (!(stack.isEmpty() || stackTopLevel != newStackTop.first)) { - // check join is legal and get join type - newStackTop = stack.pop(); - List<Expression> conditions = getJoinConditions( - getFilters(), newStackTop.second, logicalPlan); - Pair<List<Expression>, List<Expression>> pair = JoinUtils.extractExpressionForHashTable( - newStackTop.second.getOutput(), logicalPlan.getOutput(), conditions); - // leading hint would set status inside if not success - JoinType joinType = computeJoinType(getBitmap(newStackTop.second), - getBitmap(logicalPlan), conditions); - if (joinType == null) { - this.setStatus(HintStatus.SYNTAX_ERROR); - this.setErrorMessage("JoinType can not be null"); - } else if (!isConditionJoinTypeMatched(conditions, joinType)) { - this.setStatus(HintStatus.UNUSED); - this.setErrorMessage("condition does not matched joinType"); - } - if (!this.isSuccess()) { - return null; - } - // get joinType - LogicalJoin logicalJoin = new LogicalJoin<>(joinType, pair.first, - pair.second, - JoinHint.NONE, - Optional.empty(), - newStackTop.second, - logicalPlan); - logicalJoin.setBitmap(LongBitmap.or(getBitmap(newStackTop.second), getBitmap(logicalPlan))); - if (stackTopLevel > 0) { - if (index < getTablelist().size()) { - if (stackTopLevel > getLevelList().get(index)) { - stackTopLevel--; - } - } else { - stackTopLevel--; - } - } - if (!stack.isEmpty()) { - newStackTop = stack.peek(); - } - logicalPlan = logicalJoin; - } - stack.push(Pair.of(stackTopLevel, logicalPlan)); + stack.push(joinPlan); } else { - // push - logicalPlan = getLogicalPlanByName(getTablelist().get(index++)); - if (logicalPlan == null) { - return null; - } + LogicalPlan logicalPlan = getLogicalPlanByName(item); logicalPlan = makeFilterPlanIfExist(getFilters(), logicalPlan); - stack.push(Pair.of(currentLevel, logicalPlan)); - stackTopLevel = currentLevel; + stack.push(logicalPlan); } } - if (stack.size() > 1) { - this.setStatus(HintStatus.SYNTAX_ERROR); - this.setErrorMessage("please check your brace pairs in leading"); - return null; - } - LogicalJoin finalJoin = (LogicalJoin) stack.pop().second; + LogicalJoin finalJoin = (LogicalJoin) stack.pop(); // we want all filters been remove assert (filters.isEmpty()); if (finalJoin != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java index a05a704acd0..55fff8c44b7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java @@ -98,20 +98,6 @@ public class LogicalJoin<LEFT_CHILD_TYPE extends Plan, RIGHT_CHILD_TYPE extends Optional.empty(), Optional.empty(), leftChild, rightChild); } - public LogicalJoin( - long bitmap, - JoinType joinType, - List<Expression> hashJoinConjuncts, - List<Expression> otherJoinConjuncts, - JoinHint hint, - Optional<MarkJoinSlotReference> markJoinSlotReference, - LEFT_CHILD_TYPE leftChild, RIGHT_CHILD_TYPE rightChild) { - this(joinType, hashJoinConjuncts, - otherJoinConjuncts, hint, markJoinSlotReference, - Optional.empty(), Optional.empty(), leftChild, rightChild); - this.bitmap = LongBitmap.or(this.bitmap, bitmap); - } - public LogicalJoin( JoinType joinType, List<Expression> hashJoinConjuncts, diff --git a/regression-test/data/nereids_p0/hint/fix_leading.out b/regression-test/data/nereids_p0/hint/fix_leading.out index 4a2ceae5c3f..9b75ff8d907 100644 --- a/regression-test/data/nereids_p0/hint/fix_leading.out +++ b/regression-test/data/nereids_p0/hint/fix_leading.out @@ -16,8 +16,8 @@ PhysicalResultSink --------------PhysicalOlapScan[t4] Hint log: -Used: leading({ t1 t2 } { t3 t4 }) -UnUsed: +Used: leading({ t1 t2 } { t3 t4 } ) +UnUsed: SyntaxError: -- !select2_1_1 -- @@ -248,7 +248,7 @@ PhysicalResultSink Hint log: Used: leading(t1 t2 t3 ) -UnUsed: +UnUsed: SyntaxError: -- !select6_1 -- @@ -275,6 +275,6 @@ PhysicalResultSink Hint log: Used: leading(t1 { { t2 t3 } { t4 t5 } } t6 ) -UnUsed: +UnUsed: SyntaxError: diff --git a/regression-test/data/nereids_p0/hint/test_leading.out b/regression-test/data/nereids_p0/hint/test_leading.out index 058fdf8b52f..470dc086812 100644 --- a/regression-test/data/nereids_p0/hint/test_leading.out +++ b/regression-test/data/nereids_p0/hint/test_leading.out @@ -2214,7 +2214,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 shuffle t2 broadcast t3 ) +Used: leading(t1 shuffle t2 broadcast t3 ) UnUsed: SyntaxError: @@ -2232,7 +2232,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 shuffle { t2 broadcast t3 } ) +Used: leading(t1 shuffle { t2 broadcast t3 } ) UnUsed: SyntaxError: @@ -2250,7 +2250,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t1 shuffle { t3 broadcast t2 } ) +Used: leading(t1 shuffle { t3 broadcast t2 } ) UnUsed: SyntaxError: @@ -2268,7 +2268,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 shuffle t1 broadcast t3 ) +Used: leading(t2 shuffle t1 broadcast t3 ) UnUsed: SyntaxError: @@ -2286,7 +2286,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 shuffle { t1 broadcast t3 } ) +Used: leading(t2 shuffle { t1 broadcast t3 } ) UnUsed: SyntaxError: @@ -2304,7 +2304,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t2 shuffle { t3 broadcast t1 } ) +Used: leading(t2 shuffle { t3 broadcast t1 } ) UnUsed: SyntaxError: @@ -2322,8 +2322,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 t2 broadcast t3 ) -UnUsed: +Used: leading(t1 t2 broadcast t3 ) +UnUsed: SyntaxError: -- !select93_2 -- @@ -2340,8 +2340,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 { t2 broadcast t3 } ) -UnUsed: +Used: leading(t1 { t2 broadcast t3 } ) +UnUsed: SyntaxError: -- !select93_3 -- @@ -2358,8 +2358,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t1 { t3 broadcast t2 } ) -UnUsed: +Used: leading(t1 { t3 broadcast t2 } ) +UnUsed: SyntaxError: -- !select93_4 -- @@ -2376,8 +2376,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 t1 broadcast t3 ) -UnUsed: +Used: leading(t2 t1 broadcast t3 ) +UnUsed: SyntaxError: -- !select93_5 -- @@ -2394,8 +2394,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 { t1 broadcast t3 } ) -UnUsed: +Used: leading(t2 { t1 broadcast t3 } ) +UnUsed: SyntaxError: -- !select93_6 -- @@ -2412,8 +2412,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t2 { t3 broadcast t1 } ) -UnUsed: +Used: leading(t2 { t3 broadcast t1 } ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2430,8 +2430,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 shuffle t2 t3 ) -UnUsed: +Used: leading(t1 shuffle t2 t3 ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2448,8 +2448,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 shuffle { t2 t3 } ) -UnUsed: +Used: leading(t1 shuffle { t2 t3 } ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2466,8 +2466,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t1 shuffle { t3 t2 } ) -UnUsed: +Used: leading(t1 shuffle { t3 t2 } ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2484,8 +2484,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 shuffle t1 t3 ) -UnUsed: +Used: leading(t2 shuffle t1 t3 ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2502,8 +2502,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 shuffle { t1 t3 } ) -UnUsed: +Used: leading(t2 shuffle { t1 t3 } ) +UnUsed: SyntaxError: -- !select94_2 -- @@ -2520,8 +2520,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t2 shuffle { t3 t1 } ) -UnUsed: +Used: leading(t2 shuffle { t3 t1 } ) +UnUsed: SyntaxError: -- !select95_1 -- @@ -2538,8 +2538,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 broadcast t2 t3 ) -UnUsed: +Used: leading(t1 broadcast t2 t3 ) +UnUsed: SyntaxError: -- !select95_4 -- @@ -2556,8 +2556,8 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 broadcast t1 t3 ) -UnUsed: +Used: leading(t2 broadcast t1 t3 ) +UnUsed: SyntaxError: -- !select95_8 -- @@ -2574,8 +2574,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t3 broadcast { t1 t2 } ) -UnUsed: +Used: leading(t3 broadcast { t1 t2 } ) +UnUsed: SyntaxError: -- !select95_9 -- @@ -2592,8 +2592,8 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t3 broadcast { t2 t1 } ) -UnUsed: +Used: leading(t3 broadcast { t2 t1 } ) +UnUsed: SyntaxError: -- !select96_1 -- @@ -2610,7 +2610,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 shuffle t2 broadcast t3 ) +Used: leading(t1 shuffle t2 broadcast t3 ) UnUsed: SyntaxError: @@ -2628,7 +2628,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 shuffle t1 broadcast t3 ) +Used: leading(t2 shuffle t1 broadcast t3 ) UnUsed: SyntaxError: @@ -2646,7 +2646,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t3 shuffle { t1 broadcast t2 } ) +Used: leading(t3 shuffle { t1 broadcast t2 } ) UnUsed: SyntaxError: @@ -2664,7 +2664,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t3 shuffle { t2 broadcast t1 } ) +Used: leading(t3 shuffle { t2 broadcast t1 } ) UnUsed: SyntaxError: @@ -2683,7 +2683,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t1 broadcast t2 shuffle t3 ) +Used: leading(t1 broadcast t2 shuffle t3 ) UnUsed: SyntaxError: @@ -2701,7 +2701,7 @@ PhysicalResultSink ------------PhysicalOlapScan[t3] Hint log: -Used: leading(t2 broadcast t1 shuffle t3 ) +Used: leading(t2 broadcast t1 shuffle t3 ) UnUsed: SyntaxError: @@ -2719,7 +2719,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t2] Hint log: -Used: leading(t3 broadcast { t1 shuffle t2 } ) +Used: leading(t3 broadcast { t1 shuffle t2 } ) UnUsed: SyntaxError: @@ -2737,7 +2737,7 @@ PhysicalResultSink ----------------PhysicalOlapScan[t1] Hint log: -Used: leading(t3 broadcast { t2 shuffle t1 } ) +Used: leading(t3 broadcast { t2 shuffle t1 } ) UnUsed: SyntaxError: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org