This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d0a8cd0fc5 [fix](nereids) dphyper join reorder may lost some join conjuncts (#19318) d0a8cd0fc5 is described below commit d0a8cd0fc5ad8febe6afad8b7570a10b722c6419 Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Wed May 10 19:02:35 2023 +0800 [fix](nereids) dphyper join reorder may lost some join conjuncts (#19318) --- .../org/apache/doris/nereids/NereidsPlanner.java | 10 +- .../hypergraph/receiver/PlanReceiver.java | 40 +++---- .../java/org/apache/doris/nereids/memo/Group.java | 8 +- .../java/org/apache/doris/nereids/memo/Memo.java | 4 +- .../functions_test/test_dpjoin_reorder.groovy | 126 +++++++++++++++++++++ 5 files changed, 157 insertions(+), 31 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index e85babed40..7bf0339488 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -54,6 +54,7 @@ import org.apache.doris.planner.ScanNode; import org.apache.doris.qe.ConnectContext; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.opentelemetry.api.trace.Span; @@ -270,16 +271,23 @@ public class NereidsPlanner extends Planner { private void dpHypOptimize() { Group root = getRoot(); + boolean changeRoot = false; if (root.isInnerJoinGroup()) { // If the root group is join group, DPHyp can change the root group. // To keep the root group is not changed, we add a project operator above join List<NamedExpression> outputs = ImmutableList.copyOf(root.getLogicalExpression().getPlan().getOutput()); LogicalPlan plan = new LogicalProject<>(outputs, root.getLogicalExpression().getPlan()); - CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan, null, false); + CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan, null, true); root = copyInResult.correspondingExpression.getOwnerGroup(); + Preconditions.checkArgument(copyInResult.generateNewExpression, + "the top project node can't be generated for dpHypOptimize"); + changeRoot = true; } cascadesContext.pushJob(new JoinOrderJob(root, cascadesContext.getCurrentJobContext())); cascadesContext.getJobScheduler().executeJobPool(cascadesContext); + if (changeRoot) { + cascadesContext.getMemo().setRoot(root.getLogicalExpression().child(0)); + } } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java index 892e963810..5f36971fd2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java @@ -102,11 +102,7 @@ public class PlanReceiver implements AbstractReceiver { Preconditions.checkArgument(planTable.containsKey(left)); Preconditions.checkArgument(planTable.containsKey(right)); - // check if the missed edges can be correctly connected by add it to edges - // if not, the plan is invalid because of the missed edges, just return and seek for another valid plan - if (!processMissedEdges(left, right, edges)) { - return true; - } + processMissedEdges(left, right, edges); Memo memo = jobContext.getCascadesContext().getMemo(); emitCount += 1; @@ -165,37 +161,27 @@ public class PlanReceiver implements AbstractReceiver { return outputSlots; } - // check if the missed edges can be used to connect left and right together with edges - // return true if no missed edge or the missed edge can be used to connect left and right - // the returned edges includes missed edges if there is any. - private boolean processMissedEdges(long left, long right, List<Edge> edges) { - boolean canAddMisssedEdges = true; - - // find all reference nodes assume left and right sub graph is connected + // add any missed edge into edges to connect left and right + private void processMissedEdges(long left, long right, List<Edge> edges) { + // find all used edges BitSet usedEdgesBitmap = new BitSet(); usedEdgesBitmap.or(usdEdges.get(left)); usedEdgesBitmap.or(usdEdges.get(right)); edges.forEach(edge -> usedEdgesBitmap.set(edge.getIndex())); - long allReferenceNodes = getAllReferenceNodes(usedEdgesBitmap); - // check all edges - // the edge is a missed edge if the edge is not used and its reference nodes is a subset of allReferenceNodes + // find all referenced nodes + long allReferenceNodes = LongBitmap.or(left, right); + + // find the edge which is not in usedEdgesBitmap and its referenced nodes is subset of allReferenceNodes for (Edge edge : hyperGraph.getEdges()) { - if (LongBitmap.isSubset(edge.getReferenceNodes(), allReferenceNodes) && !usedEdgesBitmap.get( - edge.getIndex())) { - // check the missed edge can be used to connect left and right together with edges - // if the missed edge meet the 2 conditions, it is a valid edge - // 1. the edge's left child's referenced nodes is subset of the left - // 2. the edge's original right node is subset of right - canAddMisssedEdges = canAddMisssedEdges && LongBitmap.isSubset(edge.getLeft(), - left) && LongBitmap.isSubset(edge.getOriginalRight(), right); - - // always add the missed edge to edges - // because the caller will return immediately if canAddMisssedEdges is false + long referenceNodes = + LongBitmap.newBitmapUnion(edge.getOriginalLeft(), edge.getOriginalRight()); + if (LongBitmap.isSubset(referenceNodes, allReferenceNodes) + && !usedEdgesBitmap.get(edge.getIndex())) { + // add the missed edge to edges edges.add(edge); } } - return canAddMisssedEdges; } private long getAllReferenceNodes(BitSet edgesBitmap) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java index 5bd11c71e7..5bc32cb64f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Pair; import org.apache.doris.nereids.cost.Cost; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; @@ -368,8 +369,11 @@ public class Group { public boolean isInnerJoinGroup() { Plan plan = getLogicalExpression().getPlan(); if (plan instanceof LogicalJoin) { - // Right now, we only support inner join - return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN; + // Right now, we only support inner join with some join conditions + return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN + && (((LogicalJoin) plan).getOtherJoinConjuncts().isEmpty() + || !(((LogicalJoin) plan).getOtherJoinConjuncts() + .get(0) instanceof Literal)); } return false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java index f8d7694dcc..bb7b540f78 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java @@ -524,7 +524,9 @@ public class Memo { } public Group newGroup(LogicalProperties logicalProperties) { - return new Group(groupIdGenerator.getNextId(), logicalProperties); + Group group = new Group(groupIdGenerator.getNextId(), logicalProperties); + groups.put(group.getGroupId(), group); + return group; } // This function is used to copy new group expression diff --git a/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy new file mode 100644 index 0000000000..d30ddef991 --- /dev/null +++ b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_dp_join_reorder") { + sql """set enable_nereids_planner=true""" + sql """set enable_dphyp_optimizer=true""" + sql """set enable_fallback_to_original_planner=false""" + sql "use regression_test_tpcds_sf1_p1" + + explain { + sql(""" + WITH + cs_ui AS ( + SELECT + cs_item_sk + , sum(cs_ext_list_price) sale + , sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)) refund + FROM + catalog_sales + , catalog_returns + WHERE (cs_item_sk = cr_item_sk) + AND (cs_order_number = cr_order_number) + GROUP BY cs_item_sk + HAVING (sum(cs_ext_list_price) > (2 * sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit)))) + ), + cross_sales AS ( + SELECT + i_product_name product_name + , i_item_sk item_sk + , s_store_name store_name + , s_zip store_zip + , ad1.ca_street_number b_street_number + , ad1.ca_street_name b_street_name + , ad1.ca_city b_city + , ad1.ca_zip b_zip + , ad2.ca_street_number c_street_number + , ad2.ca_street_name c_street_name + , ad2.ca_city c_city + , ad2.ca_zip c_zip + , d1.d_year syear + , d2.d_year fsyear + , d3.d_year s2year + FROM + store_sales + , store_returns + , cs_ui + , date_dim d1 + , date_dim d2 + , date_dim d3 + , store + , customer + , customer_demographics cd1 + , customer_demographics cd2 + , promotion + , household_demographics hd1 + , household_demographics hd2 + , customer_address ad1 + , customer_address ad2 + , income_band ib1 + , income_band ib2 + , item + WHERE (ss_store_sk = s_store_sk) + AND (ss_sold_date_sk = d1.d_date_sk) + AND (ss_customer_sk = c_customer_sk) + AND (ss_cdemo_sk = cd1.cd_demo_sk) + AND (ss_hdemo_sk = hd1.hd_demo_sk) + AND (ss_addr_sk = ad1.ca_address_sk) + AND (ss_item_sk = i_item_sk) + AND (ss_item_sk = sr_item_sk) + AND (ss_ticket_number = sr_ticket_number) + AND (ss_item_sk = cs_ui.cs_item_sk) + AND (c_current_cdemo_sk = cd2.cd_demo_sk) + AND (c_current_hdemo_sk = hd2.hd_demo_sk) + AND (c_current_addr_sk = ad2.ca_address_sk) + AND (c_first_sales_date_sk = d2.d_date_sk) + AND (c_first_shipto_date_sk = d3.d_date_sk) + AND (ss_promo_sk = p_promo_sk) + AND (hd1.hd_income_band_sk = ib1.ib_income_band_sk) + AND (hd2.hd_income_band_sk = ib2.ib_income_band_sk) + AND (cd1.cd_marital_status <> cd2.cd_marital_status) + AND (i_color IN ('purple' , 'burlywood' , 'indian' , 'spring' , 'floral' , 'medium')) + AND (i_current_price BETWEEN 64 AND (64 + 10)) + AND (i_current_price BETWEEN (64 + 1) AND (64 + 15)) + ) + SELECT + cs1.product_name + , cs1.store_name + , cs1.store_zip + , cs1.b_street_number + , cs1.b_street_name + , cs1.b_city + , cs1.b_zip + , cs1.c_street_number + , cs1.c_street_name + , cs1.c_city + , cs1.c_zip + , cs1.syear + , cs2.syear + FROM + cross_sales cs1 + , cross_sales cs2 + , cross_sales cs3 + WHERE (cs1.item_sk = cs2.item_sk) + AND (cs1.syear = 1999) + AND (cs2.syear = (1999 + 1)) + AND (cs1.store_name = cs2.store_name) + AND (cs1.store_zip = cs2.store_zip) + AND (cs1.store_name = cs3.store_name) + AND (cs1.store_zip = cs3.store_zip); + """) + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org