This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d0a8cd0fc5 [fix](nereids) dphyper join reorder may lost some join 
conjuncts (#19318)
d0a8cd0fc5 is described below

commit d0a8cd0fc5ad8febe6afad8b7570a10b722c6419
Author: starocean999 <40539150+starocean...@users.noreply.github.com>
AuthorDate: Wed May 10 19:02:35 2023 +0800

    [fix](nereids) dphyper join reorder may lost some join conjuncts (#19318)
---
 .../org/apache/doris/nereids/NereidsPlanner.java   |  10 +-
 .../hypergraph/receiver/PlanReceiver.java          |  40 +++----
 .../java/org/apache/doris/nereids/memo/Group.java  |   8 +-
 .../java/org/apache/doris/nereids/memo/Memo.java   |   4 +-
 .../functions_test/test_dpjoin_reorder.groovy      | 126 +++++++++++++++++++++
 5 files changed, 157 insertions(+), 31 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index e85babed40..7bf0339488 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -54,6 +54,7 @@ import org.apache.doris.planner.ScanNode;
 import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import io.opentelemetry.api.trace.Span;
@@ -270,16 +271,23 @@ public class NereidsPlanner extends Planner {
 
     private void dpHypOptimize() {
         Group root = getRoot();
+        boolean changeRoot = false;
         if (root.isInnerJoinGroup()) {
             // If the root group is join group, DPHyp can change the root 
group.
             // To keep the root group is not changed, we add a project 
operator above join
             List<NamedExpression> outputs = 
ImmutableList.copyOf(root.getLogicalExpression().getPlan().getOutput());
             LogicalPlan plan = new LogicalProject<>(outputs, 
root.getLogicalExpression().getPlan());
-            CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan, 
null, false);
+            CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan, 
null, true);
             root = copyInResult.correspondingExpression.getOwnerGroup();
+            Preconditions.checkArgument(copyInResult.generateNewExpression,
+                    "the top project node can't be generated for 
dpHypOptimize");
+            changeRoot = true;
         }
         cascadesContext.pushJob(new JoinOrderJob(root, 
cascadesContext.getCurrentJobContext()));
         cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
+        if (changeRoot) {
+            
cascadesContext.getMemo().setRoot(root.getLogicalExpression().child(0));
+        }
     }
 
     /**
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
index 892e963810..5f36971fd2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
@@ -102,11 +102,7 @@ public class PlanReceiver implements AbstractReceiver {
         Preconditions.checkArgument(planTable.containsKey(left));
         Preconditions.checkArgument(planTable.containsKey(right));
 
-        // check if the missed edges can be correctly connected by add it to 
edges
-        // if not, the plan is invalid because of the missed edges, just 
return and seek for another valid plan
-        if (!processMissedEdges(left, right, edges)) {
-            return true;
-        }
+        processMissedEdges(left, right, edges);
 
         Memo memo = jobContext.getCascadesContext().getMemo();
         emitCount += 1;
@@ -165,37 +161,27 @@ public class PlanReceiver implements AbstractReceiver {
         return outputSlots;
     }
 
-    // check if the missed edges can be used to connect left and right 
together with edges
-    // return true if no missed edge or the missed edge can be used to connect 
left and right
-    // the returned edges includes missed edges if there is any.
-    private boolean processMissedEdges(long left, long right, List<Edge> 
edges) {
-        boolean canAddMisssedEdges = true;
-
-        // find all reference nodes assume left and right sub graph is 
connected
+    // add any missed edge into edges to connect left and right
+    private void processMissedEdges(long left, long right, List<Edge> edges) {
+        // find all used edges
         BitSet usedEdgesBitmap = new BitSet();
         usedEdgesBitmap.or(usdEdges.get(left));
         usedEdgesBitmap.or(usdEdges.get(right));
         edges.forEach(edge -> usedEdgesBitmap.set(edge.getIndex()));
-        long allReferenceNodes = getAllReferenceNodes(usedEdgesBitmap);
 
-        // check all edges
-        // the edge is a missed edge if the edge is not used and its reference 
nodes is a subset of allReferenceNodes
+        // find all referenced nodes
+        long allReferenceNodes = LongBitmap.or(left, right);
+
+        // find the edge which is not in usedEdgesBitmap and its referenced 
nodes is subset of allReferenceNodes
         for (Edge edge : hyperGraph.getEdges()) {
-            if (LongBitmap.isSubset(edge.getReferenceNodes(), 
allReferenceNodes) && !usedEdgesBitmap.get(
-                    edge.getIndex())) {
-                // check the missed edge can be used to connect left and right 
together with edges
-                // if the missed edge meet the 2 conditions, it is a valid edge
-                // 1. the edge's left child's referenced nodes is subset of 
the left
-                // 2. the edge's original right node is subset of right
-                canAddMisssedEdges = canAddMisssedEdges && 
LongBitmap.isSubset(edge.getLeft(),
-                        left) && LongBitmap.isSubset(edge.getOriginalRight(), 
right);
-
-                // always add the missed edge to edges
-                // because the caller will return immediately if 
canAddMisssedEdges is false
+            long referenceNodes =
+                    LongBitmap.newBitmapUnion(edge.getOriginalLeft(), 
edge.getOriginalRight());
+            if (LongBitmap.isSubset(referenceNodes, allReferenceNodes)
+                    && !usedEdgesBitmap.get(edge.getIndex())) {
+                // add the missed edge to edges
                 edges.add(edge);
             }
         }
-        return canAddMisssedEdges;
     }
 
     private long getAllReferenceNodes(BitSet edgesBitmap) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
index 5bd11c71e7..5bc32cb64f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.cost.Cost;
 import org.apache.doris.nereids.properties.LogicalProperties;
 import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.plans.JoinType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
@@ -368,8 +369,11 @@ public class Group {
     public boolean isInnerJoinGroup() {
         Plan plan = getLogicalExpression().getPlan();
         if (plan instanceof LogicalJoin) {
-            // Right now, we only support inner join
-            return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN;
+            // Right now, we only support inner join with some join conditions
+            return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN
+                    && (((LogicalJoin) plan).getOtherJoinConjuncts().isEmpty()
+                            || !(((LogicalJoin) plan).getOtherJoinConjuncts()
+                                    .get(0) instanceof Literal));
         }
         return false;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
index f8d7694dcc..bb7b540f78 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
@@ -524,7 +524,9 @@ public class Memo {
     }
 
     public Group newGroup(LogicalProperties logicalProperties) {
-        return new Group(groupIdGenerator.getNextId(), logicalProperties);
+        Group group = new Group(groupIdGenerator.getNextId(), 
logicalProperties);
+        groups.put(group.getGroupId(), group);
+        return group;
     }
 
     // This function is used to copy new group expression
diff --git 
a/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy 
b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy
new file mode 100644
index 0000000000..d30ddef991
--- /dev/null
+++ 
b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_dp_join_reorder") {
+    sql """set enable_nereids_planner=true"""
+    sql """set enable_dphyp_optimizer=true"""
+    sql """set enable_fallback_to_original_planner=false"""
+    sql "use regression_test_tpcds_sf1_p1"
+
+    explain {
+        sql("""
+            WITH
+                cs_ui AS (
+                    SELECT
+                        cs_item_sk
+                    , sum(cs_ext_list_price) sale
+                    , sum(((cr_refunded_cash + cr_reversed_charge) + 
cr_store_credit)) refund
+                    FROM
+                        catalog_sales
+                    , catalog_returns
+                    WHERE (cs_item_sk = cr_item_sk)
+                        AND (cs_order_number = cr_order_number)
+                    GROUP BY cs_item_sk
+                    HAVING (sum(cs_ext_list_price) > (2 * 
sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))))
+                ), 
+                cross_sales AS (
+                    SELECT
+                        i_product_name product_name
+                    , i_item_sk item_sk
+                    , s_store_name store_name
+                    , s_zip store_zip
+                    , ad1.ca_street_number b_street_number
+                    , ad1.ca_street_name b_street_name
+                    , ad1.ca_city b_city
+                    , ad1.ca_zip b_zip
+                    , ad2.ca_street_number c_street_number
+                    , ad2.ca_street_name c_street_name
+                    , ad2.ca_city c_city
+                    , ad2.ca_zip c_zip
+                    , d1.d_year syear
+                    , d2.d_year fsyear
+                    , d3.d_year s2year
+                    FROM
+                        store_sales
+                    , store_returns
+                    , cs_ui
+                    , date_dim d1
+                    , date_dim d2
+                    , date_dim d3
+                    , store
+                    , customer
+                    , customer_demographics cd1
+                    , customer_demographics cd2
+                    , promotion
+                    , household_demographics hd1
+                    , household_demographics hd2
+                    , customer_address ad1
+                    , customer_address ad2
+                    , income_band ib1
+                    , income_band ib2
+                    , item
+                    WHERE (ss_store_sk = s_store_sk)
+                        AND (ss_sold_date_sk = d1.d_date_sk)
+                        AND (ss_customer_sk = c_customer_sk)
+                        AND (ss_cdemo_sk = cd1.cd_demo_sk)
+                        AND (ss_hdemo_sk = hd1.hd_demo_sk)
+                        AND (ss_addr_sk = ad1.ca_address_sk)
+                        AND (ss_item_sk = i_item_sk)
+                        AND (ss_item_sk = sr_item_sk)
+                        AND (ss_ticket_number = sr_ticket_number)
+                        AND (ss_item_sk = cs_ui.cs_item_sk)
+                        AND (c_current_cdemo_sk = cd2.cd_demo_sk)
+                        AND (c_current_hdemo_sk = hd2.hd_demo_sk)
+                        AND (c_current_addr_sk = ad2.ca_address_sk)
+                        AND (c_first_sales_date_sk = d2.d_date_sk)
+                        AND (c_first_shipto_date_sk = d3.d_date_sk)
+                        AND (ss_promo_sk = p_promo_sk)
+                        AND (hd1.hd_income_band_sk = ib1.ib_income_band_sk)
+                        AND (hd2.hd_income_band_sk = ib2.ib_income_band_sk)
+                        AND (cd1.cd_marital_status <> cd2.cd_marital_status)
+                        AND (i_color IN ('purple'   , 'burlywood'   , 'indian' 
  , 'spring'   , 'floral'   , 'medium'))
+                        AND (i_current_price BETWEEN 64 AND (64 + 10))
+                        AND (i_current_price BETWEEN (64 + 1) AND (64 + 15))
+                )
+            SELECT
+                cs1.product_name
+                , cs1.store_name
+                , cs1.store_zip
+                , cs1.b_street_number
+                , cs1.b_street_name
+                , cs1.b_city
+                , cs1.b_zip
+                , cs1.c_street_number
+                , cs1.c_street_name
+                , cs1.c_city
+                , cs1.c_zip
+                , cs1.syear
+                , cs2.syear
+            FROM
+                cross_sales cs1
+                , cross_sales cs2
+                , cross_sales cs3
+            WHERE (cs1.item_sk = cs2.item_sk)
+                AND (cs1.syear = 1999)
+                AND (cs2.syear = (1999 + 1))
+                AND (cs1.store_name = cs2.store_name)
+                AND (cs1.store_zip = cs2.store_zip)
+                AND (cs1.store_name = cs3.store_name)
+                AND (cs1.store_zip = cs3.store_zip);
+        """)
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to