This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 13e7926d659 [fix](Nereids) ban right outer, right anti, full outer
with bucket shuffle (#26529) (#26702)
13e7926d659 is described below
commit 13e7926d659073e2b44d3ec95bab0eee5b221b0d
Author: morrySnow <[email protected]>
AuthorDate: Thu Nov 9 23:40:03 2023 +0800
[fix](Nereids) ban right outer, right anti, full outer with bucket shuffle
(#26529) (#26702)
pick from master
PR: #26529
commit id: f80495da8374a73409cbf61f3ca2fe371b89fc30
If a left bucket has no data, we do not generate an instance for that left
bucket. These joins should preserve all right-side data, but because the left
instance does not exist, the right-side data is discarded since no destination
is set. We ban these joins with bucket shuffle temporarily until we can
generate all instances for the left side in the Coordinator.
---
.../properties/ChildrenPropertiesRegulator.java | 27 ++++++++++++++++++----
.../nereids_tpcds_shape_sf100_p0/shape/query72.out | 5 ++--
.../nereids_tpcds_shape_sf100_p0/shape/query80.out | 9 +++++---
.../suites/nereids_p0/join/test_outer_join.groovy | 26 +++++++++++++++++++++
4 files changed, 57 insertions(+), 10 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
index e969e7e878b..b0bdf731e75 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
@@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import
org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinction;
import org.apache.doris.nereids.trees.plans.AggMode;
+import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.SortPhase;
import org.apache.doris.nereids.trees.plans.physical.AbstractPhysicalSort;
@@ -169,6 +170,12 @@ public class ChildrenPropertiesRegulator extends
PlanVisitor<Boolean, Void> {
return true;
}
+ private boolean couldNotRightBucketShuffleJoin(JoinType joinType) {
+ return joinType == JoinType.RIGHT_ANTI_JOIN
+ || joinType == JoinType.RIGHT_OUTER_JOIN
+ || joinType == JoinType.FULL_OUTER_JOIN;
+ }
+
@Override
public Boolean visitPhysicalHashJoin(PhysicalHashJoin<? extends Plan, ?
extends Plan> hashJoin,
Void context) {
@@ -198,12 +205,22 @@ public class ChildrenPropertiesRegulator extends
PlanVisitor<Boolean, Void> {
Optional<PhysicalProperties> updatedForLeft = Optional.empty();
Optional<PhysicalProperties> updatedForRight = Optional.empty();
- if ((leftHashSpec.getShuffleType() == ShuffleType.NATURAL
- && rightHashSpec.getShuffleType() == ShuffleType.NATURAL)) {
+ if (JoinUtils.couldColocateJoin(leftHashSpec, rightHashSpec)) {
// check colocate join with scan
- if (JoinUtils.couldColocateJoin(leftHashSpec, rightHashSpec)) {
- return true;
- }
+ return true;
+ } else if (couldNotRightBucketShuffleJoin(hashJoin.getJoinType())) {
+ // right anti, right outer, full outer join could not do bucket
shuffle join
+ // TODO remove this after we refactor coordinator
+ updatedForLeft = Optional.of(calAnotherSideRequired(
+ ShuffleType.EXECUTION_BUCKETED, leftHashSpec, leftHashSpec,
+ (DistributionSpecHash)
requiredProperties.get(0).getDistributionSpec(),
+ (DistributionSpecHash)
requiredProperties.get(0).getDistributionSpec()));
+ updatedForRight = Optional.of(calAnotherSideRequired(
+ ShuffleType.EXECUTION_BUCKETED, leftHashSpec,
rightHashSpec,
+ (DistributionSpecHash)
requiredProperties.get(0).getDistributionSpec(),
+ (DistributionSpecHash)
requiredProperties.get(1).getDistributionSpec()));
+ } else if ((leftHashSpec.getShuffleType() == ShuffleType.NATURAL
+ && rightHashSpec.getShuffleType() == ShuffleType.NATURAL)) {
updatedForRight = Optional.of(calAnotherSideRequired(
ShuffleType.STORAGE_BUCKETED, leftHashSpec, rightHashSpec,
(DistributionSpecHash)
requiredProperties.get(0).getDistributionSpec(),
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
index 58fd6f5c4a8..01fd2572b9e 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
@@ -9,8 +9,9 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk =
catalog_sales.cs_item_sk)(catalog_returns.cr_order_number =
catalog_sales.cs_order_number)
-------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_returns]
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[catalog_returns]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk =
promotion.p_promo_sk)
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
index 8911d3a81e7..6855da619e9 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
@@ -16,7 +16,8 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk
= store_returns.sr_item_sk)(store_sales.ss_ticket_number =
store_returns.sr_ticket_number)
---------------------------------PhysicalOlapScan[store_returns]
+--------------------------------PhysicalDistribute
+----------------------------------PhysicalOlapScan[store_returns]
--------------------------------PhysicalDistribute
----------------------------------hashJoin[INNER_JOIN](store_sales.ss_store_sk
= store.s_store_sk)
------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk
= item.i_item_sk)
@@ -42,7 +43,8 @@ PhysicalResultSink
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_catalog_page_sk
= catalog_page.cp_catalog_page_sk)
--------------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk
= catalog_returns.cr_item_sk)(catalog_sales.cs_order_number =
catalog_returns.cr_order_number)
-----------------------------------PhysicalOlapScan[catalog_returns]
+----------------------------------PhysicalDistribute
+------------------------------------PhysicalOlapScan[catalog_returns]
----------------------------------PhysicalDistribute
------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk
= item.i_item_sk)
--------------------------------------PhysicalDistribute
@@ -66,7 +68,8 @@ PhysicalResultSink
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk
= web_returns.wr_item_sk)(web_sales.ws_order_number =
web_returns.wr_order_number)
---------------------------------PhysicalOlapScan[web_returns]
+--------------------------------PhysicalDistribute
+----------------------------------PhysicalOlapScan[web_returns]
--------------------------------PhysicalDistribute
----------------------------------hashJoin[INNER_JOIN](web_sales.ws_web_site_sk
= web_site.web_site_sk)
------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk
= item.i_item_sk)
diff --git a/regression-test/suites/nereids_p0/join/test_outer_join.groovy
b/regression-test/suites/nereids_p0/join/test_outer_join.groovy
index 3dc132d08ef..562326175e9 100644
--- a/regression-test/suites/nereids_p0/join/test_outer_join.groovy
+++ b/regression-test/suites/nereids_p0/join/test_outer_join.groovy
@@ -20,6 +20,7 @@ suite("test_outer_join", "nereids_p0") {
sql "SET enable_fallback_to_original_planner=false"
def tbl1 = "test_outer_join1"
def tbl2 = "test_outer_join2"
+ def tbl3 = "test_outer_join3"
sql "DROP TABLE IF EXISTS ${tbl1}"
sql """
@@ -37,6 +38,15 @@ suite("test_outer_join", "nereids_p0") {
DISTRIBUTED BY RANDOM BUCKETS 30
PROPERTIES ("replication_num" = "1");
"""
+
+ sql "DROP TABLE IF EXISTS ${tbl3}"
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tbl3} (
+ c0 DECIMALV3(8,3)
+ )
+ DISTRIBUTED BY HASH (c0) BUCKETS 1 PROPERTIES ("replication_num" =
"1");
+ """
+
sql """INSERT INTO ${tbl2} (c0) VALUES ('dr'), ('x7Tq'), ('');"""
sql """INSERT INTO ${tbl1} (c0) VALUES (0.47683432698249817),
(0.8864791393280029);"""
sql """INSERT INTO ${tbl1} (c0) VALUES (0.11287713050842285);"""
@@ -56,6 +66,22 @@ suite("test_outer_join", "nereids_p0") {
qt_join """
SELECT * FROM ${tbl2} LEFT OUTER JOIN ${tbl1} ON (('') like
('15DScmSM')) WHERE ('abc' NOT LIKE 'abc');
"""
+
+ sql "set disable_join_reorder=true"
+ explain {
+ sql "SELECT * FROM ${tbl1} RIGHT OUTER JOIN ${tbl3} ON ${tbl1}.c0 =
${tbl3}.c0"
+ contains "RIGHT OUTER JOIN(PARTITIONED)"
+ }
+ explain {
+ sql "SELECT * FROM ${tbl1} RIGHT ANTI JOIN ${tbl3} ON ${tbl1}.c0 =
${tbl3}.c0"
+ contains "RIGHT ANTI JOIN(PARTITIONED)"
+ }
+ explain {
+ sql "SELECT * FROM ${tbl1} FULL OUTER JOIN ${tbl3} ON ${tbl1}.c0 =
${tbl3}.c0"
+ contains "FULL OUTER JOIN(PARTITIONED)"
+ }
+
sql "DROP TABLE IF EXISTS ${tbl1}"
sql "DROP TABLE IF EXISTS ${tbl2}"
+ sql "DROP TABLE IF EXISTS ${tbl3}"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]