This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 2b34a393312 branch-4.0: [fix](unique function) fix push unique 
function through join #58516 (#58854)
2b34a393312 is described below

commit 2b34a393312ebaad71f67327f5322430fcebfbd7
Author: yujun <[email protected]>
AuthorDate: Wed Dec 10 10:15:30 2025 +0800

    branch-4.0: [fix](unique function) fix push unique function through join 
#58516 (#58854)
    
    cherry pick from #58516
---
 .../rules/rewrite/PushDownFilterThroughJoin.java   |  7 ++
 .../rules/rewrite/PushFilterInsideJoin.java        | 23 ++++--
 .../doris/nereids/rules/rewrite/ReorderJoin.java   | 42 ++++++++++-
 ...wn_filter_through_join_with_unique_function.out | 67 +++++++++++++++++
 ...filter_through_join_with_unique_function.groovy | 84 ++++++++++++++++++++++
 5 files changed, 217 insertions(+), 6 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughJoin.java
index 6dcf1258ce5..e717a00b3db 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughJoin.java
@@ -116,6 +116,10 @@ public class PushDownFilterThroughJoin extends 
OneRewriteRuleFactory {
             Set<Expression> rightPredicates = Sets.newLinkedHashSet();
             Set<Expression> remainingPredicates = Sets.newLinkedHashSet();
             for (Expression p : filterPredicates) {
+                if (p.containsUniqueFunction()) {
+                    remainingPredicates.add(p);
+                    continue;
+                }
                 Set<Slot> slots = p.collect(SlotReference.class::isInstance);
                 if (slots.isEmpty()) {
                     leftPredicates.add(p);
@@ -154,6 +158,9 @@ public class PushDownFilterThroughJoin extends 
OneRewriteRuleFactory {
         if (!(predicate instanceof EqualTo)) {
             return false;
         }
+        if (predicate.containsUniqueFunction()) {
+            return false;
+        }
 
         EqualTo equalTo = (EqualTo) predicate;
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushFilterInsideJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushFilterInsideJoin.java
index 69d990c3bdb..c1640671d33 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushFilterInsideJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushFilterInsideJoin.java
@@ -26,8 +26,10 @@ import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
 import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.nereids.util.PlanUtils;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 import java.util.List;
 import java.util.Set;
@@ -47,18 +49,31 @@ public class PushFilterInsideJoin extends 
OneRewriteRuleFactory {
                 .when(filter -> filter.child().getJoinType().isCrossJoin()
                         || filter.child().getJoinType().isInnerJoin())
                 .then(filter -> {
-                    List<Expression> otherConditions = 
Lists.newArrayList(filter.getConjuncts());
                     LogicalJoin<Plan, Plan> join = filter.child();
                     Set<Slot> childOutput = join.getOutputSet();
-                    if 
(ExpressionUtils.getInputSlotSet(otherConditions).stream()
+                    if 
(ExpressionUtils.getInputSlotSet(filter.getConjuncts()).stream()
                             .filter(MarkJoinSlotReference.class::isInstance)
                             .anyMatch(slot -> childOutput.contains(slot))) {
                         return null;
                     }
+                    Set<Expression> remainConditions = Sets.newLinkedHashSet();
+                    List<Expression> otherConditions = 
Lists.newArrayListWithExpectedSize(
+                            filter.getConjuncts().size() + 
join.getOtherJoinConjuncts().size());
+                    for (Expression expr : filter.getConjuncts()) {
+                        if (expr.containsUniqueFunction()) {
+                            remainConditions.add(expr);
+                        } else {
+                            otherConditions.add(expr);
+                        }
+                    }
+                    if (otherConditions.isEmpty()) {
+                        return null;
+                    }
                     otherConditions.addAll(join.getOtherJoinConjuncts());
-                    return new LogicalJoin<>(join.getJoinType(), 
join.getHashJoinConjuncts(),
+                    return PlanUtils.filterOrSelf(remainConditions, new 
LogicalJoin<>(
+                            join.getJoinType(), join.getHashJoinConjuncts(),
                             otherConditions, join.getDistributeHint(), 
join.getMarkJoinSlotReference(),
-                            join.children(), join.getJoinReorderContext());
+                            join.children(), join.getJoinReorderContext()));
                 }).toRule(RuleType.PUSH_FILTER_INSIDE_JOIN);
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java
index 0a0971e3de2..f5bb7323155 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ReorderJoin.java
@@ -40,6 +40,7 @@ import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
 
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -59,6 +60,14 @@ import java.util.stream.Collectors;
  *
  * output:
  * SELECT * FROM t1 JOIN t3 ON t1.id=t3.id JOIN t2 ON t2.id=t3.id
+ *
+ * NOTICE:
+ * ReorderJoin extracts the conjuncts of all filters together, then reorders the 
joins.
+ * But if a conjunct contains a unique function, the result after reordering is 
wrong:
+ * it may produce more or fewer output rows than the original 
expression.
+ * We handle the top filter with unique functions, but skip the lower filters 
with unique functions.
+ * For lower filters containing unique functions, their joins should not be 
reordered with other joins.
+ *
  * </pre>
  * </p>
  * Using the {@link MultiJoin} to complete this task.
@@ -83,17 +92,38 @@ public class ReorderJoin extends OneRewriteRuleFactory {
                         || ((LogicalJoin<?, ?>) 
ctx.root.child()).isLeadingJoin()) {
                     return null;
                 }
+
                 LogicalFilter<Plan> filter = ctx.root;
+                Set<Expression> uniqueExprConjuncts = Sets.newHashSet();
+                Set<Expression> nonUniqueExprConjuncts = 
Sets.newHashSetWithExpectedSize(filter.getConjuncts().size());
+                for (Expression conjunct : filter.getConjuncts()) {
+                    // if reordering pushes random() down to a lower 
join,
+                    // the rewritten sql may return fewer rows than the original 
sql
+                    if (conjunct.containsUniqueFunction()) {
+                        uniqueExprConjuncts.add(conjunct);
+                    } else {
+                        nonUniqueExprConjuncts.add(conjunct);
+                    }
+                }
+                if (nonUniqueExprConjuncts.isEmpty()) {
+                    return null;
+                }
+                LogicalFilter<Plan> nonUniqueExprFilter = 
uniqueExprConjuncts.isEmpty()
+                        ? filter : 
filter.withConjunctsAndChild(nonUniqueExprConjuncts, filter.child());
 
                 Map<Plan, DistributeHint> planToHintType = Maps.newHashMap();
-                Plan plan = joinToMultiJoin(filter, planToHintType);
+                Plan plan = joinToMultiJoin(nonUniqueExprFilter, 
planToHintType);
                 Preconditions.checkState(plan instanceof MultiJoin, "join to 
multi join should return MultiJoin,"
                         + " but return plan is " + plan.getType());
                 MultiJoin multiJoin = (MultiJoin) plan;
                 ctx.statementContext.addJoinFilters(multiJoin.getJoinFilter());
                 
ctx.statementContext.setMaxNAryInnerJoin(multiJoin.children().size());
                 Plan after = multiJoinToJoin(multiJoin, planToHintType);
-                return after;
+                if (after != null && !after.equals(nonUniqueExprFilter)) {
+                    return PlanUtils.filterOrSelf(uniqueExprConjuncts, after);
+                } else {
+                    return null;
+                }
             }).toRule(RuleType.REORDER_JOIN);
     }
 
@@ -118,6 +148,14 @@ public class ReorderJoin extends OneRewriteRuleFactory {
         // Implicit rely on {rule: MergeFilters}, so don't exist 
filter--filter--join.
         if (plan instanceof LogicalFilter) {
             LogicalFilter<?> filter = (LogicalFilter<?>) plan;
+            for (Expression conjunct : filter.getConjuncts()) {
+                // (t1 join t2) join t3 where t1.a = t3.x + random()
+                // if reordered, we may get ((t1 join t3) on t1.a = t3.x + 
random()) join t2,
+                // and the reordered result may have fewer rows than the original.
+                if (conjunct.containsUniqueFunction()) {
+                    return plan;
+                }
+            }
             joinFilter.addAll(filter.getConjuncts());
             join = (LogicalJoin<?, ?>) filter.child();
         } else {
diff --git 
a/regression-test/data/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.out
 
b/regression-test/data/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.out
new file mode 100644
index 00000000000..f32eebd1835
--- /dev/null
+++ 
b/regression-test/data/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !push_down_filter_through_join_1 --
+PhysicalResultSink
+--filter((random() > 0.1))
+----NestedLoopJoin[CROSS_JOIN]
+------PhysicalProject[t1.id]
+--------PhysicalOlapScan[t1]
+------PhysicalProject[t2.id]
+--------PhysicalOlapScan[t2]
+
+-- !push_down_filter_through_join_2 --
+PhysicalResultSink
+--PhysicalProject[t1.id, t2.id]
+----filter((random() > 0.1))
+------hashJoin[INNER_JOIN] hashCondition=((expr_cast(id as BIGINT) = 
expr_(cast(id as BIGINT) + 10))) otherCondition=()
+--------PhysicalProject[cast(id as BIGINT) AS `expr_cast(id as BIGINT)`, t1.id]
+----------filter((t1.id > 100))
+------------PhysicalOlapScan[t1]
+--------PhysicalProject[(cast(id as BIGINT) + 10) AS `expr_(cast(id as BIGINT) 
+ 10)`, t2.id]
+----------PhysicalOlapScan[t2]
+
+-- !push_down_filter_through_join_3 --
+PhysicalResultSink
+--filter(((cast(id as BIGINT) + random(1, 100)) > 100))
+----NestedLoopJoin[CROSS_JOIN]
+------PhysicalProject[t1.id]
+--------PhysicalOlapScan[t1]
+------PhysicalProject[t2.id]
+--------PhysicalOlapScan[t2]
+
+-- !reorder_join_1 --
+PhysicalResultSink
+--PhysicalProject[t1.id, t2.id]
+----filter(((cast(id as BIGINT) + random(1, 100)) = cast(id as BIGINT)))
+------hashJoin[INNER_JOIN] hashCondition=((expr_(cast(id as BIGINT) * 2) = 
expr_(cast(id as BIGINT) * 5))) otherCondition=()
+--------PhysicalProject[(cast(id as BIGINT) * 2) AS `expr_(cast(id as BIGINT) 
* 2)`, t1.id]
+----------PhysicalOlapScan[t1]
+--------PhysicalProject[(cast(id as BIGINT) * 5) AS `expr_(cast(id as BIGINT) 
* 5)`, t2.id]
+----------PhysicalOlapScan[t2]
+
+-- !reorder_join_2 --
+PhysicalResultSink
+--filter(((cast(id as BIGINT) + random(1, 100)) = cast(id as BIGINT)))
+----NestedLoopJoin[CROSS_JOIN]
+------PhysicalProject[t1.id, t2.id]
+--------hashJoin[INNER_JOIN] hashCondition=((expr_(cast(id as BIGINT) * 2) = 
expr_(cast(id as BIGINT) * 5))) otherCondition=()
+----------PhysicalProject[(cast(id as BIGINT) * 2) AS `expr_(cast(id as 
BIGINT) * 2)`, t1.id]
+------------PhysicalOlapScan[t1]
+----------PhysicalProject[(cast(id as BIGINT) * 5) AS `expr_(cast(id as 
BIGINT) * 5)`, t2.id]
+------------PhysicalOlapScan[t2]
+------PhysicalProject[t3.id]
+--------PhysicalOlapScan[t2]
+
+-- !reorder_join_3 --
+PhysicalResultSink
+--PhysicalProject[t1.id, t2.id, t3.id]
+----filter((random() > 10.0))
+------hashJoin[INNER_JOIN] hashCondition=((expr_(cast(id as BIGINT) * 2) = 
expr_(cast(id as BIGINT) * 5))) otherCondition=()
+--------PhysicalProject[(cast(id as BIGINT) * 2) AS `expr_(cast(id as BIGINT) 
* 2)`, t1.id, t2.id]
+----------NestedLoopJoin[CROSS_JOIN]
+------------PhysicalProject[t1.id]
+--------------PhysicalOlapScan[t1]
+------------PhysicalProject[t2.id]
+--------------PhysicalOlapScan[t2]
+--------PhysicalProject[(cast(id as BIGINT) * 5) AS `expr_(cast(id as BIGINT) 
* 5)`, t3.id]
+----------PhysicalOlapScan[t2]
+
diff --git 
a/regression-test/suites/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.groovy
 
b/regression-test/suites/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.groovy
new file mode 100644
index 00000000000..1afbef840d2
--- /dev/null
+++ 
b/regression-test/suites/nereids_rules_p0/unique_function/push_down_filter_through_join_with_unique_function.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite('push_down_filter_through_join_with_unique_function') {
+    sql 'SET enable_nereids_planner=true'
+    sql 'SET runtime_filter_mode=OFF'
+    sql 'SET disable_join_reorder=true'
+    sql 'SET enable_fallback_to_original_planner=false'
+    sql "SET ignore_shape_nodes='PhysicalDistribute'"
+    sql "SET detail_shape_nodes='PhysicalProject'"
+    sql 'SET disable_nereids_rules=PRUNE_EMPTY_PARTITION'
+
+    qt_push_down_filter_through_join_1 '''
+        explain shape plan
+        select t1.id, t2.id
+        from t1 join t2
+        where rand() > 0.1
+        '''
+
+    qt_push_down_filter_through_join_2 '''
+        explain shape plan
+        select t1.id, t2.id
+        from t1 join t2
+        where rand() > 0.1 and t1.id = t2.id + 10 and t1.id > 100
+        '''
+
+    qt_push_down_filter_through_join_3 '''
+        explain shape plan
+        select t1.id, t2.id
+        from t1 join t2
+        where t1.id + rand(1, 100) > 100
+        '''
+
+    // sql 'SET disable_join_reorder=false'
+    // The reorder join cases really need disable_join_reorder=false,
+    // but if we set it to false, the test is not stable:
+    // the p0 test pipeline may sometimes change the join order of the two tables.
+
+    qt_reorder_join_1 '''
+         explain shape plan
+         select t1.id, t2.id
+         from t1 join t2
+         where t1.id + rand(1, 100) = t2.id and t1.id * 2 = t2.id * 5
+         '''
+
+    qt_reorder_join_2 '''
+         explain shape plan
+         select t1.id, t2.id, t3.id
+         from t1, t2, t2 as t3
+         where t1.id + rand(1, 100) = t3.id and t1.id * 2 = t2.id * 5
+         '''
+
+    // if set disable_join_reorder=false,
+    // | PhysicalResultSink
+    //  --filter((random() > 10.0))                                            
                                                                  |
+    //  ----NestedLoopJoin[CROSS_JOIN]                                         
                                                                  | | 
------PhysicalProject                                                           
                                                         |
+    //  --------hashJoin[INNER_JOIN broadcast] hashCondition=((expr_(cast(id 
as BIGINT) * 2) = expr_(cast(id as BIGINT) * 5))) otherCondition=() |
+    //  ----------PhysicalProject                                              
                                                                  |
+    //  ------------PhysicalOlapScan[t1]                                       
                                                                  |
+    //  ----------PhysicalProject                                              
                                                                  |
+    //  ------------PhysicalOlapScan[t2(t3)]                                   
                                                                  |
+    //  ------PhysicalProject                                                  
                                                                  |
+    //  --------PhysicalOlapScan[t2]                                           
                                                                 |
+    qt_reorder_join_3 '''
+         explain shape plan
+         select t1.id, t2.id, t3.id
+         from t1, t2, t2 as t3
+         where random() > 10 and t1.id * 2 = t3.id * 5
+         '''
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to