This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new eca6bbe111b [fix](repeat plan) fix repeat output slot order 
inconsistent with its input expression (#60045)
eca6bbe111b is described below

commit eca6bbe111b53aef12676f3755e2e97a96df11b9
Author: yujun <[email protected]>
AuthorDate: Fri Jan 23 11:23:40 2026 +0800

    [fix](repeat plan) fix repeat output slot order inconsistent with its input 
expression (#60045)
    
    maybe relate PR: #21168
    
    BE requires that the repeat node's output slot order should be
    inconsistent with its input expressions.
    That is output slots = input expressions + GroupingID + other grouping
    functions.
    But physical translator not ensure this requirement. Then sometimes the
    repeat may have bad cast exception.
    
    for sql:
    SELECT 100000
    FROM db2.table_9_50_undef_partitions2_keys3_properties4_distributed_by5
    GROUP BY GROUPING SETS (
            (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
            , ()
            , (col_varchar_50__undef_signed)
            , (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
    );
    
    the above sql will have wrong ouput slot order
    
    then BE will have exceptions:
    (1105, 'errCode = 2, detailMessage = (172.20.57.146)
    [E-7412]assert cast err:[E-7412] Bad cast from 
type:doris::vectorized::ColumnVector<(doris::PrimitiveType)26> to 
doris::vectorized::ColumnStr<unsigned int>
    0#doris::Exception::Exception(int, std::basic_string_view<char, 
std::char_traits<char> > const&, bool) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/exception.cpp:0\n\t1#
    ...
---
 .../glue/translator/PhysicalPlanTranslator.java    | 53 +++++++-------
 .../java/org/apache/doris/planner/RepeatNode.java  |  4 ++
 .../translator/PhysicalPlanTranslatorTest.java     | 48 +++++++++++--
 .../nereids_p0/repeat/test_repeat_output_slot.out  | 70 ++++++++++++++++++
 .../repeat/test_repeat_output_slot.groovy          | 82 ++++++++++++++++++++++
 5 files changed, 227 insertions(+), 30 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 9f3a7c9cf14..81903aa0ec6 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -2528,36 +2528,37 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
         PlanFragment inputPlanFragment = repeat.child(0).accept(this, context);
         List<List<Expr>> distributeExprLists = 
getDistributeExprs(repeat.child(0));
 
-        ImmutableSet<Expression> flattenGroupingSetExprs = ImmutableSet.copyOf(
-                ExpressionUtils.flatExpressions(repeat.getGroupingSets()));
+        List<Expression> flattenGroupingExpressions = 
repeat.getGroupByExpressions();
+        Set<Slot> preRepeatExpressions = Sets.newLinkedHashSet();
+        // keep group by expression coming first
+        for (Expression groupByExpr : flattenGroupingExpressions) {
+            // NormalizeRepeat had converted group by expression to slot
+            preRepeatExpressions.add((Slot) groupByExpr);
+        }
 
-        List<Slot> aggregateFunctionUsedSlots = repeat.getOutputExpressions()
-                .stream()
-                .filter(output -> !flattenGroupingSetExprs.contains(output))
-                .filter(output -> 
!output.containsType(GroupingScalarFunction.class))
-                .distinct()
-                .map(NamedExpression::toSlot)
+        // add aggregate function used expressions
+        for (NamedExpression outputExpr : repeat.getOutputExpressions()) {
+            if (!outputExpr.containsType(GroupingScalarFunction.class)) {
+                preRepeatExpressions.add(outputExpr.toSlot());
+            }
+        }
+
+        List<Expr> preRepeatExprs = preRepeatExpressions.stream()
+                .map(expr -> ExpressionTranslator.translate(expr, context))
                 .collect(ImmutableList.toImmutableList());
 
-        // keep flattenGroupingSetExprs comes first
-        List<Expr> preRepeatExprs = 
Stream.concat(flattenGroupingSetExprs.stream(), 
aggregateFunctionUsedSlots.stream())
-                .map(expr -> ExpressionTranslator.translate(expr, 
context)).collect(ImmutableList.toImmutableList());
-
-        // outputSlots's order need same with preRepeatExprs
-        List<Slot> outputSlots = Stream.concat(Stream
-                .concat(repeat.getOutputExpressions().stream()
-                        .filter(output -> 
flattenGroupingSetExprs.contains(output)),
-                        repeat.getOutputExpressions().stream()
-                                .filter(output -> 
!flattenGroupingSetExprs.contains(output))
-                                .filter(output -> 
!output.containsType(GroupingScalarFunction.class))
-                                .distinct()
-                       ),
-                        
Stream.concat(Stream.of(repeat.getGroupingId().toSlot()),
-                                repeat.getOutputExpressions().stream()
-                                        .filter(output -> 
output.containsType(GroupingScalarFunction.class)))
-                        )
-                
.map(NamedExpression::toSlot).collect(ImmutableList.toImmutableList());
+        // outputSlots's order must match preRepeatExprs, then grouping id, 
then grouping function slots
+        ImmutableList.Builder<Slot> outputSlotsBuilder
+                = 
ImmutableList.builderWithExpectedSize(repeat.getOutputExpressions().size() + 1);
+        outputSlotsBuilder.addAll(preRepeatExpressions);
+        outputSlotsBuilder.add(repeat.getGroupingId().toSlot());
+        for (NamedExpression outputExpr : repeat.getOutputExpressions()) {
+            if (outputExpr.containsType(GroupingScalarFunction.class)) {
+                outputSlotsBuilder.add(outputExpr.toSlot());
+            }
+        }
 
+        List<Slot> outputSlots = outputSlotsBuilder.build();
         // NOTE: we should first translate preRepeatExprs, then generate 
output tuple,
         //       or else the preRepeatExprs can not find the bottom slotRef 
and throw
         //       exception: invalid slot id
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
index 45989f2c947..acfea0381c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
@@ -88,4 +88,8 @@ public class RepeatNode extends PlanNode {
     public boolean isSerialOperator() {
         return children.get(0).isSerialOperator();
     }
+
+    public GroupingInfo getGroupingInfo() {
+        return groupingInfo;
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
index 938187fef2b..fdf1c726cfc 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
@@ -17,6 +17,11 @@
 
 package org.apache.doris.nereids.glue.translator;
 
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.GroupingInfo;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.TupleDescriptor;
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.nereids.properties.DataTrait;
@@ -34,16 +39,19 @@ import 
org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
 import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.util.PlanChecker;
 import org.apache.doris.nereids.util.PlanConstructor;
 import org.apache.doris.planner.AggregationNode;
 import org.apache.doris.planner.OlapScanNode;
 import org.apache.doris.planner.PlanFragment;
 import org.apache.doris.planner.PlanNode;
 import org.apache.doris.planner.Planner;
+import org.apache.doris.planner.RepeatNode;
 import org.apache.doris.utframe.TestWithFeService;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
 import mockit.Injectable;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
@@ -53,9 +61,18 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 
 public class PhysicalPlanTranslatorTest extends TestWithFeService {
 
+    @Override
+    protected void runBeforeAll() throws Exception {
+        createDatabase("test_db");
+        createTable("create table test_db.t(a int, b int) distributed by 
hash(a) buckets 3 "
+                + "properties('replication_num' = '1');");
+        
connectContext.getSessionVariable().setDisableNereidsRules("prune_empty_partition");
+    }
+
     @Test
     public void testOlapPrune(@Injectable LogicalProperties placeHolder) 
throws Exception {
         OlapTable t1 = PlanConstructor.newOlapTable(0, "t1", 0, 
KeysType.AGG_KEYS);
@@ -93,10 +110,6 @@ public class PhysicalPlanTranslatorTest extends 
TestWithFeService {
 
     @Test
     public void testAggNeedsFinalize() throws Exception {
-        createDatabase("test_db");
-        createTable("create table test_db.t(a int, b int) distributed by 
hash(a) buckets 3 "
-                + "properties('replication_num' = '1');");
-        
connectContext.getSessionVariable().setDisableNereidsRules("prune_empty_partition");
         String querySql = "select b from test_db.t group by b";
         Planner planner = getSQLPlanner(querySql);
         Assertions.assertNotNull(planner);
@@ -125,4 +138,31 @@ public class PhysicalPlanTranslatorTest extends 
TestWithFeService {
         Assertions.assertTrue(upperNeedsFinalize,
                 "upper AggregationNode needsFinalize should be true");
     }
+
+    @Test
+    public void testRepeatInputOutputOrder() throws Exception {
+        String sql = "select grouping(a), grouping(b), grouping_id(a, b), 
sum(a + 2 * b), sum(a + 3 * b) + grouping_id(b, a, b), b, a, b, a"
+                + " from test_db.t"
+                + " group by grouping sets((a, b), (), (b), (a, b), (a + b), 
(a * b))";
+        PlanChecker.from(connectContext).checkPlannerResult(sql,
+                planner -> {
+                    Set<RepeatNode> repeatNodes = Sets.newHashSet();
+                    planner.getFragments().stream()
+                            .map(PlanFragment::getPlanRoot)
+                            .forEach(plan -> plan.collect(RepeatNode.class, 
repeatNodes));
+                    Assertions.assertEquals(1, repeatNodes.size());
+                    RepeatNode repeatNode = repeatNodes.iterator().next();
+                    GroupingInfo groupingInfo = repeatNode.getGroupingInfo();
+                    List<Expr> preRepeatExprs = 
groupingInfo.getPreRepeatExprs();
+                    TupleDescriptor outputs = 
groupingInfo.getOutputTupleDesc();
+                    for (int i = 0; i < preRepeatExprs.size(); i++) {
+                        Expr inputExpr = preRepeatExprs.get(i);
+                        Assertions.assertInstanceOf(SlotRef.class, inputExpr);
+                        Column inputColumn = ((SlotRef) inputExpr).getColumn();
+                        Column outputColumn = 
outputs.getSlots().get(i).getColumn();
+                        Assertions.assertEquals(inputColumn, outputColumn);
+                    }
+                }
+        );
+    }
 }
diff --git a/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out 
b/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out
new file mode 100644
index 00000000000..e6516a0d47c
--- /dev/null
+++ b/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out
@@ -0,0 +1,70 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql_1_shape --
+PhysicalCteAnchor ( cteId=CTEId#0 )
+--PhysicalCteProducer ( cteId=CTEId#0 )
+----hashAgg[GLOBAL]
+------PhysicalProject
+--------PhysicalOlapScan[tbl_test_repeat_output_slot]
+--PhysicalResultSink
+----PhysicalProject
+------PhysicalUnion
+--------PhysicalProject
+----------hashAgg[GLOBAL]
+------------hashAgg[LOCAL]
+--------------PhysicalRepeat
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------PhysicalProject
+----------PhysicalCteConsumer ( cteId=CTEId#0 )
+
+-- !sql_1_result --
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+
+-- !sql_2_shape --
+PhysicalCteAnchor ( cteId=CTEId#0 )
+--PhysicalCteProducer ( cteId=CTEId#0 )
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------PhysicalOlapScan[tbl_test_repeat_output_slot]
+--PhysicalResultSink
+----PhysicalProject
+------PhysicalUnion
+--------PhysicalProject
+----------filter((GROUPING_PREFIX_col_varchar_50__undef_signed__index_inverted_col_datetime_6__undef_signed_col_varchar_50__undef_signed
 > 0))
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalRepeat
+------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------PhysicalEmptyRelation
+
+-- !sql_2_result --
+\N     ALL     1       6       \N      \N      \N
+\N     ALL     1       6       \N      \N      \N
+2020-01-02T00:00       ALL     1       6       \N      2020-01-02T00:00        
\N
+2020-01-02T00:00       ALL     1       6       \N      2020-01-02T00:00        
\N
+2020-01-03T00:00       ALL     1       6       \N      2020-01-03T00:00        
\N
+2020-01-03T00:00       ALL     1       6       \N      2020-01-03T00:00        
\N
+2020-01-04T00:00       ALL     1       6       \N      2020-01-04T00:00        
\N
+2020-01-04T00:00       ALL     1       6       \N      2020-01-04T00:00        
\N
+2020-01-04T00:00       ALL     1       7       \N      \N      \N
+
diff --git 
a/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy 
b/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy
new file mode 100644
index 00000000000..c0b9f322b6a
--- /dev/null
+++ b/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_repeat_output_slot") {
+    sql """
+        SET enable_fallback_to_original_planner=false;
+        SET enable_nereids_planner=true;
+        SET ignore_shape_nodes='PhysicalDistribute';
+        SET disable_nereids_rules='PRUNE_EMPTY_PARTITION';
+        SET runtime_filter_mode=OFF;
+        SET disable_join_reorder=true;
+
+        DROP TABLE IF EXISTS tbl_test_repeat_output_slot FORCE;
+
+        """
+
+    sql """
+        CREATE TABLE tbl_test_repeat_output_slot (
+            col_datetime_6__undef_signed datetime(6),
+            col_varchar_50__undef_signed varchar(50),
+            col_varchar_50__undef_signed__index_inverted varchar(50)
+        ) engine=olap
+        distributed by hash(col_datetime_6__undef_signed) buckets 10
+        properties('replication_num' = '1');
+        """
+
+    sql """
+        INSERT INTO tbl_test_repeat_output_slot VALUES
+            (null, null, null), (null, "a", "x"), (null, "a", "y"),
+            ('2020-01-02', "b", "x"), ('2020-01-02', 'a', 'x'), ('2020-01-02', 
'b', 'y'),
+            ('2020-01-03', 'a', 'x'), ('2020-01-03', 'a', 'y'), ('2020-01-03', 
'b', 'x'), ('2020-01-03', 'b', 'y'),
+            ('2020-01-04', 'a', 'x'), ('2020-01-04', 'a', 'y'), ('2020-01-04', 
'b', 'x'), ('2020-01-04', 'b', 'y');
+        """
+
+   explainAndOrderResult 'sql_1', '''
+        SELECT 100000
+        FROM tbl_test_repeat_output_slot
+        GROUP BY GROUPING SETS (
+                (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
+                , ()
+                , (col_varchar_50__undef_signed)
+                , (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
+        );
+        '''
+
+   explainAndOrderResult 'sql_2', '''
+        SELECT MAX(col_datetime_6__undef_signed) AS total_col_datetime,
+               CASE WHEN 
GROUPING(col_varchar_50__undef_signed__index_inverted) = 1 THEN 'ALL'
+                    ELSE CAST(col_varchar_50__undef_signed__index_inverted AS 
VARCHAR)
+                    END AS pretty_val,
+               IF(GROUPING_ID(col_varchar_50__undef_signed__index_inverted,
+                              col_datetime_6__undef_signed,
+                              col_varchar_50__undef_signed) > 0, 1, 0) AS 
is_agg_row,
+               GROUPING_ID(col_varchar_50__undef_signed__index_inverted,
+                           col_datetime_6__undef_signed, 
col_varchar_50__undef_signed) AS having_filter_col,
+               col_varchar_50__undef_signed__index_inverted,
+               col_datetime_6__undef_signed,
+               col_varchar_50__undef_signed
+        FROM tbl_test_repeat_output_slot
+        GROUP BY GROUPING SETS (
+                (col_varchar_50__undef_signed__index_inverted, 
col_datetime_6__undef_signed, col_varchar_50__undef_signed),
+                (),
+                (col_varchar_50__undef_signed),
+                (col_varchar_50__undef_signed__index_inverted, 
col_datetime_6__undef_signed, col_varchar_50__undef_signed),
+                (col_varchar_50__undef_signed))
+        HAVING having_filter_col > 0;
+        '''
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to