This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new eca6bbe111b [fix](repeat plan) fix repeat output slot order
inconsistent with its input expression (#60045)
eca6bbe111b is described below
commit eca6bbe111b53aef12676f3755e2e97a96df11b9
Author: yujun <[email protected]>
AuthorDate: Fri Jan 23 11:23:40 2026 +0800
[fix](repeat plan) fix repeat output slot order inconsistent with its input
expression (#60045)
maybe relate PR: #21168
BE requires that the repeat node's output slot order should be
inconsistent with its input expressions.
That is output slots = input expressions + GroupingID + other grouping
functions.
But physical translator not ensure this requirement. Then sometimes the
repeat may have bad cast exception.
for sql:
SELECT 100000
FROM db2.table_9_50_undef_partitions2_keys3_properties4_distributed_by5
GROUP BY GROUPING SETS (
(col_datetime_6__undef_signed, col_varchar_50__undef_signed)
, ()
, (col_varchar_50__undef_signed)
, (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
);
the above sql will have wrong ouput slot order
then BE will have exceptions:
(1105, 'errCode = 2, detailMessage = (172.20.57.146)
[E-7412]assert cast err:[E-7412] Bad cast from
type:doris::vectorized::ColumnVector<(doris::PrimitiveType)26> to
doris::vectorized::ColumnStr<unsigned int>
0#doris::Exception::Exception(int, std::basic_string_view<char,
std::char_traits<char> > const&, bool) at
/home/zcp/repo_center/doris_master/doris/be/src/common/exception.cpp:0\n\t1#
...
---
.../glue/translator/PhysicalPlanTranslator.java | 53 +++++++-------
.../java/org/apache/doris/planner/RepeatNode.java | 4 ++
.../translator/PhysicalPlanTranslatorTest.java | 48 +++++++++++--
.../nereids_p0/repeat/test_repeat_output_slot.out | 70 ++++++++++++++++++
.../repeat/test_repeat_output_slot.groovy | 82 ++++++++++++++++++++++
5 files changed, 227 insertions(+), 30 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 9f3a7c9cf14..81903aa0ec6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -2528,36 +2528,37 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
PlanFragment inputPlanFragment = repeat.child(0).accept(this, context);
List<List<Expr>> distributeExprLists =
getDistributeExprs(repeat.child(0));
- ImmutableSet<Expression> flattenGroupingSetExprs = ImmutableSet.copyOf(
- ExpressionUtils.flatExpressions(repeat.getGroupingSets()));
+ List<Expression> flattenGroupingExpressions =
repeat.getGroupByExpressions();
+ Set<Slot> preRepeatExpressions = Sets.newLinkedHashSet();
+ // keep group by expression coming first
+ for (Expression groupByExpr : flattenGroupingExpressions) {
+ // NormalizeRepeat had converted group by expression to slot
+ preRepeatExpressions.add((Slot) groupByExpr);
+ }
- List<Slot> aggregateFunctionUsedSlots = repeat.getOutputExpressions()
- .stream()
- .filter(output -> !flattenGroupingSetExprs.contains(output))
- .filter(output ->
!output.containsType(GroupingScalarFunction.class))
- .distinct()
- .map(NamedExpression::toSlot)
+ // add aggregate function used expressions
+ for (NamedExpression outputExpr : repeat.getOutputExpressions()) {
+ if (!outputExpr.containsType(GroupingScalarFunction.class)) {
+ preRepeatExpressions.add(outputExpr.toSlot());
+ }
+ }
+
+ List<Expr> preRepeatExprs = preRepeatExpressions.stream()
+ .map(expr -> ExpressionTranslator.translate(expr, context))
.collect(ImmutableList.toImmutableList());
- // keep flattenGroupingSetExprs comes first
- List<Expr> preRepeatExprs =
Stream.concat(flattenGroupingSetExprs.stream(),
aggregateFunctionUsedSlots.stream())
- .map(expr -> ExpressionTranslator.translate(expr,
context)).collect(ImmutableList.toImmutableList());
-
- // outputSlots's order need same with preRepeatExprs
- List<Slot> outputSlots = Stream.concat(Stream
- .concat(repeat.getOutputExpressions().stream()
- .filter(output ->
flattenGroupingSetExprs.contains(output)),
- repeat.getOutputExpressions().stream()
- .filter(output ->
!flattenGroupingSetExprs.contains(output))
- .filter(output ->
!output.containsType(GroupingScalarFunction.class))
- .distinct()
- ),
-
Stream.concat(Stream.of(repeat.getGroupingId().toSlot()),
- repeat.getOutputExpressions().stream()
- .filter(output ->
output.containsType(GroupingScalarFunction.class)))
- )
-
.map(NamedExpression::toSlot).collect(ImmutableList.toImmutableList());
+ // outputSlots's order must match preRepeatExprs, then grouping id,
then grouping function slots
+ ImmutableList.Builder<Slot> outputSlotsBuilder
+ =
ImmutableList.builderWithExpectedSize(repeat.getOutputExpressions().size() + 1);
+ outputSlotsBuilder.addAll(preRepeatExpressions);
+ outputSlotsBuilder.add(repeat.getGroupingId().toSlot());
+ for (NamedExpression outputExpr : repeat.getOutputExpressions()) {
+ if (outputExpr.containsType(GroupingScalarFunction.class)) {
+ outputSlotsBuilder.add(outputExpr.toSlot());
+ }
+ }
+ List<Slot> outputSlots = outputSlotsBuilder.build();
// NOTE: we should first translate preRepeatExprs, then generate
output tuple,
// or else the preRepeatExprs can not find the bottom slotRef
and throw
// exception: invalid slot id
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
index 45989f2c947..acfea0381c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
@@ -88,4 +88,8 @@ public class RepeatNode extends PlanNode {
public boolean isSerialOperator() {
return children.get(0).isSerialOperator();
}
+
+ public GroupingInfo getGroupingInfo() {
+ return groupingInfo;
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
index 938187fef2b..fdf1c726cfc 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java
@@ -17,6 +17,11 @@
package org.apache.doris.nereids.glue.translator;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.GroupingInfo;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.TupleDescriptor;
+import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.nereids.properties.DataTrait;
@@ -34,16 +39,19 @@ import
org.apache.doris.nereids.trees.plans.physical.PhysicalFilter;
import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.util.PlanChecker;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.planner.AggregationNode;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanFragment;
import org.apache.doris.planner.PlanNode;
import org.apache.doris.planner.Planner;
+import org.apache.doris.planner.RepeatNode;
import org.apache.doris.utframe.TestWithFeService;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
import mockit.Injectable;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -53,9 +61,18 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
+import java.util.Set;
public class PhysicalPlanTranslatorTest extends TestWithFeService {
+ @Override
+ protected void runBeforeAll() throws Exception {
+ createDatabase("test_db");
+ createTable("create table test_db.t(a int, b int) distributed by
hash(a) buckets 3 "
+ + "properties('replication_num' = '1');");
+
connectContext.getSessionVariable().setDisableNereidsRules("prune_empty_partition");
+ }
+
@Test
public void testOlapPrune(@Injectable LogicalProperties placeHolder)
throws Exception {
OlapTable t1 = PlanConstructor.newOlapTable(0, "t1", 0,
KeysType.AGG_KEYS);
@@ -93,10 +110,6 @@ public class PhysicalPlanTranslatorTest extends
TestWithFeService {
@Test
public void testAggNeedsFinalize() throws Exception {
- createDatabase("test_db");
- createTable("create table test_db.t(a int, b int) distributed by
hash(a) buckets 3 "
- + "properties('replication_num' = '1');");
-
connectContext.getSessionVariable().setDisableNereidsRules("prune_empty_partition");
String querySql = "select b from test_db.t group by b";
Planner planner = getSQLPlanner(querySql);
Assertions.assertNotNull(planner);
@@ -125,4 +138,31 @@ public class PhysicalPlanTranslatorTest extends
TestWithFeService {
Assertions.assertTrue(upperNeedsFinalize,
"upper AggregationNode needsFinalize should be true");
}
+
+ @Test
+ public void testRepeatInputOutputOrder() throws Exception {
+ String sql = "select grouping(a), grouping(b), grouping_id(a, b),
sum(a + 2 * b), sum(a + 3 * b) + grouping_id(b, a, b), b, a, b, a"
+ + " from test_db.t"
+ + " group by grouping sets((a, b), (), (b), (a, b), (a + b),
(a * b))";
+ PlanChecker.from(connectContext).checkPlannerResult(sql,
+ planner -> {
+ Set<RepeatNode> repeatNodes = Sets.newHashSet();
+ planner.getFragments().stream()
+ .map(PlanFragment::getPlanRoot)
+ .forEach(plan -> plan.collect(RepeatNode.class,
repeatNodes));
+ Assertions.assertEquals(1, repeatNodes.size());
+ RepeatNode repeatNode = repeatNodes.iterator().next();
+ GroupingInfo groupingInfo = repeatNode.getGroupingInfo();
+ List<Expr> preRepeatExprs =
groupingInfo.getPreRepeatExprs();
+ TupleDescriptor outputs =
groupingInfo.getOutputTupleDesc();
+ for (int i = 0; i < preRepeatExprs.size(); i++) {
+ Expr inputExpr = preRepeatExprs.get(i);
+ Assertions.assertInstanceOf(SlotRef.class, inputExpr);
+ Column inputColumn = ((SlotRef) inputExpr).getColumn();
+ Column outputColumn =
outputs.getSlots().get(i).getColumn();
+ Assertions.assertEquals(inputColumn, outputColumn);
+ }
+ }
+ );
+ }
}
diff --git a/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out
b/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out
new file mode 100644
index 00000000000..e6516a0d47c
--- /dev/null
+++ b/regression-test/data/nereids_p0/repeat/test_repeat_output_slot.out
@@ -0,0 +1,70 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql_1_shape --
+PhysicalCteAnchor ( cteId=CTEId#0 )
+--PhysicalCteProducer ( cteId=CTEId#0 )
+----hashAgg[GLOBAL]
+------PhysicalProject
+--------PhysicalOlapScan[tbl_test_repeat_output_slot]
+--PhysicalResultSink
+----PhysicalProject
+------PhysicalUnion
+--------PhysicalProject
+----------hashAgg[GLOBAL]
+------------hashAgg[LOCAL]
+--------------PhysicalRepeat
+----------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------PhysicalProject
+----------PhysicalCteConsumer ( cteId=CTEId#0 )
+
+-- !sql_1_result --
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+100000
+
+-- !sql_2_shape --
+PhysicalCteAnchor ( cteId=CTEId#0 )
+--PhysicalCteProducer ( cteId=CTEId#0 )
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------PhysicalOlapScan[tbl_test_repeat_output_slot]
+--PhysicalResultSink
+----PhysicalProject
+------PhysicalUnion
+--------PhysicalProject
+----------filter((GROUPING_PREFIX_col_varchar_50__undef_signed__index_inverted_col_datetime_6__undef_signed_col_varchar_50__undef_signed
> 0))
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalRepeat
+------------------PhysicalCteConsumer ( cteId=CTEId#0 )
+--------PhysicalEmptyRelation
+
+-- !sql_2_result --
+\N ALL 1 6 \N \N \N
+\N ALL 1 6 \N \N \N
+2020-01-02T00:00 ALL 1 6 \N 2020-01-02T00:00
\N
+2020-01-02T00:00 ALL 1 6 \N 2020-01-02T00:00
\N
+2020-01-03T00:00 ALL 1 6 \N 2020-01-03T00:00
\N
+2020-01-03T00:00 ALL 1 6 \N 2020-01-03T00:00
\N
+2020-01-04T00:00 ALL 1 6 \N 2020-01-04T00:00
\N
+2020-01-04T00:00 ALL 1 6 \N 2020-01-04T00:00
\N
+2020-01-04T00:00 ALL 1 7 \N \N \N
+
diff --git
a/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy
b/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy
new file mode 100644
index 00000000000..c0b9f322b6a
--- /dev/null
+++ b/regression-test/suites/nereids_p0/repeat/test_repeat_output_slot.groovy
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_repeat_output_slot") {
+ sql """
+ SET enable_fallback_to_original_planner=false;
+ SET enable_nereids_planner=true;
+ SET ignore_shape_nodes='PhysicalDistribute';
+ SET disable_nereids_rules='PRUNE_EMPTY_PARTITION';
+ SET runtime_filter_mode=OFF;
+ SET disable_join_reorder=true;
+
+ DROP TABLE IF EXISTS tbl_test_repeat_output_slot FORCE;
+
+ """
+
+ sql """
+ CREATE TABLE tbl_test_repeat_output_slot (
+ col_datetime_6__undef_signed datetime(6),
+ col_varchar_50__undef_signed varchar(50),
+ col_varchar_50__undef_signed__index_inverted varchar(50)
+ ) engine=olap
+ distributed by hash(col_datetime_6__undef_signed) buckets 10
+ properties('replication_num' = '1');
+ """
+
+ sql """
+ INSERT INTO tbl_test_repeat_output_slot VALUES
+ (null, null, null), (null, "a", "x"), (null, "a", "y"),
+ ('2020-01-02', "b", "x"), ('2020-01-02', 'a', 'x'), ('2020-01-02',
'b', 'y'),
+ ('2020-01-03', 'a', 'x'), ('2020-01-03', 'a', 'y'), ('2020-01-03',
'b', 'x'), ('2020-01-03', 'b', 'y'),
+ ('2020-01-04', 'a', 'x'), ('2020-01-04', 'a', 'y'), ('2020-01-04',
'b', 'x'), ('2020-01-04', 'b', 'y');
+ """
+
+ explainAndOrderResult 'sql_1', '''
+ SELECT 100000
+ FROM tbl_test_repeat_output_slot
+ GROUP BY GROUPING SETS (
+ (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
+ , ()
+ , (col_varchar_50__undef_signed)
+ , (col_datetime_6__undef_signed, col_varchar_50__undef_signed)
+ );
+ '''
+
+ explainAndOrderResult 'sql_2', '''
+ SELECT MAX(col_datetime_6__undef_signed) AS total_col_datetime,
+ CASE WHEN
GROUPING(col_varchar_50__undef_signed__index_inverted) = 1 THEN 'ALL'
+ ELSE CAST(col_varchar_50__undef_signed__index_inverted AS
VARCHAR)
+ END AS pretty_val,
+ IF(GROUPING_ID(col_varchar_50__undef_signed__index_inverted,
+ col_datetime_6__undef_signed,
+ col_varchar_50__undef_signed) > 0, 1, 0) AS
is_agg_row,
+ GROUPING_ID(col_varchar_50__undef_signed__index_inverted,
+ col_datetime_6__undef_signed,
col_varchar_50__undef_signed) AS having_filter_col,
+ col_varchar_50__undef_signed__index_inverted,
+ col_datetime_6__undef_signed,
+ col_varchar_50__undef_signed
+ FROM tbl_test_repeat_output_slot
+ GROUP BY GROUPING SETS (
+ (col_varchar_50__undef_signed__index_inverted,
col_datetime_6__undef_signed, col_varchar_50__undef_signed),
+ (),
+ (col_varchar_50__undef_signed),
+ (col_varchar_50__undef_signed__index_inverted,
col_datetime_6__undef_signed, col_varchar_50__undef_signed),
+ (col_varchar_50__undef_signed))
+ HAVING having_filter_col > 0;
+ '''
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]