This is an automated email from the ASF dual-hosted git repository.

zykkk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a722a102ab9 [improvement](nereids) Simplify ScanNode projection 
handling by removing redundant conditions (#40801)
a722a102ab9 is described below

commit a722a102ab9fcb76b7fb77ef44dffabb835660b5
Author: zy-kkk <zhongy...@gmail.com>
AuthorDate: Tue Sep 24 14:43:06 2024 +0800

    [improvement](nereids) Simplify ScanNode projection handling by removing 
redundant conditions (#40801)
    
    This PR simplifies the handling of `ScanNode` projection logic.
    Previously, the code included multiple conditional checks to determine
    whether a `projectionTuple` should be generated. These conditions have
    been removed, and now `projectionTuple `is always generated for
    `ScanNode`, ensuring a consistent projection setup. Additionally,
    redundant handling of `SlotId` and `SlotRef` has been eliminated, making
    the code cleaner and easier to maintain. The behavior for `OlapScanNode`
    remains unchanged.
---
 .../glue/translator/PhysicalPlanTranslator.java    | 49 +++++-----------------
 .../jdbc/test_doris_jdbc_catalog.out               |  8 ++++
 .../jdbc/test_doris_jdbc_catalog.groovy            |  4 ++
 .../distribute/distribution_expr.groovy            |  6 +--
 4 files changed, 26 insertions(+), 41 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 0bf767ac880..c889a2a75a3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -2003,21 +2003,10 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
             // slotIdsByOrder is used to ensure the ScanNode's output order is 
same with current Project
             // if we change the output order in translate project, the upper 
node will receive wrong order
             // tuple, since they get the order from project.getOutput() not 
scan.getOutput()./
-            List<SlotId> slotIdsByOrder = Lists.newArrayList();
-            if (requiredByProjectSlotIdSet.size() != requiredSlotIdSet.size()
-                    || new HashSet<>(projectionExprs).size() != 
projectionExprs.size()
-                    || projectionExprs.stream().anyMatch(expr -> !(expr 
instanceof SlotRef))) {
-                projectionTuple = generateTupleDesc(slots,
-                        ((ScanNode) inputPlanNode).getTupleDesc().getTable(), 
context);
-                inputPlanNode.setProjectList(projectionExprs);
-                inputPlanNode.setOutputTupleDesc(projectionTuple);
-            } else {
-                for (int i = 0; i < slots.size(); ++i) {
-                    context.addExprIdSlotRefPair(slots.get(i).getExprId(),
-                            (SlotRef) projectionExprs.get(i));
-                    slotIdsByOrder.add(((SlotRef) 
projectionExprs.get(i)).getSlotId());
-                }
-            }
+            projectionTuple = generateTupleDesc(slots,
+                    ((ScanNode) inputPlanNode).getTupleDesc().getTable(), 
context);
+            inputPlanNode.setProjectList(projectionExprs);
+            inputPlanNode.setOutputTupleDesc(projectionTuple);
 
             // TODO: this is a temporary scheme to support two phase read when 
has project.
             //  we need to refactor all topn opt into rbo stage.
@@ -2027,20 +2016,16 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
                 SlotDescriptor lastSlot = 
olapScanSlots.get(olapScanSlots.size() - 1);
                 if (lastSlot.getColumn() != null
                         && 
lastSlot.getColumn().getName().equals(Column.ROWID_COL)) {
-                    if (projectionTuple != null) {
-                        injectRowIdColumnSlot(projectionTuple);
-                        SlotRef slotRef = new SlotRef(lastSlot);
-                        inputPlanNode.getProjectList().add(slotRef);
-                        requiredByProjectSlotIdSet.add(lastSlot.getId());
-                    } else {
-                        slotIdsByOrder.add(lastSlot.getId());
-                    }
+                    injectRowIdColumnSlot(projectionTuple);
+                    SlotRef slotRef = new SlotRef(lastSlot);
+                    inputPlanNode.getProjectList().add(slotRef);
+                    requiredByProjectSlotIdSet.add(lastSlot.getId());
                     requiredSlotIdSet.add(lastSlot.getId());
                 }
                 ((OlapScanNode) inputPlanNode).updateRequiredSlots(context, 
requiredByProjectSlotIdSet);
             }
             updateScanSlotsMaterialization((ScanNode) inputPlanNode, 
requiredSlotIdSet,
-                    requiredByProjectSlotIdSet, slotIdsByOrder, context);
+                    requiredByProjectSlotIdSet, context);
         } else {
             TupleDescriptor tupleDescriptor = generateTupleDesc(slots, null, 
context);
             inputPlanNode.setProjectList(projectionExprs);
@@ -2468,22 +2453,10 @@ public class PhysicalPlanTranslator extends 
DefaultPlanVisitor<PlanFragment, Pla
 
     private void updateScanSlotsMaterialization(ScanNode scanNode,
             Set<SlotId> requiredSlotIdSet, Set<SlotId> 
requiredByProjectSlotIdSet,
-            List<SlotId> slotIdsByOrder, PlanTranslatorContext context) {
+            PlanTranslatorContext context) {
         // TODO: use smallest slot if do not need any slot in upper node
         SlotDescriptor smallest = scanNode.getTupleDesc().getSlots().get(0);
-        if (CollectionUtils.isNotEmpty(slotIdsByOrder)) {
-            // if we eliminate project above scan, we should ensure the slot 
order of scan's output is same with
-            // the projection's output. So, we need to reorder the output slot 
in scan's tuple.
-            Map<SlotId, SlotDescriptor> idToSlotDescMap = 
scanNode.getTupleDesc().getSlots().stream()
-                    .filter(s -> requiredSlotIdSet.contains(s.getId()))
-                    .collect(Collectors.toMap(SlotDescriptor::getId, s -> s));
-            scanNode.getTupleDesc().getSlots().clear();
-            for (SlotId slotId : slotIdsByOrder) {
-                
scanNode.getTupleDesc().getSlots().add(idToSlotDescMap.get(slotId));
-            }
-        } else {
-            scanNode.getTupleDesc().getSlots().removeIf(s -> 
!requiredSlotIdSet.contains(s.getId()));
-        }
+        scanNode.getTupleDesc().getSlots().removeIf(s -> 
!requiredSlotIdSet.contains(s.getId()));
         if (scanNode.getTupleDesc().getSlots().isEmpty()) {
             scanNode.getTupleDesc().getSlots().add(smallest);
         }
diff --git 
a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out 
b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out
index 3fec3f8a574..9695f628fee 100644
--- a/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out
+++ b/regression-test/data/external_table_p0/jdbc/test_doris_jdbc_catalog.out
@@ -141,6 +141,14 @@ char_col   char(85)        Yes     true    \N      NONE
 varchar_col    char(85)        Yes     true    \N      NONE
 json_col       text    Yes     true    \N      NONE
 
+-- !sql --
+\N     \N
+a      1
+
+-- !sql --
+\N     \N
+1      a
+
 -- !sql --
 doris_jdbc_catalog
 
diff --git 
a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy 
b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy
index efdec871481..e57ee5ecb37 100644
--- 
a/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy
+++ 
b/regression-test/suites/external_table_p0/jdbc/test_doris_jdbc_catalog.groovy
@@ -231,6 +231,10 @@ suite("test_doris_jdbc_catalog", 
"p0,external,doris,external_docker,external_doc
     // test query tvf
     qt_sql """desc function query("catalog" = "doris_jdbc_catalog", "query" = 
"select * from regression_test_jdbc_catalog_p0.base");"""
 
+    order_qt_sql """ select varchar_col,tinyint_col from query("catalog" = 
"doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from 
regression_test_jdbc_catalog_p0.base");"""
+
+    order_qt_sql """ select tinyint_col,varchar_col from query("catalog" = 
"doris_jdbc_catalog", "query" = "select varchar_col,tinyint_col from 
regression_test_jdbc_catalog_p0.base");"""
+
     //clean
     qt_sql """select current_catalog()"""
     sql "switch internal"
diff --git 
a/regression-test/suites/nereids_syntax_p0/distribute/distribution_expr.groovy 
b/regression-test/suites/nereids_syntax_p0/distribute/distribution_expr.groovy
index 55f6fd78402..c2ca8a40134 100644
--- 
a/regression-test/suites/nereids_syntax_p0/distribute/distribution_expr.groovy
+++ 
b/regression-test/suites/nereids_syntax_p0/distribute/distribution_expr.groovy
@@ -88,8 +88,8 @@ suite("distribution_expr") {
                 ON tmp1 . `pk` = tmp2 . `pk`;
                     """
                contains "BUCKET_SHUFFLE"
-               contains "distribute expr lists: pk[#22]"
-               contains "distribute expr lists: pk[#11]"
+               contains "distribute expr lists: pk[#27]"
+               contains "distribute expr lists: pk[#14]"
        }
 
         multi_sql """
@@ -124,7 +124,7 @@ suite("distribution_expr") {
                select tmp.k1 from baseall d join (select a.k1 as k1 from 
baseall b join test a on (a.k1=b.k1)) tmp on tmp.k1 = d.k1;
                 """
                contains "COLOCATE"
+               contains "distribute expr lists: k1[#6]"
                contains "distribute expr lists: k1[#5]"
-               contains "distribute expr lists: k1[#4]"
         }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to