[GitHub] [doris] morrySnow commented on a diff in pull request #19650: [Feature](Nereids) support advance mv

via GitHub Thu, 25 May 2023 21:04:24 -0700


morrySnow commented on code in PR #19650:
URL: https://github.com/apache/doris/pull/19650#discussion_r1206207554



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java:
##########
@@ -284,25 +285,24 @@ public Optional<String> 
getSelectedMaterializedIndexName() {
 
     @Override
     public List<Slot> computeOutput() {
+        if (selectedIndexId != ((OlapTable) table).getBaseIndexId()) {
+            return getOutputByMvIndex(selectedIndexId);

Review Comment:
   why use two way to compute output of base index and mv index?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java:
##########
@@ -284,25 +285,24 @@ public Optional<String> 
getSelectedMaterializedIndexName() {
 
     @Override
     public List<Slot> computeOutput() {
+        if (selectedIndexId != ((OlapTable) table).getBaseIndexId()) {
+            return getOutputByMvIndex(selectedIndexId);
+        }
         List<Column> otherColumns = new ArrayList<>();
         if (!Util.showHiddenColumns() && getTable().hasDeleteSign()
                 && !ConnectContext.get().getSessionVariable()
                 .skipDeleteSign()) {
             otherColumns.add(getTable().getDeleteSignColumn());
         }
         return Stream.concat(table.getBaseSchema().stream(), 
otherColumns.stream())
-                .map(col -> SlotReference.fromColumn(col, qualified()))
-                .collect(ImmutableList.toImmutableList());
-    }
-
-    @Override
-    public List<Slot> computeNonUserVisibleOutput() {
-        OlapTable olapTable = (OlapTable) table;
-        return olapTable.getVisibleIndexIdToMeta().values()
-                .stream()
-                .filter(index -> index.getIndexId() != ((OlapTable) 
table).getBaseIndexId())
-                .flatMap(index -> index.getSchema().stream())
-                .map(this::generateUniqueSlot)
+                .map(col -> {
+                    if (mvNameToSlot.containsKey(col.getName())) {
+                        return mvNameToSlot.get(col.getName());
+                    }
+                    Slot slot = SlotReference.fromColumn(col, qualified());
+                    mvNameToSlot.put(col.getName(), slot);
+                    return slot;
+                })

Review Comment:
   why need to use mvNameToSlot when compute base index output?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java:
##########
@@ -624,16 +625,23 @@ public PlanFragment visitPhysicalStorageLayerAggregate(
     @Override
     public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, 
PlanTranslatorContext context) {
         // Create OlapScanNode
-        List<Slot> slotList = new ImmutableList.Builder<Slot>()
-                .addAll(olapScan.getOutput())
-                .build();
+        List<Slot> slotList = olapScan.getOutput();
         Set<ExprId> deferredMaterializedExprIds = Collections.emptySet();
         if 
(olapScan.getMutableState(PhysicalOlapScan.DEFERRED_MATERIALIZED_SLOTS).isPresent())
 {
             deferredMaterializedExprIds = (Set<ExprId>) (olapScan
                     
.getMutableState(PhysicalOlapScan.DEFERRED_MATERIALIZED_SLOTS).get());
         }
         OlapTable olapTable = olapScan.getTable();
         TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, 
olapTable, deferredMaterializedExprIds, context);
+
+        Set<Slot> slotSet = new LinkedHashSet<>(slotList);
+        List<Slot> distributionSlots = new ArrayList<>();
+        if (olapScan.getDistributionSpec() instanceof DistributionSpecHash) {
+            distributionSlots = ((DistributionSpecHash) 
olapScan.getDistributionSpec()).getRequiredSlots().stream()
+                    .filter(s -> 
!slotSet.contains(s)).collect(Collectors.toList());
+        }
+        generateTupleDesc(distributionSlots, olapTable, 
deferredMaterializedExprIds, context);

Review Comment:
   why need to generate another tuple?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java:
##########
@@ -79,6 +80,34 @@ private DistributionSpec convertDistribution(LogicalOlapScan 
olapScan) {
                     || olapScan.getSelectedIndexId() != 
olapScan.getTable().getBaseIndexId()) {
                 // TODO if a mv is selected, we ignore base table's 
distributionInfo for now
                 // need improve this to handle the case if mv's 
distributionInfo is the same as base table
+                if (olapScan.getSelectedIndexId() != 
olapScan.getTable().getBaseIndexId()) {
+                    HashDistributionInfo hashDistributionInfo = 
(HashDistributionInfo) distributionInfo;
+                    List<Slot> output = olapScan.getOutput();
+                    List<Slot> baseOutput = 
olapScan.getOutputByMvIndex(olapScan.getTable().getBaseIndexId());
+                    List<ExprId> hashColumns = Lists.newArrayList();
+                    List<Slot> slots = new ArrayList<>();
+                    for (int i = 0; i < output.size(); i++) {
+                        for (Column column : 
hashDistributionInfo.getDistributionColumns()) {
+                            if (((SlotReference) 
output.get(i)).getColumn().get().getNameWithoutMvPrefix()
+                                    .equals(column.getName())) {
+                                hashColumns.add(output.get(i).getExprId());
+                                slots.add(output.get(i));
+                            }
+                        }
+                    }
+                    if (hashColumns.size() != 
hashDistributionInfo.getDistributionColumns().size()) {
+                        for (int i = 0; i < baseOutput.size(); i++) {
+                            for (Column column : 
hashDistributionInfo.getDistributionColumns()) {
+                                if (((SlotReference) 
baseOutput.get(i)).getColumn().get().equals(column)) {
+                                    
hashColumns.add(baseOutput.get(i).getExprId());
+                                    slots.add(baseOutput.get(i));
+                                }
+                            }
+                        }
+                    }
+                    return new DistributionSpecHash(hashColumns, 
ShuffleType.NATURAL, olapScan.getTable().getId(),

Review Comment:
   i think we should use slots as hash columns directly



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java:
##########
@@ -63,47 +60,95 @@ public List<Rule> buildRules() {
                 // project with pushdown filter.
                 // Project(Filter(Scan))
                 
logicalProject(logicalFilter(logicalOlapScan().when(this::shouldSelectIndex)))
-                        .then(project -> {
+                        .thenApply(ctx -> {
+                            LogicalProject<LogicalFilter<LogicalOlapScan>> 
project = ctx.root;
                             LogicalFilter<LogicalOlapScan> filter = 
project.child();
                             LogicalOlapScan scan = filter.child();
-                            return project.withChildren(filter.withChildren(
-                                    select(scan, project::getInputSlots, 
filter::getConjuncts)));
+
+                            LogicalOlapScan mvPlan = select(
+                                    scan, project::getInputSlots, 
filter::getConjuncts,
+                                    
Stream.concat(filter.getExpressions().stream(),
+                                        
project.getExpressions().stream()).collect(ImmutableSet.toImmutableSet()));
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+
+                            return new LogicalProject(
+                                    generateProjectsAlisa(project.getOutput(), 
slotContext),
+                                    new 
ReplaceExpressions(slotContext).replace(
+                                        
project.withChildren(filter.withChildren(mvPlan)), mvPlan));
                         
}).toRule(RuleType.MATERIALIZED_INDEX_PROJECT_FILTER_SCAN),
 
                 // project with filter that cannot be pushdown.
                 // Filter(Project(Scan))
                 
logicalFilter(logicalProject(logicalOlapScan().when(this::shouldSelectIndex)))
-                        .then(filter -> {
+                        .thenApply(ctx -> {
+                            LogicalFilter<LogicalProject<LogicalOlapScan>> 
filter = ctx.root;
                             LogicalProject<LogicalOlapScan> project = 
filter.child();
                             LogicalOlapScan scan = project.child();
-                            return filter.withChildren(project.withChildren(
-                                    select(scan, project::getInputSlots, 
ImmutableSet::of)
-                            ));
+
+                            LogicalOlapScan mvPlan = select(
+                                    scan, project::getInputSlots, 
ImmutableSet::of,
+                                    new HashSet<>(project.getExpressions()));
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+
+                            return new LogicalProject(
+                                    generateProjectsAlisa(project.getOutput(), 
slotContext),
+                                    new 
ReplaceExpressions(slotContext).replace(
+                                    
filter.withChildren(project.withChildren(mvPlan)), mvPlan));
                         
}).toRule(RuleType.MATERIALIZED_INDEX_FILTER_PROJECT_SCAN),
 
                 // scan with filters could be pushdown.
                 // Filter(Scan)
                 logicalFilter(logicalOlapScan().when(this::shouldSelectIndex))
-                        .then(filter -> {
+                        .thenApply(ctx -> {
+                            LogicalFilter<LogicalOlapScan> filter = ctx.root;
                             LogicalOlapScan scan = filter.child();
-                            return filter.withChildren(select(scan, 
filter::getOutputSet, filter::getConjuncts));
+                            LogicalOlapScan mvPlan = select(
+                                    scan, filter::getOutputSet, 
filter::getConjuncts,
+                                    new HashSet<>(filter.getExpressions()));
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+
+                            return new LogicalProject(
+                                generateProjectsAlisa(scan.getOutput(), 
slotContext),
+                                    new 
ReplaceExpressions(slotContext).replace(
+                                        new LogicalProject(mvPlan.getOutput(), 
filter.withChildren(mvPlan)), mvPlan));
                         })
                         .toRule(RuleType.MATERIALIZED_INDEX_FILTER_SCAN),
 
                 // project and scan.
                 // Project(Scan)
                 logicalProject(logicalOlapScan().when(this::shouldSelectIndex))
-                        .then(project -> {
+                        .thenApply(ctx -> {
+                            LogicalProject<LogicalOlapScan> project = ctx.root;
                             LogicalOlapScan scan = project.child();
-                            return project.withChildren(
-                                    select(scan, project::getInputSlots, 
ImmutableSet::of));
+
+                            LogicalOlapScan mvPlan = select(
+                                    scan, project::getInputSlots, 
ImmutableSet::of,
+                                    new HashSet<>(project.getExpressions()));
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+
+                            return new LogicalProject(
+                                    generateProjectsAlisa(project.getOutput(), 
slotContext),
+                                    new 
ReplaceExpressions(slotContext).replace(
+                                    project.withChildren(mvPlan), mvPlan));
                         })
                         .toRule(RuleType.MATERIALIZED_INDEX_PROJECT_SCAN),
 
                 // only scan.
                 logicalOlapScan()
                         .when(this::shouldSelectIndex)
-                        .then(scan -> select(scan, scan::getOutputSet, 
ImmutableSet::of))
+                        .thenApply(ctx -> {
+                            LogicalOlapScan scan = ctx.root;
+
+                            LogicalOlapScan mvPlan = select(
+                                    scan, scan::getOutputSet, ImmutableSet::of,
+                                    scan.getOutputSet());
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+
+                            return new LogicalProject(
+                                    generateProjectsAlisa(mvPlan.getOutput(), 
slotContext),

Review Comment:
   typo, please recheck other place to fix all typos



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java:
##########
@@ -253,13 +254,15 @@ public class NereidsRewriter extends BatchRewriteJob {
 
             topic("MV optimization",
                 topDown(
-                    // TODO: enable this rule after 
https://github.com/apache/doris/issues/18263 is fixed
-                    // new SelectMaterializedIndexWithAggregate(),
+                    new SelectMaterializedIndexWithAggregate(),
                     new SelectMaterializedIndexWithoutAggregate(),
                     new PushdownFilterThroughProject(),
                     new MergeProjects(),
                     new PruneOlapScanTablet()
-                )
+                ),
+                custom(RuleType.COLUMN_PRUNING, ColumnPruning::new),

Review Comment:
   why we need column punning twice?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DistributionSpecHash.java:
##########
@@ -58,6 +60,8 @@ public class DistributionSpecHash extends DistributionSpec {
 
     private final Map<ExprId, Integer> exprIdToEquivalenceSet;
 
+    private final List<Slot> requiredSlots;

Review Comment:
   i think we should replace columns in base index by columns in mv index when 
generate distributionSpecHash in olap scan implementation



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java:
##########
@@ -253,13 +254,15 @@ public class NereidsRewriter extends BatchRewriteJob {
 
             topic("MV optimization",
                 topDown(
-                    // TODO: enable this rule after 
https://github.com/apache/doris/issues/18263 is fixed
-                    // new SelectMaterializedIndexWithAggregate(),
+                    new SelectMaterializedIndexWithAggregate(),
                     new SelectMaterializedIndexWithoutAggregate(),
                     new PushdownFilterThroughProject(),
                     new MergeProjects(),
                     new PruneOlapScanTablet()
-                )
+                ),
+                custom(RuleType.COLUMN_PRUNING, ColumnPruning::new),
+                bottomUp(RuleSet.PUSH_DOWN_FILTERS),

Review Comment:
   why we need push down filters again?



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java:
##########
@@ -568,6 +569,11 @@ public static String mvColumnBuilder(String name) {
         return new 
StringBuilder().append(MATERIALIZED_VIEW_NAME_PREFIX).append(name).toString();
     }
 
+    public static String mvColumnBuilder(Optional<String> functionName, String 
sourceColumnName) {

Review Comment:
   add comment to explain this function use for Nereids



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[GitHub] [doris] morrySnow commented on a diff in pull request #19650: [Feature](Nereids) support advance mv

Reply via email to