morrySnow commented on code in PR #19650: URL: https://github.com/apache/doris/pull/19650#discussion_r1206207554
########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java: ########## @@ -284,25 +285,24 @@ public Optional<String> getSelectedMaterializedIndexName() { @Override public List<Slot> computeOutput() { + if (selectedIndexId != ((OlapTable) table).getBaseIndexId()) { + return getOutputByMvIndex(selectedIndexId); Review Comment: why use two way to compute output of base index and mv index? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java: ########## @@ -284,25 +285,24 @@ public Optional<String> getSelectedMaterializedIndexName() { @Override public List<Slot> computeOutput() { + if (selectedIndexId != ((OlapTable) table).getBaseIndexId()) { + return getOutputByMvIndex(selectedIndexId); + } List<Column> otherColumns = new ArrayList<>(); if (!Util.showHiddenColumns() && getTable().hasDeleteSign() && !ConnectContext.get().getSessionVariable() .skipDeleteSign()) { otherColumns.add(getTable().getDeleteSignColumn()); } return Stream.concat(table.getBaseSchema().stream(), otherColumns.stream()) - .map(col -> SlotReference.fromColumn(col, qualified())) - .collect(ImmutableList.toImmutableList()); - } - - @Override - public List<Slot> computeNonUserVisibleOutput() { - OlapTable olapTable = (OlapTable) table; - return olapTable.getVisibleIndexIdToMeta().values() - .stream() - .filter(index -> index.getIndexId() != ((OlapTable) table).getBaseIndexId()) - .flatMap(index -> index.getSchema().stream()) - .map(this::generateUniqueSlot) + .map(col -> { + if (mvNameToSlot.containsKey(col.getName())) { + return mvNameToSlot.get(col.getName()); + } + Slot slot = SlotReference.fromColumn(col, qualified()); + mvNameToSlot.put(col.getName(), slot); + return slot; + }) Review Comment: why need to use mvNameToSlot when compute base index output? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java: ########## @@ -624,16 +625,23 @@ public PlanFragment visitPhysicalStorageLayerAggregate( @Override public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTranslatorContext context) { // Create OlapScanNode - List<Slot> slotList = new ImmutableList.Builder<Slot>() - .addAll(olapScan.getOutput()) - .build(); + List<Slot> slotList = olapScan.getOutput(); Set<ExprId> deferredMaterializedExprIds = Collections.emptySet(); if (olapScan.getMutableState(PhysicalOlapScan.DEFERRED_MATERIALIZED_SLOTS).isPresent()) { deferredMaterializedExprIds = (Set<ExprId>) (olapScan .getMutableState(PhysicalOlapScan.DEFERRED_MATERIALIZED_SLOTS).get()); } OlapTable olapTable = olapScan.getTable(); TupleDescriptor tupleDescriptor = generateTupleDesc(slotList, olapTable, deferredMaterializedExprIds, context); + + Set<Slot> slotSet = new LinkedHashSet<>(slotList); + List<Slot> distributionSlots = new ArrayList<>(); + if (olapScan.getDistributionSpec() instanceof DistributionSpecHash) { + distributionSlots = ((DistributionSpecHash) olapScan.getDistributionSpec()).getRequiredSlots().stream() + .filter(s -> !slotSet.contains(s)).collect(Collectors.toList()); + } + generateTupleDesc(distributionSlots, olapTable, deferredMaterializedExprIds, context); Review Comment: why need to generate another tuple? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/LogicalOlapScanToPhysicalOlapScan.java: ########## @@ -79,6 +80,34 @@ private DistributionSpec convertDistribution(LogicalOlapScan olapScan) { || olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId()) { // TODO if a mv is selected, we ignore base table's distributionInfo for now // need improve this to handle the case if mv's distributionInfo is the same as base table + if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId()) { + HashDistributionInfo hashDistributionInfo = (HashDistributionInfo) distributionInfo; + List<Slot> output = olapScan.getOutput(); + List<Slot> baseOutput = olapScan.getOutputByMvIndex(olapScan.getTable().getBaseIndexId()); + List<ExprId> hashColumns = Lists.newArrayList(); + List<Slot> slots = new ArrayList<>(); + for (int i = 0; i < output.size(); i++) { + for (Column column : hashDistributionInfo.getDistributionColumns()) { + if (((SlotReference) output.get(i)).getColumn().get().getNameWithoutMvPrefix() + .equals(column.getName())) { + hashColumns.add(output.get(i).getExprId()); + slots.add(output.get(i)); + } + } + } + if (hashColumns.size() != hashDistributionInfo.getDistributionColumns().size()) { + for (int i = 0; i < baseOutput.size(); i++) { + for (Column column : hashDistributionInfo.getDistributionColumns()) { + if (((SlotReference) baseOutput.get(i)).getColumn().get().equals(column)) { + hashColumns.add(baseOutput.get(i).getExprId()); + slots.add(baseOutput.get(i)); + } + } + } + } + return new DistributionSpecHash(hashColumns, ShuffleType.NATURAL, olapScan.getTable().getId(), Review Comment: i think we should use slots as hash columns directly ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/mv/SelectMaterializedIndexWithoutAggregate.java: ########## @@ -63,47 +60,95 @@ public List<Rule> buildRules() { // project with pushdown filter. // Project(Filter(Scan)) logicalProject(logicalFilter(logicalOlapScan().when(this::shouldSelectIndex))) - .then(project -> { + .thenApply(ctx -> { + LogicalProject<LogicalFilter<LogicalOlapScan>> project = ctx.root; LogicalFilter<LogicalOlapScan> filter = project.child(); LogicalOlapScan scan = filter.child(); - return project.withChildren(filter.withChildren( - select(scan, project::getInputSlots, filter::getConjuncts))); + + LogicalOlapScan mvPlan = select( + scan, project::getInputSlots, filter::getConjuncts, + Stream.concat(filter.getExpressions().stream(), + project.getExpressions().stream()).collect(ImmutableSet.toImmutableSet())); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + + return new LogicalProject( + generateProjectsAlisa(project.getOutput(), slotContext), + new ReplaceExpressions(slotContext).replace( + project.withChildren(filter.withChildren(mvPlan)), mvPlan)); }).toRule(RuleType.MATERIALIZED_INDEX_PROJECT_FILTER_SCAN), // project with filter that cannot be pushdown. // Filter(Project(Scan)) logicalFilter(logicalProject(logicalOlapScan().when(this::shouldSelectIndex))) - .then(filter -> { + .thenApply(ctx -> { + LogicalFilter<LogicalProject<LogicalOlapScan>> filter = ctx.root; LogicalProject<LogicalOlapScan> project = filter.child(); LogicalOlapScan scan = project.child(); - return filter.withChildren(project.withChildren( - select(scan, project::getInputSlots, ImmutableSet::of) - )); + + LogicalOlapScan mvPlan = select( + scan, project::getInputSlots, ImmutableSet::of, + new HashSet<>(project.getExpressions())); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + + return new LogicalProject( + generateProjectsAlisa(project.getOutput(), slotContext), + new ReplaceExpressions(slotContext).replace( + filter.withChildren(project.withChildren(mvPlan)), mvPlan)); }).toRule(RuleType.MATERIALIZED_INDEX_FILTER_PROJECT_SCAN), // scan with filters could be pushdown. // Filter(Scan) logicalFilter(logicalOlapScan().when(this::shouldSelectIndex)) - .then(filter -> { + .thenApply(ctx -> { + LogicalFilter<LogicalOlapScan> filter = ctx.root; LogicalOlapScan scan = filter.child(); - return filter.withChildren(select(scan, filter::getOutputSet, filter::getConjuncts)); + LogicalOlapScan mvPlan = select( + scan, filter::getOutputSet, filter::getConjuncts, + new HashSet<>(filter.getExpressions())); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + + return new LogicalProject( + generateProjectsAlisa(scan.getOutput(), slotContext), + new ReplaceExpressions(slotContext).replace( + new LogicalProject(mvPlan.getOutput(), filter.withChildren(mvPlan)), mvPlan)); }) .toRule(RuleType.MATERIALIZED_INDEX_FILTER_SCAN), // project and scan. // Project(Scan) logicalProject(logicalOlapScan().when(this::shouldSelectIndex)) - .then(project -> { + .thenApply(ctx -> { + LogicalProject<LogicalOlapScan> project = ctx.root; LogicalOlapScan scan = project.child(); - return project.withChildren( - select(scan, project::getInputSlots, ImmutableSet::of)); + + LogicalOlapScan mvPlan = select( + scan, project::getInputSlots, ImmutableSet::of, + new HashSet<>(project.getExpressions())); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + + return new LogicalProject( + generateProjectsAlisa(project.getOutput(), slotContext), + new ReplaceExpressions(slotContext).replace( + project.withChildren(mvPlan), mvPlan)); }) .toRule(RuleType.MATERIALIZED_INDEX_PROJECT_SCAN), // only scan. logicalOlapScan() .when(this::shouldSelectIndex) - .then(scan -> select(scan, scan::getOutputSet, ImmutableSet::of)) + .thenApply(ctx -> { + LogicalOlapScan scan = ctx.root; + + LogicalOlapScan mvPlan = select( + scan, scan::getOutputSet, ImmutableSet::of, + scan.getOutputSet()); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + + return new LogicalProject( + generateProjectsAlisa(mvPlan.getOutput(), slotContext), Review Comment: typo, please recheck other place to fix all typos ########## fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java: ########## @@ -253,13 +254,15 @@ public class NereidsRewriter extends BatchRewriteJob { topic("MV optimization", topDown( - // TODO: enable this rule after https://github.com/apache/doris/issues/18263 is fixed - // new SelectMaterializedIndexWithAggregate(), + new SelectMaterializedIndexWithAggregate(), new SelectMaterializedIndexWithoutAggregate(), new PushdownFilterThroughProject(), new MergeProjects(), new PruneOlapScanTablet() - ) + ), + custom(RuleType.COLUMN_PRUNING, ColumnPruning::new), Review Comment: why we need column punning twice? ########## fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DistributionSpecHash.java: ########## @@ -58,6 +60,8 @@ public class DistributionSpecHash extends DistributionSpec { private final Map<ExprId, Integer> exprIdToEquivalenceSet; + private final List<Slot> requiredSlots; Review Comment: i think we should replace columns in base index by columns in mv index when generate distributionSpecHash in olap scan implementation ########## fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/NereidsRewriter.java: ########## @@ -253,13 +254,15 @@ public class NereidsRewriter extends BatchRewriteJob { topic("MV optimization", topDown( - // TODO: enable this rule after https://github.com/apache/doris/issues/18263 is fixed - // new SelectMaterializedIndexWithAggregate(), + new SelectMaterializedIndexWithAggregate(), new SelectMaterializedIndexWithoutAggregate(), new PushdownFilterThroughProject(), new MergeProjects(), new PruneOlapScanTablet() - ) + ), + custom(RuleType.COLUMN_PRUNING, ColumnPruning::new), + bottomUp(RuleSet.PUSH_DOWN_FILTERS), Review Comment: why we need push down filters again? ########## fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java: ########## @@ -568,6 +569,11 @@ public static String mvColumnBuilder(String name) { return new StringBuilder().append(MATERIALIZED_VIEW_NAME_PREFIX).append(name).toString(); } + public static String mvColumnBuilder(Optional<String> functionName, String sourceColumnName) { Review Comment: add comment to explain this function use for Nereids -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org