vrajat commented on code in PR #14296: URL: https://github.com/apache/pinot/pull/14296#discussion_r1815439888
########## pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/PlanNodeVisitor.java: ########## @@ -63,4 +63,150 @@ public interface PlanNodeVisitor<T, C> { T visitExchange(ExchangeNode exchangeNode, C context); T visitExplained(ExplainedNode node, C context); + + /** Review Comment: Why did you choose to add this class to the same file ? ########## pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/EquivalentStagesFinder.java: ########## @@ -0,0 +1,339 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.planner.logical; + +import com.google.common.base.Preconditions; +import java.util.List; +import java.util.Objects; +import org.apache.pinot.query.planner.plannode.AggregateNode; +import org.apache.pinot.query.planner.plannode.ExchangeNode; +import org.apache.pinot.query.planner.plannode.ExplainedNode; +import org.apache.pinot.query.planner.plannode.FilterNode; +import org.apache.pinot.query.planner.plannode.JoinNode; +import org.apache.pinot.query.planner.plannode.MailboxReceiveNode; +import org.apache.pinot.query.planner.plannode.MailboxSendNode; +import org.apache.pinot.query.planner.plannode.PlanNode; +import org.apache.pinot.query.planner.plannode.PlanNodeVisitor; +import org.apache.pinot.query.planner.plannode.ProjectNode; +import org.apache.pinot.query.planner.plannode.SetOpNode; +import org.apache.pinot.query.planner.plannode.SortNode; +import org.apache.pinot.query.planner.plannode.TableScanNode; +import org.apache.pinot.query.planner.plannode.ValueNode; +import org.apache.pinot.query.planner.plannode.WindowNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * This utility class can be used to find equivalent stages in the query plan. + * + * Equivalent stages are stages that represent the same job to be done. These stages can be potentially optimized to + * execute that job only once in a special stage that broadcast the results to all the equivalent stages. + */ +public class EquivalentStagesFinder { + public static final Logger LOGGER = LoggerFactory.getLogger(EquivalentStagesFinder.class); + + private EquivalentStagesFinder() { + } + + public static GroupedStages findEquivalentStages(MailboxSendNode root) { + Visitor visitor = new Visitor(); + root.visit(visitor, null); + + return visitor._equivalentStages; + } + + /** + * A visitor that iterates the plan tree and finds equivalent stages. + * + * It may be a bit confusing that this class, which ends up being a visitor, calls another visitor to compare nodes. + * The reason is that this object implements visitor to iterate the plan tree in pre-order. Then for each + * mailbox send node (which are always the root of a stage), it calls + * {@link NodeEquivalence#areEquivalent(MailboxSendNode, MailboxSendNode)}. NodeEquivalence is another class that + * implements visitor, but this time to compare two nodes. + */ + private static class Visitor extends PlanNodeVisitor.DepthFirstVisitor<Void, Void> { + private final GroupedStages.Mutable _equivalentStages = new GroupedStages.Mutable(); + private final NodeEquivalence _nodeEquivalence = new NodeEquivalence(); + + @Override + public Void visitMailboxSend(MailboxSendNode node, Void context) { + // It is important to visit children before doing anything. + // This is a requirement on NodeEquivalence.areEquivalent() method that reduce the complexity of the algorithm + // from O(n^3) to O(n^2). + visitChildren(node, context); + + // first try to find if the current node/stage is equivalent to an already equivalent stages. + for (MailboxSendNode uniqueStage : _equivalentStages.getLeaders()) { + if (_nodeEquivalence.areEquivalent(node, uniqueStage)) { + _equivalentStages.addToGroup(uniqueStage, node); + return null; + } + } + // there is no visited stage that is equivalent to the current stage, so add it to the unique visited stages. + _equivalentStages.addNewGroup(node); + return null; + } + + /** + * A visitor that compares two nodes to see if they are equivalent. + * + * The implementation uses the already visited stages (stored in {@link #_equivalentStages}) to avoid comparing the + * same nodes multiple times. The side effect of that is that the second argument for {@link #areEquivalent} must be + * a node that was already visited. + */ + private class NodeEquivalence implements PlanNodeVisitor<Boolean, PlanNode> { Review Comment: I need to step through patch to understand. Where is the return value (Boolean) checked ? Also when are methods like `visitAggregate` called as I cannot find `visitChildren` calls in the `NodeEquivalence` class. ########## pinot-query-planner/src/test/java/org/apache/pinot/query/planner/logical/EquivalentStagesFinderTest.java: ########## @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.query.planner.logical; + +import java.util.Map; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.query.planner.plannode.MailboxSendNode; +import org.testng.annotations.Test; + +import static org.testng.Assert.*; + + +public class EquivalentStagesFinderTest extends StagesTestBase { Review Comment: Confused by the these tests. Are the tests dependent on each other ? Every test adds another stage to `_stages` ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org