This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 08f74ad [SPARK-26390][SQL] ColumnPruning rule should only do column pruning
08f74ad is described below
commit 08f74ada3656af401099aa79471ef8a1155a3f07
Author: Wenchen Fan <[email protected]>
AuthorDate: Wed Dec 19 09:41:30 2018 -0800
[SPARK-26390][SQL] ColumnPruning rule should only do column pruning
## What changes were proposed in this pull request?
This is a small clean up.
By design catalyst rules should be orthogonal: each rule should have its
own responsibility. However, the `ColumnPruning` rule does not only do column
pruning, but also remove no-op project and window.
This PR updates the `RemoveRedundantProject` rule to remove no-op window as
well, and clean up the `ColumnPruning` rule to only do column pruning.
## How was this patch tested?
existing tests
Closes #23343 from cloud-fan/column-pruning.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 23 +++++++++++-----------
.../catalyst/optimizer/ColumnPruningSuite.scala | 7 +++----
.../catalyst/optimizer/CombiningLimitsSuite.scala | 5 +++--
.../catalyst/optimizer/JoinOptimizationSuite.scala | 1 +
.../RemoveRedundantAliasAndProjectSuite.scala | 2 +-
.../catalyst/optimizer/RewriteSubquerySuite.scala | 2 +-
.../catalyst/optimizer/TransposeWindowSuite.scala | 2 +-
7 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 3eb6bca..44d5543 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -93,7 +93,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
RewriteCorrelatedScalarSubquery,
EliminateSerialization,
RemoveRedundantAliases,
- RemoveRedundantProject,
+ RemoveNoopOperators,
SimplifyExtractValueOps,
CombineConcats) ++
extendedOperatorOptimizationRules
@@ -177,7 +177,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
RewritePredicateSubquery,
ColumnPruning,
CollapseProject,
- RemoveRedundantProject) :+
+ RemoveNoopOperators) :+
Batch("UpdateAttributeReferences", Once,
UpdateNullabilityInAttributeReferences)
}
@@ -403,11 +403,15 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] {
}
/**
- * Remove projections from the query plan that do not make any modifications.
+ * Remove no-op operators from the query plan that do not make any modifications.
*/
-object RemoveRedundantProject extends Rule[LogicalPlan] {
+object RemoveNoopOperators extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
- case p @ Project(_, child) if p.output == child.output => child
+ // Eliminate no-op Projects
+ case p @ Project(_, child) if child.sameOutput(p) => child
+
+ // Eliminate no-op Window
+ case w: Window if w.windowExpressions.isEmpty => w.child
}
}
@@ -602,17 +606,12 @@ object ColumnPruning extends Rule[LogicalPlan] {
p.copy(child = w.copy(
windowExpressions = w.windowExpressions.filter(p.references.contains)))
- // Eliminate no-op Window
- case w: Window if w.windowExpressions.isEmpty => w.child
-
- // Eliminate no-op Projects
- case p @ Project(_, child) if child.sameOutput(p) => child
-
// Can't prune the columns on LeafNode
case p @ Project(_, _: LeafNode) => p
// for all other logical plans that inherits the output from it's children
- case p @ Project(_, child) =>
+ // Project over project is handled by the first case, skip it here.
+ case p @ Project(_, child) if !child.isInstanceOf[Project] =>
val required = child.references ++ p.references
if (!child.inputSet.subsetOf(required)) {
val newChildren = child.children.map(c => prunedChild(c, required))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index 8d7c9bf..57195d5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -34,6 +34,7 @@ class ColumnPruningSuite extends PlanTest {
val batches = Batch("Column pruning", FixedPoint(100),
PushDownPredicate,
ColumnPruning,
+ RemoveNoopOperators,
CollapseProject) :: Nil
}
@@ -340,10 +341,8 @@ class ColumnPruningSuite extends PlanTest {
test("Column pruning on Union") {
val input1 = LocalRelation('a.int, 'b.string, 'c.double)
val input2 = LocalRelation('c.int, 'd.string, 'e.double)
-    val query = Project('b :: Nil,
-      Union(input1 :: input2 :: Nil)).analyze
-    val expected = Project('b :: Nil,
-      Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil)).analyze
+    val query = Project('b :: Nil, Union(input1 :: input2 :: Nil)).analyze
+    val expected = Union(Project('b :: Nil, input1) :: Project('d :: Nil, input2) :: Nil).analyze
comparePlans(Optimize.execute(query), expected)
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala
index ef4b848..b190dd5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala
@@ -27,8 +27,9 @@ class CombiningLimitsSuite extends PlanTest {
object Optimize extends RuleExecutor[LogicalPlan] {
val batches =
- Batch("Filter Pushdown", FixedPoint(100),
- ColumnPruning) ::
+ Batch("Column Pruning", FixedPoint(100),
+ ColumnPruning,
+ RemoveNoopOperators) ::
Batch("Combine Limit", FixedPoint(10),
CombineLimits) ::
Batch("Constant Folding", FixedPoint(10),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala
index e9438b2..6fe5e61 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala
@@ -39,6 +39,7 @@ class JoinOptimizationSuite extends PlanTest {
ReorderJoin,
PushPredicateThroughJoin,
ColumnPruning,
+ RemoveNoopOperators,
CollapseProject) :: Nil
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala
index 1973b5a..3802dbf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala
@@ -33,7 +33,7 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest with PredicateHelper
FixedPoint(50),
PushProjectionThroughUnion,
RemoveRedundantAliases,
- RemoveRedundantProject) :: Nil
+ RemoveNoopOperators) :: Nil
}
test("all expressions in project list are aliased child output") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
index 6b3739c..f00d22e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
@@ -34,7 +34,7 @@ class RewriteSubquerySuite extends PlanTest {
RewritePredicateSubquery,
ColumnPruning,
CollapseProject,
- RemoveRedundantProject) :: Nil
+ RemoveNoopOperators) :: Nil
}
test("Column pruning after rewriting predicate subquery") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala
index 58b3d1c..4acd578 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor
class TransposeWindowSuite extends PlanTest {
object Optimize extends RuleExecutor[LogicalPlan] {
val batches =
-      Batch("CollapseProject", FixedPoint(100), CollapseProject, RemoveRedundantProject) ::
+      Batch("CollapseProject", FixedPoint(100), CollapseProject, RemoveNoopOperators) ::
Batch("FlipWindow", Once, CollapseWindow, TransposeWindow) :: Nil
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]