Repository: spark
Updated Branches:
refs/heads/master 7ac70e7ba -> fa7d9d708
[SPARK-18063][SQL] Failed to infer constraints over multiple aliases
## What changes were proposed in this pull request?
The `UnaryNode.getAliasedConstraints` function fails to replace all expressions
by their alias where constraints contains more than one expression to be
replaced.
For example:
```
val tr = LocalRelation('a.int, 'b.string, 'c.int)
val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
multiAlias.analyze.constraints
```
currently outputs:
```
ExpressionSet(Seq(
IsNotNull(resolveColumn(multiAlias.analyze, "x")),
IsNotNull(resolveColumn(multiAlias.analyze, "y"))
)
```
The constraint `resolveColumn(multiAlias.analyze, "x") ===
resolveColumn(multiAlias.analyze, "y") + 10)` is missing.
## How was this patch tested?
Add new test cases in `ConstraintPropagationSuite`.
Author: jiangxingbo <[email protected]>
Closes #15597 from jiangxb1987/alias-constraints.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa7d9d70
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa7d9d70
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa7d9d70
Branch: refs/heads/master
Commit: fa7d9d70825a6816495d239da925d0087f7cb94f
Parents: 7ac70e7
Author: jiangxingbo <[email protected]>
Authored: Wed Oct 26 20:12:20 2016 +0200
Committer: Reynold Xin <[email protected]>
Committed: Wed Oct 26 20:12:20 2016 +0200
----------------------------------------------------------------------
.../sql/catalyst/plans/logical/LogicalPlan.scala | 16 ++++++++++------
.../catalyst/plans/ConstraintPropagationSuite.scala | 8 ++++++++
2 files changed, 18 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 0972547..b0a4145 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -293,15 +293,19 @@ abstract class UnaryNode extends LogicalPlan {
* expressions with the corresponding alias
*/
protected def getAliasedConstraints(projectList: Seq[NamedExpression]):
Set[Expression] = {
- projectList.flatMap {
+ var allConstraints = child.constraints.asInstanceOf[Set[Expression]]
+ projectList.foreach {
case a @ Alias(e, _) =>
- child.constraints.map(_ transform {
+ // For every alias in `projectList`, replace the reference in
constraints by its attribute.
+ allConstraints ++= allConstraints.map(_ transform {
case expr: Expression if expr.semanticEquals(e) =>
a.toAttribute
- }).union(Set(EqualNullSafe(e, a.toAttribute)))
- case _ =>
- Set.empty[Expression]
- }.toSet
+ })
+ allConstraints += EqualNullSafe(e, a.toAttribute)
+ case _ => // Don't change.
+ }
+
+ allConstraints -- child.constraints
}
override protected def validConstraints: Set[Expression] = child.constraints
http://git-wip-us.apache.org/repos/asf/spark/blob/fa7d9d70/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 8d6a49a..8068ce9 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -128,8 +128,16 @@ class ConstraintPropagationSuite extends SparkFunSuite {
ExpressionSet(Seq(resolveColumn(aliasedRelation.analyze, "x") > 10,
IsNotNull(resolveColumn(aliasedRelation.analyze, "x")),
resolveColumn(aliasedRelation.analyze, "b") <=>
resolveColumn(aliasedRelation.analyze, "y"),
+ resolveColumn(aliasedRelation.analyze, "z") <=>
resolveColumn(aliasedRelation.analyze, "x"),
resolveColumn(aliasedRelation.analyze, "z") > 10,
IsNotNull(resolveColumn(aliasedRelation.analyze, "z")))))
+
+ val multiAlias = tr.where('a === 'c + 10).select('a.as('x), 'c.as('y))
+ verifyConstraints(multiAlias.analyze.constraints,
+ ExpressionSet(Seq(IsNotNull(resolveColumn(multiAlias.analyze, "x")),
+ IsNotNull(resolveColumn(multiAlias.analyze, "y")),
+ resolveColumn(multiAlias.analyze, "x") ===
resolveColumn(multiAlias.analyze, "y") + 10))
+ )
}
test("propagating constraints in union") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]