Repository: spark Updated Branches: refs/heads/branch-2.0 4ccc5643f -> 49e666138
[SPARK-15122] [SQL] Fix TPC-DS 41 - Normalize predicates before pulling them out ## What changes were proposed in this pull request? The official TPC-DS 41 query currently fails because it contains a scalar subquery with a disjunctive correlated predicate (the correlated predicates were nested in ORs). This makes the `Analyzer` pull out the entire predicate which is wrong and causes the following (correct) analysis exception: `The correlated scalar subquery can only contain equality predicates` This PR fixes this by first simplifying (or normalizing) the correlated predicates before pulling them out of the subquery. ## How was this patch tested? Manual testing on TPC-DS 41, and added a test to SubquerySuite. Author: Herman van Hovell <[email protected]> Closes #12954 from hvanhovell/SPARK-15122. (cherry picked from commit df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48) Signed-off-by: Davies Liu <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/49e66613 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/49e66613 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/49e66613 Branch: refs/heads/branch-2.0 Commit: 49e666138b2c74b8145faf7adc6fd090656e5ea0 Parents: 4ccc564 Author: Herman van Hovell <[email protected]> Authored: Fri May 6 21:06:03 2016 -0700 Committer: Davies Liu <[email protected]> Committed: Fri May 6 21:06:14 2016 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 +++- .../test/scala/org/apache/spark/sql/SubquerySuite.scala | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/49e66613/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 527d5b6..9e9a856 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.planning.IntegerIndex import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _} @@ -958,7 +959,8 @@ class Analyzer( localPredicateReferences -- p.outputSet } - val transformed = sub transformUp { + // Simplify the predicates before pulling them out. + val transformed = BooleanSimplification(sub) transformUp { case f @ Filter(cond, child) => // Find all predicates with an outer reference. 
val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter) http://git-wip-us.apache.org/repos/asf/spark/blob/49e66613/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 80bb4e0..17ac0c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -281,4 +281,16 @@ class SubquerySuite extends QueryTest with SharedSQLContext { assert(msg1.getMessage.contains( "The correlated scalar subquery can only contain equality predicates")) } + + test("disjunctive correlated scalar subquery") { + checkAnswer( + sql(""" + |select a + |from l + |where (select count(*) + | from r + | where (a = c and d = 2.0) or (a = c and d = 1.0)) > 0 + """.stripMargin), + Row(3) :: Nil) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
