This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 593b423  [SPARK-31958][SQL] normalize special floating numbers in subquery
593b423 is described below

commit 593b42323255f44b268ca5148f1fa817f3c01de7
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Thu Jun 11 06:39:14 2020 +0000

    [SPARK-31958][SQL] normalize special floating numbers in subquery

    ### What changes were proposed in this pull request?

    This is a followup of https://github.com/apache/spark/pull/23388.

    https://github.com/apache/spark/pull/23388 has an issue: it doesn't handle subquery expressions, assuming they will all be turned into joins. However, this is not true for non-correlated subquery expressions.

    This PR fixes the issue. The rule no longer skips `Subquery`, so subquery expressions are handled by `OptimizeSubqueries`, which runs the optimizer on the subquery plan. Note that correlated subquery expressions will be handled twice: once in `OptimizeSubqueries`, and once more later when they are rewritten into joins. This is OK because `NormalizeFloatingNumbers` is now idempotent.

    ### Why are the changes needed?

    Fixes a bug: special floating-point numbers were not normalized in non-correlated subquery expressions.

    ### Does this PR introduce _any_ user-facing change?

    Yes, see the newly added test.

    ### How was this patch tested?

    A new test.

    Closes #28785 from cloud-fan/normalize.

    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 6fb9c80da129d0b43f9ff5b8be6ce8bad992a4ed)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
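For context on why the rule exists: grouping and join keys end up compared by their binary representation, while `-0.0 == 0.0` holds at the value level and the two encode to different bit patterns (similarly, the many NaN encodings should all compare equal). The snippet below is a minimal standalone sketch of that contract, not Spark's actual code; `normalize` is a hypothetical stand-in for the rule's effect on a double-typed key, including the idempotence the commit message relies on.

```scala
// Toy sketch only: `normalize` approximates the effect of
// NormalizeFloatingNumbers on a double key; it is not Spark's implementation.
object NormalizeSketch {
  def normalize(d: Double): Double =
    if (d.isNaN) Double.NaN      // collapse every NaN encoding to the canonical one
    else if (d == 0.0) 0.0       // rewrite -0.0 to +0.0 (note -0.0 == 0.0 is true)
    else d

  def bits(d: Double): Long = java.lang.Double.doubleToRawLongBits(d)

  def main(args: Array[String]): Unit = {
    // -0.0 and 0.0 are equal as values but differ in their binary encoding,
    // which is what breaks binary comparison of join/grouping keys.
    assert(-0.0 == 0.0 && bits(-0.0) != bits(0.0))
    // After normalization the encodings agree.
    assert(bits(normalize(-0.0)) == bits(normalize(0.0)))
    // Idempotence: normalizing twice (once in OptimizeSubqueries, once after
    // the join rewrite) must change nothing.
    for (d <- Seq(-0.0, 0.0, Double.NaN, 1.5, Double.PositiveInfinity)) {
      assert(bits(normalize(d)) == bits(normalize(normalize(d))))
    }
  }
}
```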
---
 .../catalyst/optimizer/NormalizeFloatingNumbers.scala  |  4 ----
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala     | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
index 5f94af5..4373820 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
@@ -56,10 +56,6 @@ import org.apache.spark.sql.types._
 object NormalizeFloatingNumbers extends Rule[LogicalPlan] {
 
   def apply(plan: LogicalPlan): LogicalPlan = plan match {
-    // A subquery will be rewritten into join later, and will go through this rule
-    // eventually. Here we skip subquery, as we only need to run this rule once.
-    case _: Subquery => plan
-
     case _ => plan transform {
       case w: Window if w.partitionSpec.exists(p => needNormalize(p)) =>
         // Although the `windowExpressions` may refer to `partitionSpec` expressions, we don't need
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index a23e583..093f2db 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3449,6 +3449,24 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     checkAnswer(sql("select CAST(-32768 as short) DIV CAST (-1 as short)"),
       Seq(Row(Short.MinValue.toLong * -1)))
   }
+
+  test("normalize special floating numbers in subquery") {
+    withTempView("v1", "v2", "v3") {
+      Seq(-0.0).toDF("d").createTempView("v1")
+      Seq(0.0).toDF("d").createTempView("v2")
+      spark.range(2).createTempView("v3")
+
+      // non-correlated subquery
+      checkAnswer(sql("SELECT (SELECT v1.d FROM v1 JOIN v2 ON v1.d = v2.d)"), Row(-0.0))
+      // correlated subquery
+      checkAnswer(
+        sql(
+          """
+            |SELECT id FROM v3 WHERE EXISTS
+            |  (SELECT v1.d FROM v1 JOIN v2 ON v1.d = v2.d WHERE id > 0)
+            |""".stripMargin), Row(1))
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
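The scenario the new test pins down can also be tried interactively. A sketch for spark-shell, assuming a running `SparkSession` bound to `spark`; the expected rows (`Row(-0.0)` and `Row(1)`) come from the test above, and the pre-fix failure mode follows the commit message (the subquery's join key was not normalized, so `-0.0` failed to match `0.0`).

```scala
// spark-shell sketch of the new test's scenario; `spark` is assumed in scope.
import spark.implicits._

Seq(-0.0).toDF("d").createOrReplaceTempView("v1")
Seq(0.0).toDF("d").createOrReplaceTempView("v2")
spark.range(2).createOrReplaceTempView("v3")

// Non-correlated scalar subquery: with this fix the join inside the subquery
// normalizes its keys, so -0.0 matches 0.0; the test expects Row(-0.0).
spark.sql("SELECT (SELECT v1.d FROM v1 JOIN v2 ON v1.d = v2.d)").show()

// Correlated EXISTS subquery: normalized once in OptimizeSubqueries and again
// after the join rewrite (harmless, since the rule is idempotent); the test
// expects only Row(1).
spark.sql(
  """SELECT id FROM v3 WHERE EXISTS
    |  (SELECT v1.d FROM v1 JOIN v2 ON v1.d = v2.d WHERE id > 0)
    |""".stripMargin).show()
```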