This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 53c99a4270e [SPARK-42108][SQL] Make Analyzer transform `Count(*)` into
`Count(1)`
53c99a4270e is described below
commit 53c99a4270eeaa431e18cfd7f053d8e0ae98734d
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jan 19 10:25:07 2023 +0800
[SPARK-42108][SQL] Make Analyzer transform `Count(*)` into `Count(1)`
### What changes were proposed in this pull request?
Make Analyzer transform `Count(*)` into `Count(1)`
### Why are the changes needed?
The existing `Count(*) -> Count(1)` transformation happens in
[`AstBuilder.visitFunctionCall`](https://github.com/apache/spark/blob/97a6955278c55fa02cb9f039ae45e49e6f0f2bfd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala#L2105-L2132).
The Analyzer requires that `Count(*)` has already been converted to
`Count(1)` by the Parser. Given an unconverted `Count(*)` expression, the
Analyzer itself cannot handle it correctly, which causes a correctness issue
in Spark Connect (see https://issues.apache.org/jira/browse/SPARK-41845).
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a unit test and manually tested with Spark Connect.
Closes #39636 from zhengruifeng/sql_move_count_star.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 +++++
.../spark/sql/catalyst/analysis/AnalysisSuite.scala | 18 ++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ba2c2759e2d..f0c22471afa 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1953,6 +1953,11 @@ class Analyzer(override val catalogManager:
CatalogManager)
*/
def expandStarExpression(expr: Expression, child: LogicalPlan): Expression
= {
expr.transformUp {
+ case f0: UnresolvedFunction if !f0.isDistinct &&
+ f0.nameParts.map(_.toLowerCase(Locale.ROOT)) == Seq("count") &&
+ f0.arguments == Seq(UnresolvedStar(None)) =>
+ // Transform COUNT(*) into COUNT(1).
+ f0.copy(nameParts = Seq("count"), arguments = Seq(Literal(1)))
case f1: UnresolvedFunction if containsStar(f1.arguments) =>
// SPECIAL CASE: We want to block count(tblName.*) because in spark,
count(tblName.*) will
// be expanded while count(*) will be converted to count(1). They
will produce different
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index a91a0a44dd3..6dfbf12bbd7 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -120,6 +120,24 @@ class AnalysisSuite extends AnalysisTest with Matchers {
caseSensitive = false)
}
+ test("SPARK-42108: transform count(*) to count(1)") {
+ val a = testRelation.output(0)
+
+ checkAnalysis(
+ Project(
+ Alias(UnresolvedFunction("count" :: Nil,
+ UnresolvedStar(None) :: Nil, isDistinct = false), "x")() :: Nil,
testRelation),
+ Aggregate(Nil, count(Literal(1)).as("x") :: Nil, testRelation))
+
+ checkAnalysis(
+ Project(
+ Alias(UnresolvedFunction("count" :: Nil,
+ UnresolvedStar(None) :: Nil, isDistinct = false), "x")() ::
+ Alias(UnresolvedFunction("count" :: Nil,
+ UnresolvedAttribute("a") :: Nil, isDistinct = false), "y")() ::
Nil, testRelation),
+ Aggregate(Nil, count(Literal(1)).as("x") :: count(a).as("y") :: Nil,
testRelation))
+ }
+
test("resolve sort references - filter/limit") {
val a = testRelation2.output(0)
val b = testRelation2.output(1)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]