This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0895d195f77c [SPARK-51614][SQL] Introduce ResolveUnresolvedHaving rule in the Analyzer
0895d195f77c is described below
commit 0895d195f77cf3d46b8bb45077d6376c1068539c
Author: mihailoale-db <[email protected]>
AuthorDate: Fri Mar 28 23:40:17 2025 +0800
[SPARK-51614][SQL] Introduce ResolveUnresolvedHaving rule in the Analyzer
### What changes were proposed in this pull request?
Introduce `ResolveUnresolvedHaving` rule in the Analyzer to handle
`UnresolvedHaving` nodes with resolved `condition` and `child`.
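For reference, a minimal sketch of the transformation the rule performs (the actual patch, shown in the `Analyzer.scala` diff below, additionally uses tree-pattern pruning via `resolveOperatorsWithPruning` and a rule id; the object name here is hypothetical):

```scala
import org.apache.spark.sql.catalyst.analysis.UnresolvedHaving
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule

// Sketch only: once both the HAVING condition and the child plan are resolved,
// the leftover UnresolvedHaving wrapper can be rewritten into a plain Filter.
object ResolveUnresolvedHavingSketch extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
    case UnresolvedHaving(havingCondition, child)
        if havingCondition.resolved && child.resolved =>
      Filter(havingCondition, child)
  }
}
```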
### Why are the changes needed?
Without this rule, an `UnresolvedHaving` node whose `condition` and `child` are already resolved (which can happen, for example, when a generator such as `explode` appears in the query) is never turned into a `Filter`, so the query fails to analyze.
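For example, a query of the following shape (mirroring the test added in this PR) previously failed during analysis; a sketch that can be run in `spark-shell`:

```scala
// Shape of the previously failing query: a generator (explode) in the select
// list, GROUP BY ALL, and a HAVING clause referencing the generated column.
val df = spark.sql(
  """select
    | explode(packages) as package
    |from
    | values(array('a')) t(packages)
    |group by all
    |having package in ('a')""".stripMargin)
df.show()  // with this change the query resolves and returns a single row: 'a'
```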
### Does this PR introduce _any_ user-facing change?
Yes; queries that previously failed to analyze now pass.
### How was this patch tested?
Added a test to `SQLQuerySuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50409 from mihailoale-db/unresolvedhavingwithgenerate.
Authored-by: mihailoale-db <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/analysis/Analyzer.scala | 18 +++++++++++++++++-
.../spark/sql/catalyst/rules/RuleIdCollection.scala | 1 +
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 91451795cc42..c22d28a0cd5b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -460,7 +460,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
Batch("HandleSpecialCommand", Once,
HandleSpecialCommand),
Batch("Remove watermark for batch query", Once,
- EliminateEventTimeWatermark)
+ EliminateEventTimeWatermark),
+ Batch("ResolveUnresolvedHaving", Once, ResolveUnresolvedHaving)
)
/**
@@ -4206,3 +4207,18 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] {
}
}
}
+
+/**
+ * Rule that's used to handle `UnresolvedHaving` nodes with resolved `condition` and `child`.
+ * It's placed outside the main batch to avoid conflicts with other rules that resolve
+ * `UnresolvedHaving` in the main batch.
+ */
+object ResolveUnresolvedHaving extends Rule[LogicalPlan] {
+  override def apply(plan: LogicalPlan): LogicalPlan = {
+    plan.resolveOperatorsWithPruning(_.containsPattern(UNRESOLVED_HAVING), ruleId) {
+      case u @ UnresolvedHaving(havingCondition, child)
+        if havingCondition.resolved && child.resolved =>
+        Filter(condition = havingCondition, child = child)
+    }
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
index 8cf13168466f..dea7fc33b184 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -111,6 +111,7 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.ResolveUpdateEventTimeWatermarkColumn"
::
"org.apache.spark.sql.catalyst.expressions.EliminatePipeOperators" ::
"org.apache.spark.sql.catalyst.expressions.ValidateAndStripPipeExpressions" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveUnresolvedHaving" ::
// Catalyst Optimizer rules
"org.apache.spark.sql.catalyst.optimizer.BooleanSimplification" ::
"org.apache.spark.sql.catalyst.optimizer.CollapseProject" ::
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 3147a60bad5e..ce6440e90103 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4943,6 +4943,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
Row(Array(0), Array(0)), Row(Array(1), Array(1)), Row(Array(2), Array(2)))
checkAnswer(df, expectedAnswer)
}
+
+ test("SPARK-51614: Having operator is properly resolved when there's
generator in condition") {
+ val df = sql(
+ """select
+ | explode(packages) as package
+ |from
+ | values(array('a')) t(packages)
+ |group by all
+ |having package in ('a')""".stripMargin
+ )
+
+ checkAnswer(df, Row("a"))
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]