This is an automated email from the ASF dual-hosted git repository.

cloud-fan pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new bf3e79306aa2 [SPARK-57005][SQL] Fix None.get in 
RewritePredicateSubquery when subquery predicates are eliminated
bf3e79306aa2 is described below

commit bf3e79306aa295f5e2c4b90a2553847a0ebe691d
Author: Anupam Yadav <[email protected]>
AuthorDate: Thu May 28 20:03:05 2026 +0800

    [SPARK-57005][SQL] Fix None.get in RewritePredicateSubquery when subquery 
predicates are eliminated
    
    ### What changes were proposed in this pull request?
    
    Guard `joinCond.get` with an `isDefined` check in 
`rewriteDomainJoinsIfPresent`; when `joinCond` is `None`, the subquery plan is 
returned unchanged.
    
    ### Why are the changes needed?
    
    When constant folding eliminates all correlated predicates in a subquery 
(e.g., `WHERE FALSE AND correlated_pred`), `rewriteExistentialExpr` returns 
`None` for `joinCond`. The subsequent call to `rewriteDomainJoinsIfPresent` 
then crashes with `NoSuchElementException: None.get`.
    
    Repro:
    ```sql
    SELECT t0.value FROM VALUES (CAST(0 AS TINYINT)) AS t0(value)
    WHERE NOT EXISTS (
      SELECT t1.value FROM VALUES (CAST(0 AS TINYINT)) AS t1(value)
      WHERE FALSE AND (t0.value = t1.value)
    )
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes -- queries with EXISTS/NOT EXISTS subqueries where all correlated 
predicates are eliminated by constant folding no longer crash.
    
    ### How was this patch tested?
    
    Added a test in `RewriteSubquerySuite` that verifies the optimizer rule does 
not crash (no `NoSuchElementException`) when correlated predicates are 
eliminated. The test applies optimizer rules directly at the catalyst level via 
`OptimizeWithPullup.execute(query.analyze)` and checks the optimized plans for 
both `NOT EXISTS` and `EXISTS` with `comparePlans`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes.
    
    Closes #56077 from yadavay-amzn/fix/SPARK-57005-none-get-subquery.
    
    Authored-by: Anupam Yadav <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit b133cdc681451682cd8226aca56c0a1cff1eb9ce)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/optimizer/subquery.scala    |  7 +++-
 .../catalyst/optimizer/RewriteSubquerySuite.scala  | 40 +++++++++++++++++++++-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 378081221c8c..bf3e63571dbb 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -71,10 +71,15 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] 
with PredicateHelper {
       outerPlan: LogicalPlan,
       sub: LogicalPlan,
       joinCond: Option[Expression]): LogicalPlan = {
-    if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn) {
+    if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn && 
joinCond.isDefined) {
       DecorrelateInnerQuery.rewriteDomainJoins(outerPlan,
         sub, splitConjunctivePredicates(joinCond.get))
     } else {
+      // After PullupCorrelatedPredicates, BooleanSimplification can eliminate 
every
+      // correlated predicate in the subquery (e.g., FALSE AND outer.x = 
inner.x -> FALSE),
+      // leaving joinCond as None. In this case, no DomainJoin exists in sub 
because both
+      // joinCond and DomainJoins are produced by the same 
DecorrelateInnerQuery call --
+      // if there's no join condition, there are no domain joins to rewrite.
       sub
     }
   }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
index c45a761353c8..dca1d503e3fd 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer
 import org.apache.spark.sql.catalyst.QueryPlanningTracker
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{Cast, IsNull, ListQuery, Not}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Exists, IsNull, 
ListQuery, Literal, Not}
 import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, LeftSemi, PlanTest}
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -96,4 +96,42 @@ class RewriteSubquerySuite extends PlanTest {
       .select($"exists".as("(sum(col2) IN (listquery()))")).analyze
     comparePlans(optimized, correctAnswer)
   }
+
+  test("SPARK-57005: No None.get when correlated predicates are eliminated") {
+    // When BooleanSimplification in PullupCorrelatedPredicates eliminates all 
correlated
+    // predicates (e.g., FALSE AND correlated_pred -> FALSE), the Exists node 
ends up with
+    // outerAttrs non-empty but joinCond empty. RewritePredicateSubquery must 
handle this.
+    object OptimizeWithPullup extends RuleExecutor[LogicalPlan] {
+      val batches =
+        Batch("Pullup Correlated Expressions", Once,
+          PullupCorrelatedPredicates) ::
+        Batch("Rewrite Subquery", FixedPoint(1),
+          RewritePredicateSubquery,
+          PruneFilters,
+          PropagateEmptyRelation,
+          ColumnPruning,
+          CollapseProject,
+          RemoveNoopOperators) :: Nil
+    }
+
+    val outer = LocalRelation($"a".int, $"b".int)
+    val inner = LocalRelation($"x".int, $"y".int)
+
+    // NOT EXISTS with FALSE AND correlated_pred: subquery is always empty,
+    // so NOT EXISTS is always true and the filter is eliminated.
+    // Since outer is an empty LocalRelation, the result is also empty.
+    val notExistsQuery = outer.where(
+      Not(Exists(inner.where(Literal.FalseLiteral && $"a" === 
$"x")))).select($"a")
+    val notExistsOptimized = OptimizeWithPullup.execute(notExistsQuery.analyze)
+    val notExistsExpected = 
LocalRelation(notExistsQuery.analyze.output).analyze
+    comparePlans(notExistsOptimized, notExistsExpected)
+
+    // EXISTS with FALSE AND correlated_pred: subquery is always empty,
+    // so EXISTS is always false and no rows pass the filter.
+    val existsQuery = outer.where(
+      Exists(inner.where(Literal.FalseLiteral && $"a" === $"x"))).select($"a")
+    val existsOptimized = OptimizeWithPullup.execute(existsQuery.analyze)
+    val existsExpected = LocalRelation(existsQuery.analyze.output).analyze
+    comparePlans(existsOptimized, existsExpected)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to