This is an automated email from the ASF dual-hosted git repository.
cloud-fan pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.2 by this push:
new a3ceb143d1d6 [SPARK-57005][SQL] Fix None.get in
RewritePredicateSubquery when subquery predicates are eliminated
a3ceb143d1d6 is described below
commit a3ceb143d1d6522668a1b14403630f8791566ec7
Author: Anupam Yadav <[email protected]>
AuthorDate: Thu May 28 20:03:05 2026 +0800
[SPARK-57005][SQL] Fix None.get in RewritePredicateSubquery when subquery
predicates are eliminated
### What changes were proposed in this pull request?
Guard `joinCond.get` with `isDefined` check in
`rewriteDomainJoinsIfPresent`.
### Why are the changes needed?
When constant folding eliminates all correlated predicates in a subquery
(e.g., `WHERE FALSE AND correlated_pred`), `rewriteExistentialExpr` returns
`None` for `joinCond`. The subsequent call to `rewriteDomainJoinsIfPresent`
then crashes with `NoSuchElementException: None.get`.
Repro:
```sql
SELECT t0.value FROM VALUES (CAST(0 AS TINYINT)) AS t0(value)
WHERE NOT EXISTS (
SELECT t1.value FROM VALUES (CAST(0 AS TINYINT)) AS t1(value)
WHERE FALSE AND (t0.value = t1.value)
)
```
### Does this PR introduce _any_ user-facing change?
Yes -- queries with EXISTS/NOT EXISTS subqueries where all correlated
predicates are eliminated by constant folding no longer crash.
### How was this patch tested?
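Added a test in `RewriteSubquerySuite` that verifies the optimizer rule does
not crash (no `NoSuchElementException`) when correlated predicates are
eliminated. The test applies optimizer rules directly at the catalyst level via
`OptimizeWithPullup.execute(plan.analyze)`, a test-local `RuleExecutor` that
runs `PullupCorrelatedPredicates` before `RewritePredicateSubquery`, and
compares the optimized plan against the expected result with `comparePlans`.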
### Was this patch authored or co-authored using generative AI tooling?
Yes.
Closes #56077 from yadavay-amzn/fix/SPARK-57005-none-get-subquery.
Authored-by: Anupam Yadav <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit b133cdc681451682cd8226aca56c0a1cff1eb9ce)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/optimizer/subquery.scala | 7 +++-
.../catalyst/optimizer/RewriteSubquerySuite.scala | 40 +++++++++++++++++++++-
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 378081221c8c..bf3e63571dbb 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -71,10 +71,15 @@ object RewritePredicateSubquery extends Rule[LogicalPlan]
with PredicateHelper {
outerPlan: LogicalPlan,
sub: LogicalPlan,
joinCond: Option[Expression]): LogicalPlan = {
- if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn) {
+ if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn &&
joinCond.isDefined) {
DecorrelateInnerQuery.rewriteDomainJoins(outerPlan,
sub, splitConjunctivePredicates(joinCond.get))
} else {
+ // After PullupCorrelatedPredicates, BooleanSimplification can eliminate
every
+ // correlated predicate in the subquery (e.g., FALSE AND outer.x =
inner.x -> FALSE),
+ // leaving joinCond as None. In this case, no DomainJoin exists in sub
because both
+ // joinCond and DomainJoins are produced by the same
DecorrelateInnerQuery call --
+ // if there's no join condition, there are no domain joins to rewrite.
sub
}
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
index c45a761353c8..dca1d503e3fd 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer
import org.apache.spark.sql.catalyst.QueryPlanningTracker
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
-import org.apache.spark.sql.catalyst.expressions.{Cast, IsNull, ListQuery, Not}
+import org.apache.spark.sql.catalyst.expressions.{Cast, Exists, IsNull,
ListQuery, Literal, Not}
import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, LeftSemi, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -96,4 +96,42 @@ class RewriteSubquerySuite extends PlanTest {
.select($"exists".as("(sum(col2) IN (listquery()))")).analyze
comparePlans(optimized, correctAnswer)
}
+
+ test("SPARK-57005: No None.get when correlated predicates are eliminated") {
+ // When BooleanSimplification in PullupCorrelatedPredicates eliminates all
correlated
+ // predicates (e.g., FALSE AND correlated_pred -> FALSE), the Exists node
ends up with
+ // outerAttrs non-empty but joinCond empty. RewritePredicateSubquery must
handle this.
+ object OptimizeWithPullup extends RuleExecutor[LogicalPlan] {
+ val batches =
+ Batch("Pullup Correlated Expressions", Once,
+ PullupCorrelatedPredicates) ::
+ Batch("Rewrite Subquery", FixedPoint(1),
+ RewritePredicateSubquery,
+ PruneFilters,
+ PropagateEmptyRelation,
+ ColumnPruning,
+ CollapseProject,
+ RemoveNoopOperators) :: Nil
+ }
+
+ val outer = LocalRelation($"a".int, $"b".int)
+ val inner = LocalRelation($"x".int, $"y".int)
+
+ // NOT EXISTS with FALSE AND correlated_pred: subquery is always empty,
+ // so NOT EXISTS is always true and the filter is eliminated.
+ // Since outer is an empty LocalRelation, the result is also empty.
+ val notExistsQuery = outer.where(
+ Not(Exists(inner.where(Literal.FalseLiteral && $"a" ===
$"x")))).select($"a")
+ val notExistsOptimized = OptimizeWithPullup.execute(notExistsQuery.analyze)
+ val notExistsExpected =
LocalRelation(notExistsQuery.analyze.output).analyze
+ comparePlans(notExistsOptimized, notExistsExpected)
+
+ // EXISTS with FALSE AND correlated_pred: subquery is always empty,
+ // so EXISTS is always false and no rows pass the filter.
+ val existsQuery = outer.where(
+ Exists(inner.where(Literal.FalseLiteral && $"a" === $"x"))).select($"a")
+ val existsOptimized = OptimizeWithPullup.execute(existsQuery.analyze)
+ val existsExpected = LocalRelation(existsQuery.analyze.output).analyze
+ comparePlans(existsOptimized, existsExpected)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]