This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c8cdeeca9236 [SPARK-51751][SQL] Fix multiple rCTEs for one WITH
statement that reference each other
c8cdeeca9236 is described below
commit c8cdeeca92364986d9a133bd063b88d1cbd49bc8
Author: Pavle Martinovic <[email protected]>
AuthorDate: Thu Apr 10 10:27:23 2025 +0800
[SPARK-51751][SQL] Fix multiple rCTEs for one WITH statement that reference
each other
### What changes were proposed in this pull request?
Add a check to UnionLoopExec so that a UnionLoopRef in its subtree is only
replaced when its loopId matches the loopId of the UnionLoopExec being
executed. This is important because if the recursive branch references another
rCTE, that rCTE's plan also contains a UnionLoopRef, which belongs to a
different loop and must not be replaced.
### Why are the changes needed?
Multiple rCTEs within one WITH statement, where one rCTE references another,
resulted in infinite recursion.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New golden file test added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50544 from Pajaraja/pavle-martinovic_data/multiplerctesfix.
Lead-authored-by: Pavle Martinovic
<[email protected]>
Co-authored-by: pavle-martinovic_data <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/execution/UnionLoopExec.scala | 2 +-
.../analyzer-results/cte-recursion.sql.out | 44 ++++++++++++++++++++++
.../resources/sql-tests/inputs/cte-recursion.sql | 15 +++++++-
.../sql-tests/results/cte-recursion.sql.out | 20 ++++++++++
4 files changed, 79 insertions(+), 2 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
index 561f9cd72efb..85c7a57467b5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnionLoopExec.scala
@@ -188,7 +188,7 @@ case class UnionLoopExec(
// This way we support only UNION ALL case. Additional case should be
added for UNION case.
// One way of supporting UNION case can be seen at SPARK-24497 PR from
Peter Toth.
val newRecursion = recursion.transform {
- case r: UnionLoopRef =>
+ case r: UnionLoopRef if r.loopId == loopId =>
val logicalPlan = prevDF.logicalPlan
val optimizedPlan = prevDF.queryExecution.optimizedPlan
val (stats, constraints) = rewriteStatsAndConstraints(logicalPlan,
optimizedPlan)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
index 88f3f675c87b..33e486e6dc88 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
@@ -1256,3 +1256,47 @@ WithCTE
+- Project [id#x, xid#x]
+- SubqueryAlias t
+- CTERelationRef xxxx, true, [id#x, xid#x], false, false
+
+
+-- !query
+WITH RECURSIVE t1(a, b) AS (
+ SELECT 1, 1
+ UNION ALL
+ SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias t1
+: +- Project [1#x AS a#x, 1#x AS b#x]
+: +- UnionLoop xxxx
+: :- Project [1 AS 1#x, 1 AS 1#x]
+: : +- OneRowRelation
+: +- Project [(a#x + b#x) AS (a + b)#x, a#x]
+: +- Filter (a#x < 20)
+: +- SubqueryAlias t1
+: +- Project [1#x AS a#x, 1#x AS b#x]
+: +- UnionLoopRef xxxx, [1#x, 1#x], false
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias t2
+: +- Project [1#x AS n#x]
+: +- UnionLoop xxxx
+: :- Project [1 AS 1#x]
+: : +- OneRowRelation
+: +- Project [(n#x + 1) AS (n + 1)#x]
+: +- Filter ((n#x + 1) = a#x)
+: +- Join Inner
+: :- SubqueryAlias t2
+: : +- Project [1#x AS n#x]
+: : +- UnionLoopRef xxxx, [1#x], false
+: +- SubqueryAlias t1
+: +- CTERelationRef xxxx, true, [a#x, b#x], false, false
++- Project [n#x]
+ +- SubqueryAlias t2
+ +- CTERelationRef xxxx, true, [n#x], false, false
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
index 2780462478b0..c45f62196430 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
@@ -486,4 +486,17 @@ WITH RECURSIVE
UNION ALL
SELECT t.id + 1, xid * 10 + x.id FROM t CROSS JOIN x WHERE t.id < 3
)
-SELECT * FROM t
\ No newline at end of file
+SELECT * FROM t;
+
+-- rCTE referencing other rCTE
+WITH RECURSIVE t1(a, b) AS (
+ SELECT 1, 1
+ UNION ALL
+ SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2;
\ No newline at end of file
diff --git
a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
index 90762f81fd51..cc4e01e11ca6 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
@@ -1178,3 +1178,23 @@ struct<id:int,xid:int>
3 212
3 221
3 222
+
+
+-- !query
+WITH RECURSIVE t1(a, b) AS (
+ SELECT 1, 1
+ UNION ALL
+ SELECT a + b, a FROM t1 WHERE a < 20
+),
+t2(n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1 FROM t2, t1 WHERE n + 1 = a
+)
+SELECT * FROM t2
+-- !query schema
+struct<n:int>
+-- !query output
+1
+2
+3
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]