This is an automated email from the ASF dual-hosted git repository.
cloud-fan pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.2 by this push:
new 78c9adf2703b [SPARK-56814][SQL][TESTS] Add lateral join tests for
outer attribute visibility after NATURAL/USING JOIN
78c9adf2703b is described below
commit 78c9adf2703b7a12e3c9042e7a8a87a101821290
Author: Mihailo Timotic <[email protected]>
AuthorDate: Mon May 11 20:39:34 2026 +0800
[SPARK-56814][SQL][TESTS] Add lateral join tests for outer attribute
visibility after NATURAL/USING JOIN
## What changes were proposed in this pull request?
Add SQL query test cases to join-lateral.sql covering lateral join outer
attribute visibility after NATURAL JOIN and USING JOIN. Specifically:
1. Lateral after NATURAL JOIN -- unqualified key: verifies the merged
join key resolves correctly in the lateral subquery.
2. Lateral after NATURAL JOIN -- qualified keys: verifies t1.k and t2.k
resolve to the original pre-merge columns.
3. Lateral after USING JOIN -- qualified keys: same as above but with
explicit USING (k) syntax.
4. Lateral cannot see column hidden by subquery alias: verifies that a
column not in the subquery's output (v1 behind SELECT k FROM ... ORDER BY v1)
is not visible to the lateral subquery.
5. Lateral cannot see column not in GROUP BY output: verifies that a
column dropped by GROUP BY projection (v1 in SELECT k FROM ... GROUP BY k) is
not
visible to the lateral subquery.
## Why are the changes needed?
The existing join-lateral.sql tests cover NATURAL JOIN and USING JOIN with
the lateral subquery itself (lines 28-29), but do not test lateral joins
chained after a NATURAL/USING JOIN -- i.e., whether the merged/qualified/hidden
columns from the left side are correctly visible or hidden across the lateral
boundary. These tests close that gap.
## Does this PR introduce any user-facing change?
No.
## How was this patch tested?
Added test cases
## Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (claude-opus-4-6)
Closes #55794 from
mihailotim-db/mihailo-timotic_data/lateral-join-outer-attr-visibility.
Authored-by: Mihailo Timotic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 52daa61338998b915b51e2fd221a82087beea26d)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../analyzer-results/join-lateral.sql.out | 130 +++++++++++++++++++++
.../resources/sql-tests/inputs/join-lateral.sql | 30 +++++
.../sql-tests/results/join-lateral.sql.out | 81 +++++++++++++
3 files changed, 241 insertions(+)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
index 4666e62b2d69..cb46c265512f 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out
@@ -3064,6 +3064,136 @@ Project [1 AS 1#x]
+- LocalRelation [col1#x, col2#x]
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias nj1
+: +- Project [col1#x AS k#x, col2#x AS v1#x]
+: +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias nj2
+: +- Project [col1#x AS k#x, col2#x AS v2#x]
+: +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x]
+ +- LateralJoin lateral-subquery#x [k#x], Inner
+ : +- SubqueryAlias __auto_generated_subquery_name
+ : +- Project [outer(k#x) AS unq_k#x]
+ : +- OneRowRelation
+ +- Project [k#x, v1#x, v2#x]
+ +- Join Inner, (k#x = k#x)
+ :- SubqueryAlias nj1
+ : +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+ +- SubqueryAlias nj2
+ +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias nj1
+: +- Project [col1#x AS k#x, col2#x AS v1#x]
+: +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias nj2
+: +- Project [col1#x AS k#x, col2#x AS v2#x]
+: +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x, qual_nj1k#x, qual_nj2k#x]
+ +- Project [k#x, v1#x, v2#x, unq_k#x, qual_nj1k#x, qual_nj2k#x]
+ +- LateralJoin lateral-subquery#x [k#x && k#x && k#x], Inner
+ : +- SubqueryAlias __auto_generated_subquery_name
+ : +- Project [outer(k#x) AS unq_k#x, outer(k#x) AS qual_nj1k#x,
outer(k#x) AS qual_nj2k#x]
+ : +- OneRowRelation
+ +- Project [k#x, v1#x, v2#x, k#x]
+ +- Join Inner, (k#x = k#x)
+ :- SubqueryAlias nj1
+ : +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+ +- SubqueryAlias nj2
+ +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+ uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k)
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias uj1
+: +- Project [col1#x AS k#x, col2#x AS v1#x]
+: +- LocalRelation [col1#x, col2#x]
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias uj2
+: +- Project [col1#x AS k#x, col2#x AS v2#x]
+: +- LocalRelation [col1#x, col2#x]
++- Project [k#x, v1#x, v2#x, unq_k#x, qual_uj1k#x, qual_uj2k#x]
+ +- Project [k#x, v1#x, v2#x, unq_k#x, qual_uj1k#x, qual_uj2k#x]
+ +- LateralJoin lateral-subquery#x [k#x && k#x && k#x], Inner
+ : +- SubqueryAlias __auto_generated_subquery_name
+ : +- Project [outer(k#x) AS unq_k#x, outer(k#x) AS qual_uj1k#x,
outer(k#x) AS qual_uj2k#x]
+ : +- OneRowRelation
+ +- Project [k#x, v1#x, v2#x, k#x]
+ +- Join Inner, (k#x = k#x)
+ :- SubqueryAlias uj1
+ : +- CTERelationRef xxxx, true, [k#x, v1#x], false, false, 1
+ +- SubqueryAlias uj2
+ +- CTERelationRef xxxx, true, [k#x, v2#x], false, false, 1
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`v1`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 107,
+ "stopIndex" : 108,
+ "fragment" : "v1"
+ } ]
+}
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`v1`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 124,
+ "stopIndex" : 125,
+ "fragment" : "v1"
+ } ]
+}
+
+
-- !query
DROP VIEW t1
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
b/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
index e3cef9207d20..8a71afb38a76 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/join-lateral.sql
@@ -552,6 +552,36 @@ left join
order by t_inner.b1,t_inner.b2 desc limit 1
) as lateral_table;
+-- lateral join after NATURAL/USING JOIN: outer attribute visibility
+
+-- lateral after NATURAL JOIN: unqualified key resolves to the merged column
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k);
+
+-- lateral after NATURAL JOIN: qualified keys resolve to original columns
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k);
+
+-- lateral after USING JOIN: unqualified and qualified keys
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+ uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k);
+
+-- lateral cannot see column hidden by a subquery alias
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked);
+
+-- lateral cannot see column not in GROUP BY output
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked);
+
-- clean up
DROP VIEW t1;
DROP VIEW t2;
diff --git a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
index 11bafb2cf63c..b8af8dfea221 100644
--- a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out
@@ -1905,6 +1905,87 @@ struct<1:int>
1
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int>
+-- !query output
+1 a b 1
+
+
+-- !query
+WITH nj1(k, v1) AS (VALUES (1, 'a')),
+ nj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM nj1 NATURAL JOIN nj2,
+LATERAL (SELECT k AS unq_k, nj1.k AS qual_nj1k, nj2.k AS qual_nj2k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int,qual_nj1k:int,qual_nj2k:int>
+-- !query output
+1 a b 1 1 1
+
+
+-- !query
+WITH uj1(k, v1) AS (VALUES (1, 'a')),
+ uj2(k, v2) AS (VALUES (1, 'b'))
+SELECT * FROM uj1 JOIN uj2 USING (k),
+LATERAL (SELECT k AS unq_k, uj1.k AS qual_uj1k, uj2.k AS qual_uj2k)
+-- !query schema
+struct<k:int,v1:string,v2:string,unq_k:int,qual_uj1k:int,qual_uj2k:int>
+-- !query output
+1 a b 1 1 1
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'))
+SELECT * FROM (SELECT k FROM cte1 ORDER BY v1) sub,
+LATERAL (SELECT v1 AS leaked)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`v1`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 107,
+ "stopIndex" : 108,
+ "fragment" : "v1"
+ } ]
+}
+
+
+-- !query
+WITH cte1(k, v1) AS (VALUES (1, 'a'), (2, 'b'), (3, 'c'))
+SELECT * FROM (SELECT k FROM cte1 GROUP BY k) g,
+LATERAL (SELECT v1 AS leaked)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`v1`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 124,
+ "stopIndex" : 125,
+ "fragment" : "v1"
+ } ]
+}
+
+
-- !query
DROP VIEW t1
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]