This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6d02e241f55c [SPARK-54687][SQL] Add more edge cases with generators
6d02e241f55c is described below
commit 6d02e241f55cff79bba85d5f011bdd7aaea994d8
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Tue Mar 3 02:57:32 2026 +0800
[SPARK-54687][SQL] Add more edge cases with generators
### What changes were proposed in this pull request?
Follow-up on my previous PR https://github.com/apache/spark/pull/53447.
Found more obscure and strange cases, so I want to add them to the golden files.
### Why are the changes needed?
Better test coverage
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Ran the golden tests.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: claude 2.1.56
Closes #54487 from mikhailnik-db/add-even-more-generators-tests.
Authored-by: Mikhail Nikoliukin <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../generators-resolution-edge-cases.sql.out | 303 +++++++++++++++++++++
.../inputs/generators-resolution-edge-cases.sql | 82 ++++++
.../generators-resolution-edge-cases.sql.out | 301 ++++++++++++++++++++
.../apache/spark/sql/GeneratorFunctionSuite.scala | 187 +++++++++++++
4 files changed, 873 insertions(+)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
index 5a87586b150a..0b21d6e6b85d 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -478,3 +478,306 @@ Project [col#x, arr#x]
+- Generate explode(arr#x), false, [col#x]
+- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+- OneRowRelation
+
+
+-- !query
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col
+-- !query analysis
+Project [(col#x + 1) AS col2#x, col#x]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT 1 AS (pos, val)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MULTI_ALIAS_WITHOUT_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expr" : "\"1\"",
+ "names" : "pos, val"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 22,
+ "fragment" : "1 AS (pos, val)"
+ } ]
+}
+
+
+-- !query
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true"
+ }
+}
+
+
+-- !query
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "'Filter (explode(arr#x) = 2)"
+ }
+}
+
+
+-- !query
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query analysis
+Filter package#x IN (a)
++- Project [package#x]
+ +- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [packages#x], [packages#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS packages#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query analysis
+Filter package#x IN (a)
++- Project [package#x]
+ +- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [collect_list(a#x, 0, 0, true) AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS a#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 92,
+ "fragment" : "SELECT stack(2, id * 10L, count(val))\nFROM (VALUES (1,'a'),
(1,'b'), (2, 'c')) AS t(id, val)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x, x3#x]
++- Project [c1#x, x1#x, x2#x, x3#x]
+ +- Generate explode(array(c1#x)), false, [x3#x]
+ +- Project [c1#x, x1#x, x2#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x2#x]
+ +- Generate explode(array(1)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x, x3#x]
++- Project [c1#x, x1#x, x2#x, x3#x]
+ +- Generate explode(array(c1#x)), false, [x3#x]
+ +- Project [c1#x, x1#x, x2#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x, c1#x]
+ +- Generate explode(array(c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x]
++- Project [c1#x, x1#x, x2#x]
+ +- Generate explode(array(x1#x, c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10
+-- !query analysis
+Project [pos#x, pos#x, val#x, (pos#x * 10) AS (lateralAliasReference(pos) *
10)#x]
++- Project [pos#x, val#x, 1 AS pos#x]
+ +- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*)
+-- !query analysis
+Project [pos#x, pos#x, val#x, (lateralAliasReference(pos) * 10)#x, count(1)#xL]
++- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- Project [pos#x, (pos#x * 10) AS (lateralAliasReference(pos) * 10)#x,
count(1)#xL AS count(1)#xL]
+ +- Project [count(1)#xL, 1 AS pos#x]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col)
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS col#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col
+-- !query analysis
+Project [col#x, col#x, count(1)#xL]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- Aggregate [col#x], [col#x, count(1) AS count(1)#xL]
+ +- SubqueryAlias t
+ +- Project [col1#x AS col#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos)
+-- !query analysis
+Project [pos#x, val#x, pos#x, val#x]
++- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS pos#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`val`",
+ "proposal" : "`pos`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 56,
+ "stopIndex" : 58,
+ "fragment" : "val"
+ } ]
+}
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
index 50ba6480efd9..67b5ae77c336 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -133,3 +133,85 @@ SELECT explode(array(array(0), array(1), array(2))) as
arr, explode(arr) as col;
-- generator LCA right-to-left should work
SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as
arr;
+
+-- generator output LCA right-to-left should fail (reference before definition)
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col;
+
+-- multi-alias on non-generator expression should fail
+SELECT 1 AS (pos, val);
+
+-- generator in ORDER BY should fail
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3));
+
+-- generator in WHERE should fail
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2;
+
+-- generator with GROUP BY ALL and HAVING should work
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a');
+
+-- generator with aggregate function and GROUP BY ALL and HAVING should work
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a');
+
+-- stack with mixed aggregate and non-aggregate children should fail without
GROUP BY
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val);
+
+-- three generators with full outer join, one with constant array
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1);
+
+-- three generators with full outer join, all using hidden attributes
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1);
+
+-- explode with LCA reference to another generator alias
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1);
+
+-- LCA and generator output share the same alias name: LCA defined first
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10;
+
+-- LCA and generator output share the same alias name: LCA defined first with
aggregate
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*);
+
+-- LCA and generator output share the same alias name: generator defined first
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10;
+
+-- LCA and generator output share the same alias name: generator defined first
with aggregate
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*);
+
+-- LCA and generator output share the same alias name with multi-alias
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10;
+
+-- LCA and generator output share the same alias name with multi-alias and
aggregate
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*);
+
+-- generator's output alias does not shadow table column
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col);
+
+-- generator's output alias does not shadow table column with aggregate
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col;
+
+-- generator's multi-alias does not shadow table column
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos);
+
+-- generator's multi-alias does not shadow table column with aggregate
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos;
diff --git
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
index dfe39028ea01..7dc50cfb4aac 100644
---
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -480,3 +480,304 @@ struct<col:int,arr:array<int>>
0 [0]
1 [1]
2 [2]
+
+
+-- !query
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col
+-- !query schema
+struct<col2:int,col:int>
+-- !query output
+2 1
+3 2
+4 3
+
+
+-- !query
+SELECT 1 AS (pos, val)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MULTI_ALIAS_WITHOUT_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expr" : "\"1\"",
+ "names" : "pos, val"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 22,
+ "fragment" : "1 AS (pos, val)"
+ } ]
+}
+
+
+-- !query
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true"
+ }
+}
+
+
+-- !query
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "'Filter (explode(arr#x) = 2)"
+ }
+}
+
+
+-- !query
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query schema
+struct<package:string>
+-- !query output
+a
+
+
+-- !query
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query schema
+struct<package:string>
+-- !query output
+a
+
+
+-- !query
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 92,
+ "fragment" : "SELECT stack(2, id * 10L, count(val))\nFROM (VALUES (1,'a'),
(1,'b'), (2, 'c')) AS t(id, val)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int,x3:int>
+-- !query output
+1 1 NULL
+1 2 2
+1 2 2
+1 3 3
+1 NULL 3
+1 NULL NULL
+
+
+-- !query
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int,x3:int>
+-- !query output
+1 1 NULL
+1 NULL NULL
+2 2 2
+2 2 2
+NULL 3 3
+NULL NULL 3
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int>
+-- !query output
+1 1
+1 1
+2 2
+2 2
+2 2
+2 2
+3 3
+3 3
+3 3
+3 3
+4 4
+4 NULL
+NULL 1
+NULL NULL
+NULL NULL
+NULL NULL
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10
+-- !query schema
+struct<pos:int,pos:int,val:string,(lateralAliasReference(pos) * 10):int>
+-- !query output
+1 0 x 10
+1 1 y 10
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*)
+-- !query schema
+struct<pos:int,pos:int,val:string,(lateralAliasReference(pos) *
10):int,count(1):bigint>
+-- !query output
+1 0 x 10 1
+1 1 y 10 1
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col)
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+10 42
+20 42
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col
+-- !query schema
+struct<col:int,col:int,count(1):bigint>
+-- !query output
+10 42 1
+20 42 1
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos)
+-- !query schema
+struct<pos:int,val:string,pos:int,val:string>
+-- !query output
+0 x 42 x
+1 y 42 y
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`val`",
+ "proposal" : "`pos`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 56,
+ "stopIndex" : 58,
+ "fragment" : "val"
+ } ]
+}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index b9491a79cc3a..d9a3130f4aed 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -578,6 +578,193 @@ class GeneratorFunctionSuite extends QueryTest with
SharedSparkSession {
|""".stripMargin)
checkAnswer(df, Seq(Row(0, "a"), Row(0, "b")))
}
+
+ test("generator with alias in multiple projects") {
+ val df = sql("SELECT explode(array(5, 6, 7, 8, 9)) AS a")
+ val alias = ($"a" + 1).as("a")
+ checkAnswer(
+ df.select(alias).select(alias).select(alias),
+ Seq(Row(8), Row(9), Row(10), Row(11), Row(12))
+ )
+ }
+
+ test("generator in self-join with aliased columns") {
+ val df1 = sql("SELECT explode(array(1, 2, 3)) AS col")
+ val df2 = df1.select($"col".as("col2"))
+ checkAnswer(
+ df1.join(df2, df1("col") === df2("col2")),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("generator in self-union") {
+ val df1 = sql("SELECT explode(array(1, 2, 3)) AS col")
+ checkAnswer(
+ df1.union(df1),
+ Seq(Row(1), Row(2), Row(3), Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("explode with nested aliases using DataFrame API") {
+ checkAnswer(
+ spark.range(1).select(explode(array(lit(1), lit(2),
lit(3))).as("first").as("second")),
+ Seq(Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("posexplode with multi-alias using DataFrame API") {
+ checkAnswer(
+ spark.range(1).select(posexplode(array(lit(10), lit(20))).as(Seq("idx",
"val"))),
+ Seq(Row(0, 10), Row(1, 20))
+ )
+ }
+
+ test("posexplode with chained aliases using DataFrame API should fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ posexplode(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ .as("lolkek")
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UDTF_ALIAS_NUMBER_MISMATCH")
+ }
+
+ test("posexplode with chained aliases ending with valid multi-alias using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ ),
+ Seq(Row(0, 1), Row(1, 2), Row(2, 3))
+ )
+ }
+
+ test("explode with chained aliases and LCA reference using DataFrame API
should fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ explode(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"first"
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UNRESOLVED_COLUMN.WITH_SUGGESTION")
+ }
+
+ test("explode with chained aliases and final alias reference using DataFrame
API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"second"
+ ),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("explode_outer with chained aliases using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode_outer(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second")
+ ),
+ Seq(Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("explode_outer with chained aliases and final alias reference using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode_outer(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"second"
+ ),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("posexplode_outer with chained aliases using DataFrame API should
fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ .as("lolkek")
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UDTF_ALIAS_NUMBER_MISMATCH")
+ }
+
+ test("posexplode_outer with chained aliases ending with valid multi-alias
using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ ),
+ Seq(Row(0, 1), Row(1, 2), Row(2, 3))
+ )
+ }
+
+ test("posexplode_outer with multi-alias using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(10), lit(20)))
+ .as(Seq("idx", "val"))
+ ),
+ Seq(Row(0, 10), Row(1, 20))
+ )
+ }
+
+ test("posexplode_outer with chained multi-alias and final reference using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(10), lit(20)))
+ .as(Seq("pos1", "val1"))
+ .as(Seq("pos2", "val2")),
+ $"pos2",
+ $"val2"
+ ),
+ Seq(Row(0, 10, 0, 10), Row(1, 20, 1, 20))
+ )
+ }
}
case class EmptyGenerator() extends Generator with LeafLike[Expression] {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]