This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6d02e241f55c [SPARK-54687][SQL] Add more edge cases with generators
6d02e241f55c is described below
commit 6d02e241f55cff79bba85d5f011bdd7aaea994d8
Author: Mikhail Nikoliukin <[email protected]>
AuthorDate: Tue Mar 3 02:57:32 2026 +0800
[SPARK-54687][SQL] Add more edge cases with generators
### What changes were proposed in this pull request?
Follow-up on my previous PR https://github.com/apache/spark/pull/53447.
Found more obscure and strange cases, so I want to add them to the golden files.
### Why are the changes needed?
Better test coverage
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Ran the golden tests.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: claude 2.1.56
Closes #54487 from mikhailnik-db/add-even-more-generators-tests.
Authored-by: Mikhail Nikoliukin <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../generators-resolution-edge-cases.sql.out | 303 +++++++++++++++++++++
.../inputs/generators-resolution-edge-cases.sql | 82 ++++++
.../generators-resolution-edge-cases.sql.out | 301 ++++++++++++++++++++
.../apache/spark/sql/GeneratorFunctionSuite.scala | 187 +++++++++++++
4 files changed, 873 insertions(+)
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
index 5a87586b150a..0b21d6e6b85d 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/generators-resolution-edge-cases.sql.out
@@ -478,3 +478,306 @@ Project [col#x, arr#x]
+- Generate explode(arr#x), false, [col#x]
+- Generate explode(array(array(0), array(1), array(2))), false, [arr#x]
+- OneRowRelation
+
+
+-- !query
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col
+-- !query analysis
+Project [(col#x + 1) AS col2#x, col#x]
++- Generate explode(array(1, 2, 3)), false, [col#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT 1 AS (pos, val)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MULTI_ALIAS_WITHOUT_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expr" : "\"1\"",
+ "names" : "pos, val"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 22,
+ "fragment" : "1 AS (pos, val)"
+ } ]
+}
+
+
+-- !query
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3))
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true"
+ }
+}
+
+
+-- !query
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "'Filter (explode(arr#x) = 2)"
+ }
+}
+
+
+-- !query
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query analysis
+Filter package#x IN (a)
++- Project [package#x]
+ +- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [packages#x], [packages#x AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS packages#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query analysis
+Filter package#x IN (a)
++- Project [package#x]
+ +- Generate explode(_gen_input_0#x), false, [package#x]
+ +- Aggregate [collect_list(a#x, 0, 0, true) AS _gen_input_0#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS a#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 92,
+ "fragment" : "SELECT stack(2, id * 10L, count(val))\nFROM (VALUES (1,'a'),
(1,'b'), (2, 'c')) AS t(id, val)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x, x3#x]
++- Project [c1#x, x1#x, x2#x, x3#x]
+ +- Generate explode(array(c1#x)), false, [x3#x]
+ +- Project [c1#x, x1#x, x2#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x2#x]
+ +- Generate explode(array(1)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x, x3#x]
++- Project [c1#x, x1#x, x2#x, x3#x]
+ +- Generate explode(array(c1#x)), false, [x3#x]
+ +- Project [c1#x, x1#x, x2#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x, c1#x]
+ +- Generate explode(array(c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1)
+-- !query analysis
+Project [x1#x, x2#x]
++- Project [c1#x, x1#x, x2#x]
+ +- Generate explode(array(x1#x, c1#x)), false, [x2#x]
+ +- Project [c1#x, x1#x, c1#x]
+ +- Generate explode(array(c1#x, c1#x)), false, [x1#x]
+ +- Project [coalesce(c1#x, c1#x) AS c1#x, c1#x, c1#x]
+ +- Join FullOuter, (c1#x = c1#x)
+ :- SubqueryAlias t1
+ : +- Project [col1#x AS c1#x]
+ : +- LocalRelation [col1#x]
+ +- SubqueryAlias t2
+ +- Project [col1#x AS c1#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10
+-- !query analysis
+Project [pos#x, pos#x, val#x, (pos#x * 10) AS (lateralAliasReference(pos) *
10)#x]
++- Project [pos#x, val#x, 1 AS pos#x]
+ +- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- OneRowRelation
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*)
+-- !query analysis
+Project [pos#x, pos#x, val#x, (lateralAliasReference(pos) * 10)#x, count(1)#xL]
++- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- Project [pos#x, (pos#x * 10) AS (lateralAliasReference(pos) * 10)#x,
count(1)#xL AS count(1)#xL]
+ +- Project [count(1)#xL, 1 AS pos#x]
+ +- Aggregate [count(1) AS count(1)#xL]
+ +- OneRowRelation
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col)
+-- !query analysis
+Project [col#x, col#x]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS col#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col
+-- !query analysis
+Project [col#x, col#x, count(1)#xL]
++- Generate explode(array(10, 20)), false, [col#x]
+ +- Aggregate [col#x], [col#x, count(1) AS count(1)#xL]
+ +- SubqueryAlias t
+ +- Project [col1#x AS col#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos)
+-- !query analysis
+Project [pos#x, val#x, pos#x, val#x]
++- Generate posexplode(array(x, y)), false, [pos#x, val#x]
+ +- SubqueryAlias t
+ +- Project [col1#x AS pos#x]
+ +- LocalRelation [col1#x]
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`val`",
+ "proposal" : "`pos`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 56,
+ "stopIndex" : 58,
+ "fragment" : "val"
+ } ]
+}
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
index 50ba6480efd9..67b5ae77c336 100644
---
a/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
+++
b/sql/core/src/test/resources/sql-tests/inputs/generators-resolution-edge-cases.sql
@@ -133,3 +133,85 @@ SELECT explode(array(array(0), array(1), array(2))) as
arr, explode(arr) as col;
-- generator LCA right-to-left should work
SELECT explode(arr) as col, explode(array(array(0), array(1), array(2))) as
arr;
+
+-- generator output LCA right-to-left should fail (reference before definition)
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col;
+
+-- multi-alias on non-generator expression should fail
+SELECT 1 AS (pos, val);
+
+-- generator in ORDER BY should fail
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3));
+
+-- generator in WHERE should fail
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2;
+
+-- generator with GROUP BY ALL and HAVING should work
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a');
+
+-- generator with aggregate function and GROUP BY ALL and HAVING should work
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a');
+
+-- stack with mixed aggregate and non-aggregate children should fail without
GROUP BY
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val);
+
+-- three generators with full outer join, one with constant array
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1);
+
+-- three generators with full outer join, all using hidden attributes
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1);
+
+-- explode with LCA reference to another generator alias
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1);
+
+-- LCA and generator output share the same alias name: LCA defined first
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10;
+
+-- LCA and generator output share the same alias name: LCA defined first with
aggregate
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*);
+
+-- LCA and generator output share the same alias name: generator defined first
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10;
+
+-- LCA and generator output share the same alias name: generator defined first
with aggregate
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*);
+
+-- LCA and generator output share the same alias name with multi-alias
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10;
+
+-- LCA and generator output share the same alias name with multi-alias and
aggregate
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*);
+
+-- generator's output alias does not shadow table column
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col);
+
+-- generator's output alias does not shadow table column with aggregate
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col;
+
+-- generator's multi-alias does not shadow table column
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos);
+
+-- generator's multi-alias does not shadow table column with aggregate
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos;
diff --git
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
index dfe39028ea01..7dc50cfb4aac 100644
---
a/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/generators-resolution-edge-cases.sql.out
@@ -480,3 +480,304 @@ struct<col:int,arr:array<int>>
0 [0]
1 [1]
2 [2]
+
+
+-- !query
+SELECT col + 1 as col2, explode(array(1, 2, 3)) as col
+-- !query schema
+struct<col2:int,col:int>
+-- !query output
+2 1
+3 2
+4 3
+
+
+-- !query
+SELECT 1 AS (pos, val)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MULTI_ALIAS_WITHOUT_GENERATOR",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "expr" : "\"1\"",
+ "names" : "pos, val"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 22,
+ "fragment" : "1 AS (pos, val)"
+ } ]
+}
+
+
+-- !query
+SELECT * FROM VALUES (1) ORDER BY explode(array(1, 2, 3))
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true"
+ }
+}
+
+
+-- !query
+SELECT * FROM VALUES (array(1, 2, 3)) t(arr) WHERE explode(arr) == 2
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "UNSUPPORTED_GENERATOR.OUTSIDE_SELECT",
+ "sqlState" : "42K0E",
+ "messageParameters" : {
+ "plan" : "'Filter (explode(arr#x) = 2)"
+ }
+}
+
+
+-- !query
+SELECT explode(packages) AS package
+FROM (VALUES(array('a', 'b'))) AS t(packages)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query schema
+struct<package:string>
+-- !query output
+a
+
+
+-- !query
+SELECT explode(collect_list(a)) AS package
+FROM (VALUES ('a'), ('b')) AS t(a)
+GROUP BY ALL
+HAVING package IN ('a')
+-- !query schema
+struct<package:string>
+-- !query output
+a
+
+
+-- !query
+SELECT stack(2, id * 10L, count(val))
+FROM (VALUES (1,'a'), (1,'b'), (2, 'c')) AS t(id, val)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "MISSING_GROUP_BY",
+ "sqlState" : "42803",
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 1,
+ "stopIndex" : 92,
+ "fragment" : "SELECT stack(2, id * 10L, count(val))\nFROM (VALUES (1,'a'),
(1,'b'), (2, 'c')) AS t(id, val)"
+ } ]
+}
+
+
+-- !query
+SELECT
+ explode(array(1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int,x3:int>
+-- !query output
+1 1 NULL
+1 2 2
+1 2 2
+1 3 3
+1 NULL 3
+1 NULL NULL
+
+
+-- !query
+SELECT
+ explode(array(t1.c1)) AS x1,
+ explode(array(t1.c1, t2.c1)) AS x2,
+ explode(array(t2.c1)) AS x3
+FROM (VALUES (1), (2)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int,x3:int>
+-- !query output
+1 1 NULL
+1 NULL NULL
+2 2 2
+2 2 2
+NULL 3 3
+NULL NULL 3
+
+
+-- !query
+SELECT explode(array(t1.c1, t2.c1)) AS x1, explode(array(x1, t1.c1)) AS x2
+FROM (VALUES (1), (2), (3)) AS t1(c1)
+FULL OUTER JOIN (VALUES (2), (3), (4)) AS t2(c1)
+USING (c1)
+-- !query schema
+struct<x1:int,x2:int>
+-- !query output
+1 1
+1 1
+2 2
+2 2
+2 2
+2 2
+3 3
+3 3
+3 3
+3 3
+4 4
+4 NULL
+NULL 1
+NULL NULL
+NULL NULL
+NULL NULL
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as a, explode(array(1, 2, 3)) as a, a * 10, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT explode(array(1, 2, 3)) as a, 1 as a, a * 10, count(*)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "AMBIGUOUS_LATERAL_COLUMN_ALIAS",
+ "sqlState" : "42702",
+ "messageParameters" : {
+ "n" : "2",
+ "name" : "`a`"
+ }
+}
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10
+-- !query schema
+struct<pos:int,pos:int,val:string,(lateralAliasReference(pos) * 10):int>
+-- !query output
+1 0 x 10
+1 1 y 10
+
+
+-- !query
+SELECT 1 as pos, posexplode(array('x', 'y')) as (pos, val), pos * 10, count(*)
+-- !query schema
+struct<pos:int,pos:int,val:string,(lateralAliasReference(pos) *
10):int,count(1):bigint>
+-- !query output
+1 0 x 10 1
+1 1 y 10 1
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col FROM (VALUES (42)) AS t(col)
+-- !query schema
+struct<col:int,col:int>
+-- !query output
+10 42
+20 42
+
+
+-- !query
+SELECT explode(array(10, 20)) as col, col, count(*) FROM (VALUES (42)) AS
t(col) GROUP BY col
+-- !query schema
+struct<col:int,col:int,count(1):bigint>
+-- !query output
+10 42 1
+20 42 1
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val FROM (VALUES (42))
AS t(pos)
+-- !query schema
+struct<pos:int,val:string,pos:int,val:string>
+-- !query output
+0 x 42 x
+1 y 42 y
+
+
+-- !query
+SELECT posexplode(array('x', 'y')) as (pos, val), pos, val, count(*) FROM
(VALUES (42)) AS t(pos) GROUP BY pos
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
+ "sqlState" : "42703",
+ "messageParameters" : {
+ "objectName" : "`val`",
+ "proposal" : "`pos`"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 56,
+ "stopIndex" : 58,
+ "fragment" : "val"
+ } ]
+}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index b9491a79cc3a..d9a3130f4aed 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -578,6 +578,193 @@ class GeneratorFunctionSuite extends QueryTest with
SharedSparkSession {
|""".stripMargin)
checkAnswer(df, Seq(Row(0, "a"), Row(0, "b")))
}
+
+ test("generator with alias in multiple projects") {
+ val df = sql("SELECT explode(array(5, 6, 7, 8, 9)) AS a")
+ val alias = ($"a" + 1).as("a")
+ checkAnswer(
+ df.select(alias).select(alias).select(alias),
+ Seq(Row(8), Row(9), Row(10), Row(11), Row(12))
+ )
+ }
+
+ test("generator in self-join with aliased columns") {
+ val df1 = sql("SELECT explode(array(1, 2, 3)) AS col")
+ val df2 = df1.select($"col".as("col2"))
+ checkAnswer(
+ df1.join(df2, df1("col") === df2("col2")),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("generator in self-union") {
+ val df1 = sql("SELECT explode(array(1, 2, 3)) AS col")
+ checkAnswer(
+ df1.union(df1),
+ Seq(Row(1), Row(2), Row(3), Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("explode with nested aliases using DataFrame API") {
+ checkAnswer(
+ spark.range(1).select(explode(array(lit(1), lit(2),
lit(3))).as("first").as("second")),
+ Seq(Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("posexplode with multi-alias using DataFrame API") {
+ checkAnswer(
+ spark.range(1).select(posexplode(array(lit(10), lit(20))).as(Seq("idx",
"val"))),
+ Seq(Row(0, 10), Row(1, 20))
+ )
+ }
+
+ test("posexplode with chained aliases using DataFrame API should fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ posexplode(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ .as("lolkek")
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UDTF_ALIAS_NUMBER_MISMATCH")
+ }
+
+ test("posexplode with chained aliases ending with valid multi-alias using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ ),
+ Seq(Row(0, 1), Row(1, 2), Row(2, 3))
+ )
+ }
+
+ test("explode with chained aliases and LCA reference using DataFrame API
should fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ explode(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"first"
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UNRESOLVED_COLUMN.WITH_SUGGESTION")
+ }
+
+ test("explode with chained aliases and final alias reference using DataFrame
API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"second"
+ ),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("explode_outer with chained aliases using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode_outer(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second")
+ ),
+ Seq(Row(1), Row(2), Row(3))
+ )
+ }
+
+ test("explode_outer with chained aliases and final alias reference using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ explode_outer(array(lit(1), lit(2), lit(3)))
+ .as("first")
+ .as("second"),
+ $"second"
+ ),
+ Seq(Row(1, 1), Row(2, 2), Row(3, 3))
+ )
+ }
+
+ test("posexplode_outer with chained aliases using DataFrame API should
fail") {
+ val exception = intercept[AnalysisException] {
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ .as("lolkek")
+ )
+ .collect()
+ }
+ assert(exception.getCondition == "UDTF_ALIAS_NUMBER_MISMATCH")
+ }
+
+ test("posexplode_outer with chained aliases ending with valid multi-alias
using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(1), lit(2), lit(3)))
+ .as("lolkek")
+ .as(Seq("pos", "val"))
+ .as(Seq("pos", "val", "kek"))
+ .as(Seq("pos2", "val2"))
+ ),
+ Seq(Row(0, 1), Row(1, 2), Row(2, 3))
+ )
+ }
+
+ test("posexplode_outer with multi-alias using DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(10), lit(20)))
+ .as(Seq("idx", "val"))
+ ),
+ Seq(Row(0, 10), Row(1, 20))
+ )
+ }
+
+ test("posexplode_outer with chained multi-alias and final reference using
DataFrame API") {
+ checkAnswer(
+ spark
+ .range(1)
+ .select(
+ posexplode_outer(array(lit(10), lit(20)))
+ .as(Seq("pos1", "val1"))
+ .as(Seq("pos2", "val2")),
+ $"pos2",
+ $"val2"
+ ),
+ Seq(Row(0, 10, 0, 10), Row(1, 20, 1, 20))
+ )
+ }
}
case class EmptyGenerator() extends Generator with LeafLike[Expression] {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]