This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5b19aa3abc8b [SPARK-51778][SQL][TESTS] Close SQL test gaps discovered during single-pass Analyzer implementation 5b19aa3abc8b is described below commit 5b19aa3abc8b39d8a12faaebd4074bc24f3f2701 Author: Vladimir Golubev <vladimir.golu...@databricks.com> AuthorDate: Mon Apr 14 21:31:55 2025 +0800 [SPARK-51778][SQL][TESTS] Close SQL test gaps discovered during single-pass Analyzer implementation ### What changes were proposed in this pull request? Close SQL test gaps discovered during single-pass Analyzer implementation. ### Why are the changes needed? To make Spark test coverage better. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50568 from vladimirg-db/vladimir-golubev_data/add-more-tests-discovered-during-single-pass-analyzer-implementation. Authored-by: Vladimir Golubev <vladimir.golu...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql-tests/analyzer-results/cte.sql.out | 229 +++++++++++++++++++++ .../analyzer-results/group-by-alias.sql.out | 73 +++++++ .../analyzer-results/group-by-all.sql.out | 109 ++++++++++ .../analyzer-results/group-by-ordinal.sql.out | 61 ++++++ .../analyzer-results/misc-functions.sql.out | 7 + .../sql-tests/analyzer-results/order-by.sql.out | 100 +++++++++ .../src/test/resources/sql-tests/inputs/cte.sql | 92 +++++++++ .../resources/sql-tests/inputs/group-by-alias.sql | 24 +++ .../resources/sql-tests/inputs/group-by-all.sql | 32 ++- .../sql-tests/inputs/group-by-ordinal.sql | 21 ++ .../resources/sql-tests/inputs/misc-functions.sql | 5 +- .../test/resources/sql-tests/inputs/order-by.sql | 35 ++++ .../test/resources/sql-tests/results/cte.sql.out | 142 +++++++++++++ .../sql-tests/results/group-by-alias.sql.out | 40 ++++ .../sql-tests/results/group-by-all.sql.out | 85 ++++++++ .../sql-tests/results/group-by-ordinal.sql.out | 29 +++ .../sql-tests/results/misc-functions.sql.out | 8 + .../resources/sql-tests/results/order-by.sql.out | 82 ++++++++ 18 files changed, 1172 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out index a62b7bbe66f3..8d8d978cb0cb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out @@ -802,6 +802,235 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT * FROM ( + ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) UNION ALL ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) +) +-- !query analysis +Project [1#x] ++- SubqueryAlias __auto_generated_subquery_name + +- WithCTE + :- CTERelationDef xxxx, false + : +- SubqueryAlias cte + : +- Project [1 AS 1#x] + : +- OneRowRelation + :- CTERelationDef xxxx, false + : +- SubqueryAlias cte + : +- Project [2 AS 2#x] + : +- OneRowRelation + +- Union false, false + :- Project [1#x] + : +- SubqueryAlias cte + : +- CTERelationRef xxxx, true, [1#x], false, false, 1 + +- Project [2#x] + +- SubqueryAlias cte + +- CTERelationRef xxxx, true, [2#x], false, false, 1 + + +-- !query +SELECT * FROM ( + ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) UNION ALL ( + SELECT 1 WHERE EXISTS ( + SELECT * FROM cte + ) + ) +) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`cte`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 134, + "stopIndex" : 136, + "fragment" : "cte" + } ] +} + + +-- !query +SELECT + * +FROM + VALUES (1) +WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) +UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) +) +-- !query analysis +Union false, false +:- Project [col1#x] +: +- Filter exists#x [] +: : +- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias cte +: : : +- Project [1 AS 1#x] +: : : +- OneRowRelation +: : +- Project [1#x] +: : +- SubqueryAlias cte +: : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: +- LocalRelation [col1#x] ++- Project [col1#x] + +- Filter exists#x [] + : +- WithCTE + : :- CTERelationDef xxxx, false + : : +- SubqueryAlias cte + : : +- Project [1 AS 1#x] + : : +- OneRowRelation + : +- Project [1#x] + : +- SubqueryAlias cte + : +- CTERelationRef xxxx, true, [1#x], false, false, 1 + +- LocalRelation [col1#x] + + +-- !query +WITH cte AS ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +) +SELECT * FROM cte +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias cte +: +- Union false, false +: :- Project [col1#x] +: : +- Filter exists#x [] +: : : +- WithCTE +: : : :- CTERelationDef xxxx, false +: : : : +- SubqueryAlias cte +: : : : +- Project [1 AS 1#x] +: : : : +- OneRowRelation +: : : +- Project [1#x] +: : : +- SubqueryAlias cte +: : : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: : +- LocalRelation [col1#x] +: +- Project [col1#x] +: +- Filter exists#x [] +: : +- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias cte +: : : +- Project [1 AS 1#x] +: : : +- OneRowRelation +: : +- Project [1#x] +: : +- SubqueryAlias cte +: : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: +- LocalRelation [col1#x] ++- Project [col1#x] + +- SubqueryAlias cte + +- CTERelationRef xxxx, true, [col1#x], false, false, 2 + + +-- !query +SELECT * FROM ( + WITH cte AS (SELECT 1) + SELECT + * + FROM VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +) +-- !query analysis +Project [col1#x] ++- SubqueryAlias __auto_generated_subquery_name + +- WithCTE + :- CTERelationDef xxxx, false + : +- SubqueryAlias cte + : +- Project [1 AS 1#x] + : +- OneRowRelation + +- Union false, false + :- Project [col1#x] + : +- Filter exists#x [] + : : +- WithCTE + : : :- CTERelationDef xxxx, false + : : : +- SubqueryAlias cte + : : : +- Project [1 AS 1#x] + : : : +- OneRowRelation + : : +- Project [1#x] + : : +- SubqueryAlias cte + : : +- CTERelationRef xxxx, true, [1#x], false, false, 1 + : +- LocalRelation [col1#x] + +- Project [col1#x] + +- Filter exists#x [] + : +- WithCTE + : :- CTERelationDef xxxx, false + : : +- SubqueryAlias cte + : : +- Project [1 AS 1#x] + : : +- OneRowRelation + : +- Project [1#x] + : +- SubqueryAlias cte + : +- CTERelationRef xxxx, true, [1#x], false, false, 1 + +- LocalRelation [col1#x] + + +-- !query +WITH `a.b.c` AS ( + SELECT 1 +) +SELECT * FROM `a.b.c` +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias `a.b.c` +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Project [1#x] + +- SubqueryAlias `a.b.c` + +- CTERelationRef xxxx, true, [1#x], false, false, 1 + + -- !query DROP VIEW IF EXISTS t -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out index 8a57acd50d48..e90a68b22791 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-alias.sql.out @@ -338,6 +338,79 @@ Aggregate [(col1#x % 3)], [max(col1#x) AS max(col1)#x, 3 AS abc#x] +- LocalRelation [col1#x] +-- !query +SELECT 1 AS a FROM testData GROUP BY `a` +-- !query analysis +Aggregate [a#x], [1 AS a#x] ++- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias cte +: : : +- Project [1 AS 1#x] +: : : +- OneRowRelation +: : +- Project [1#x] +: : +- SubqueryAlias cte +: : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias cte +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [1#x] +: +- SubqueryAlias cte +: +- CTERelationRef xxxx, true, [1#x], false, false, 1 ++- LocalRelation [col1#x] + + +-- !query +SELECT ( + SELECT a FROM testData LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- GlobalLimit 1 +: : +- LocalLimit 1 +: : +- Project [a#x] +: : +- SubqueryAlias testdata +: : +- View (`testData`, [a#x, b#x]) +: : +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: : +- Project [a#x, b#x] +: : +- SubqueryAlias testData +: : +- LocalRelation [a#x, b#x] +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [a#x] +: +- SubqueryAlias testdata +: +- View (`testData`, [a#x, b#x]) +: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: +- Project [a#x, b#x] +: +- SubqueryAlias testData +: +- LocalRelation [a#x, b#x] ++- LocalRelation [col1#x] + + -- !query set spark.sql.groupByAliases=false -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out index c2c77db6c3b1..b1a4b85d3ae5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out @@ -348,3 +348,112 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "non_exist" } ] } + + +-- !query +SELECT * FROM data GROUP BY ALL +-- !query analysis +Aggregate [country#x, city#x, name#x, id#x, power#x], [country#x, city#x, name#x, id#x, power#x] ++- SubqueryAlias data + +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) + +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] + +- Project [country#x, city#x, name#x, id#x, power#x] + +- SubqueryAlias data + +- LocalRelation [country#x, city#x, name#x, id#x, power#x] + + +-- !query +SELECT 4 AS a, 5 AS b, 6 AS c, * FROM data GROUP BY ALL +-- !query analysis +Aggregate [1, 2, 3, country#x, city#x, name#x, id#x, power#x], [4 AS a#x, 5 AS b#x, 6 AS c#x, country#x, city#x, name#x, id#x, power#x] ++- SubqueryAlias data + +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) + +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] + +- Project [country#x, city#x, name#x, id#x, power#x] + +- SubqueryAlias data + +- LocalRelation [country#x, city#x, name#x, id#x, power#x] + + +-- !query +SELECT *, 4 AS a, 5 AS b, 6 AS c FROM data GROUP BY ALL +-- !query analysis +Aggregate [country#x, city#x, name#x, id#x, power#x, 6, 7, 8], [country#x, city#x, name#x, id#x, power#x, 4 AS a#x, 5 AS b#x, 6 AS c#x] ++- SubqueryAlias data + +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) + +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] + +- Project [country#x, city#x, name#x, id#x, power#x] + +- SubqueryAlias data + +- LocalRelation [country#x, city#x, name#x, id#x, power#x] + + +-- !query +SELECT 4 AS a, 5 AS b, *, 6 AS c FROM data GROUP BY ALL +-- !query analysis +Aggregate [1, 2, country#x, city#x, name#x, id#x, power#x, 8], [4 AS a#x, 5 AS b#x, country#x, city#x, name#x, id#x, power#x, 6 AS c#x] ++- SubqueryAlias data + +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) + +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] + +- Project [country#x, city#x, name#x, id#x, power#x] + +- SubqueryAlias data + +- LocalRelation [country#x, city#x, name#x, id#x, power#x] + + +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias cte +: : : +- Project [1 AS 1#x] +: : : +- OneRowRelation +: : +- Project [1#x] +: : +- SubqueryAlias cte +: : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias cte +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [1#x] +: +- SubqueryAlias cte +: +- CTERelationRef xxxx, true, [1#x], false, false, 1 ++- LocalRelation [col1#x] + + +-- !query +SELECT ( + SELECT country FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- GlobalLimit 1 +: : +- LocalLimit 1 +: : +- Project [country#x] +: : +- SubqueryAlias data +: : +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) +: : +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] +: : +- Project [country#x, city#x, name#x, id#x, power#x] +: : +- SubqueryAlias data +: : +- LocalRelation [country#x, city#x, name#x, id#x, power#x] +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [country#x] +: +- SubqueryAlias data +: +- View (`data`, [country#x, city#x, name#x, id#x, power#x]) +: +- Project [cast(country#x as string) AS country#x, cast(city#x as string) AS city#x, cast(name#x as string) AS name#x, cast(id#x as int) AS id#x, cast(power#x as decimal(3,1)) AS power#x] +: +- Project [country#x, city#x, name#x, id#x, power#x] +: +- SubqueryAlias data +: +- LocalRelation [country#x, city#x, name#x, id#x, power#x] ++- LocalRelation [col1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out index 904b35559ced..7ffd5bf22baf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out @@ -471,6 +471,67 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- WithCTE +: : :- CTERelationDef xxxx, false +: : : +- SubqueryAlias cte +: : : +- Project [1 AS 1#x] +: : : +- OneRowRelation +: : +- Project [1#x] +: : +- SubqueryAlias cte +: : +- CTERelationRef xxxx, true, [1#x], false, false, 1 +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias cte +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [1#x] +: +- SubqueryAlias cte +: +- CTERelationRef xxxx, true, [1#x], false, false, 1 ++- LocalRelation [col1#x] + + +-- !query +SELECT ( + SELECT a FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +-- !query analysis +Aggregate [scalar-subquery#x []], [scalar-subquery#x [] AS subq1#x] +: :- GlobalLimit 1 +: : +- LocalLimit 1 +: : +- Project [a#x] +: : +- SubqueryAlias data +: : +- View (`data`, [a#x, b#x]) +: : +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: : +- Project [a#x, b#x] +: : +- SubqueryAlias data +: : +- LocalRelation [a#x, b#x] +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [a#x] +: +- SubqueryAlias data +: +- View (`data`, [a#x, b#x]) +: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: +- Project [a#x, b#x] +: +- SubqueryAlias data +: +- LocalRelation [a#x, b#x] ++- LocalRelation [col1#x] + + -- !query set spark.sql.groupByOrdinal=false -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out index e524cf54fdbf..708ba0ad7b58 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out @@ -226,3 +226,10 @@ Project [raise_error(_LEGACY_ERROR_USER_RAISED_EXCEPTION, map(errorMessage, hell SET spark.sql.legacy.raiseErrorWithoutErrorClass=false -- !query analysis SetCommand (spark.sql.legacy.raiseErrorWithoutErrorClass,Some(false)) + + +-- !query +SELECT assert_true(col1 <= col2) FROM VALUES ('2025-03-01', '2025-03-10') +-- !query analysis +Project [assert_true((col1#x <= col2#x), '(col1 <= col2)' is not true!) AS assert_true((col1 <= col2), '(col1 <= col2)' is not true!)#x] ++- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by.sql.out index ad0357f81937..9f0afdfd8859 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by.sql.out @@ -12,6 +12,22 @@ AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [a#x, b#x] +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS col1 +-- !query analysis +CreateViewCommand `v1`, SELECT 1 AS col1, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS col1#x] + +- OneRowRelation + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v2 AS SELECT 1 AS col1 +-- !query analysis +CreateViewCommand `v2`, SELECT 1 AS col1, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS col1#x] + +- OneRowRelation + + -- !query SELECT a FROM testData ORDER BY a -- !query analysis @@ -184,7 +200,91 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT col1 FROM VALUES (1, 2) GROUP BY col1 ORDER BY MAX(col2), RAND() +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT 1 AS col1, col1 FROM VALUES (10) ORDER BY col1 +-- !query analysis +Sort [col1#x ASC NULLS FIRST], true ++- Project [1 AS col1#x, col1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v2 +GROUP BY + col1 +ORDER BY + col1 +-- !query analysis +Project [count(DISTINCT col1)#xL] ++- Sort [col1#x ASC NULLS FIRST], true + +- Aggregate [col1#x], [count(distinct col1#x) AS count(DISTINCT col1)#xL, col1#x] + +- Project [col1#x] + +- Join Inner, (col1#x = col1#x) + :- SubqueryAlias v1 + : +- View (`v1`, [col1#x]) + : +- Project [cast(col1#x as int) AS col1#x] + : +- Project [1 AS col1#x] + : +- OneRowRelation + +- SubqueryAlias v2 + +- View (`v2`, [col1#x]) + +- Project [cast(col1#x as int) AS col1#x] + +- Project [1 AS col1#x] + +- OneRowRelation + + +-- !query +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v1 +GROUP BY + col1 +ORDER BY + col1 +-- !query analysis +Project [count(DISTINCT col1)#xL] ++- Sort [col1#x ASC NULLS FIRST], true + +- Aggregate [col1#x], [count(distinct col1#x) AS count(DISTINCT col1)#xL, col1#x] + +- Project [col1#x] + +- Join Inner, (col1#x = col1#x) + :- SubqueryAlias v1 + : +- View (`v1`, [col1#x]) + : +- Project [cast(col1#x as int) AS col1#x] + : +- Project [1 AS col1#x] + : +- OneRowRelation + +- SubqueryAlias v1 + +- View (`v1`, [col1#x]) + +- Project [cast(col1#x as int) AS col1#x] + +- Project [1 AS col1#x] + +- OneRowRelation + + -- !query DROP VIEW IF EXISTS testData -- !query analysis DropTempViewCommand testData + + +-- !query +DROP VIEW IF EXISTS v1 +-- !query analysis +DropTempViewCommand v1 + + +-- !query +DROP VIEW IF EXISTS v2 +-- !query analysis +DropTempViewCommand v2 diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index 1e17529d545b..81194804e917 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -176,6 +176,98 @@ with cte AS (SELECT * FROM va) SELECT * FROM cte; -- Self-refer to non-existent cte, should fail. with cte as (select * from cte) select * from cte; +-- WithCTE placement for WITH in boch UNION branches +SELECT * FROM ( + ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) UNION ALL ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) +); + +-- CTE in the left UNION branch with a subquery referencing that CTE name on the right +SELECT * FROM ( + ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) UNION ALL ( + SELECT 1 WHERE EXISTS ( + SELECT * FROM cte + ) + ) +); + +-- CTEs in subqueries in both branches of UNION +SELECT + * +FROM + VALUES (1) +WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) +UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) +); + +WITH cte AS ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +) +SELECT * FROM cte; + +SELECT * FROM ( + WITH cte AS (SELECT 1) + SELECT + * + FROM VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +); + +-- CTE name with dots +WITH `a.b.c` AS ( + SELECT 1 +) +SELECT * FROM `a.b.c`; + -- Clean up DROP VIEW IF EXISTS t; DROP VIEW IF EXISTS t2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql index 75afc82f998d..07e7227b5dfa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-alias.sql @@ -46,6 +46,30 @@ SELECT COUNT(b) AS k FROM testData GROUP BY k; -- Ordinal is replaced correctly when grouping by alias of a literal SELECT MAX(col1), 3 as abc FROM VALUES(1),(2),(3),(4) GROUP BY col1 % abc; +-- GROUP BY attribute takes precedence over alias +SELECT 1 AS a FROM testData GROUP BY `a`; + +-- Group alias on subquery with CTE inside +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +; + +-- Group by alias on subquery with relation +SELECT ( + SELECT a FROM testData LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +; + -- turn off group by aliases set spark.sql.groupByAliases=false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-all.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-all.sql index 4400c0b57866..dd05b1b8d261 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-all.sql @@ -82,4 +82,34 @@ select country, (select count(*) from data d1 where d1.country = d2.country), co select (select count(*) from data d1 where d1.country = d2.country) + count(id) from data d2 group by all; -- SELECT list contains unresolved column, should not report UNRESOLVED_ALL_IN_GROUP_BY -select non_exist from data group by all; \ No newline at end of file +select non_exist from data group by all; + +-- GROUP BY ALL with a * in aggregate expressions list +SELECT * FROM data GROUP BY ALL; + +SELECT 4 AS a, 5 AS b, 6 AS c, * FROM data GROUP BY ALL; + +SELECT *, 4 AS a, 5 AS b, 6 AS c FROM data GROUP BY ALL; + +SELECT 4 AS a, 5 AS b, *, 6 AS c FROM data GROUP BY ALL; + +-- Group by ALL on subquery with CTE inside +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +; + +-- Group by ALL on subquery with relation +SELECT ( + SELECT country FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +; diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql index b773396c050d..ca563ba5db0f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-ordinal.sql @@ -89,6 +89,27 @@ select a, b, count(1) from data group by cube(-1, 2); select a, b, count(1) from data group by cube(1, 3); +-- Group by ordinal on subquery with CTE inside +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +; + +-- Group by ordinal on subquery with relation +SELECT ( + SELECT a FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +; + -- turn off group by ordinal set spark.sql.groupByOrdinal=false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql index f7c58a0dc5ad..417af4220641 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -40,4 +40,7 @@ SELECT raise_error(1); SET spark.sql.legacy.raiseErrorWithoutErrorClass=true; SELECT assert_true(false); SELECT raise_error('hello'); -SET spark.sql.legacy.raiseErrorWithoutErrorClass=false; \ No newline at end of file +SET spark.sql.legacy.raiseErrorWithoutErrorClass=false; + +-- Implicit alias of assert expression +SELECT assert_true(col1 <= col2) FROM VALUES ('2025-03-01', '2025-03-10'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/order-by.sql b/sql/core/src/test/resources/sql-tests/inputs/order-by.sql index d67987bcb51c..c5ce610e4c88 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/order-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/order-by.sql @@ -2,6 +2,8 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) AS testData(a, b); +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS col1; +CREATE OR REPLACE TEMPORARY VIEW v2 AS SELECT 1 AS col1; -- ORDER BY a column from a child's output SELECT a FROM testData ORDER BY a; @@ -30,5 +32,38 @@ SELECT * FROM testData ORDER BY (SELECT * FROM testData ORDER BY (SELECT a FROM -- Fails because correlation is not allowed in ORDER BY SELECT * FROM testData ORDER BY (SELECT a FROM VALUES (1)); +-- Nondeterministic expression + Aggregate expression in ORDER BY +SELECT col1 FROM VALUES (1, 2) GROUP BY col1 ORDER BY MAX(col2), RAND(); + +-- Order by table column and alias with the same name +SELECT 1 AS col1, col1 FROM VALUES (10) ORDER BY col1; + +-- Order by on top of natural join with count(distinct) +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v2 +GROUP BY + col1 +ORDER BY + col1 +; + +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v1 +GROUP BY + col1 +ORDER BY + col1 +; + -- Clean up DROP VIEW IF EXISTS testData; +DROP VIEW IF EXISTS v1; +DROP VIEW IF EXISTS v2; diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 4367ae1d5f2d..71dd283c0d9e 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -584,6 +584,148 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT * FROM ( + ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) UNION ALL ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) +) +-- !query schema +struct<1:int> +-- !query output +1 +2 + + +-- !query +SELECT * FROM ( + ( + WITH cte AS (SELECT 2) + SELECT * FROM cte + ) UNION ALL ( + SELECT 1 WHERE EXISTS ( + SELECT * FROM cte + ) + ) +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`cte`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 134, + "stopIndex" : 136, + "fragment" : "cte" + } ] +} + + +-- !query +SELECT + * +FROM + VALUES (1) +WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) +UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) +) +-- !query schema +struct<col1:int> +-- !query output +1 +1 + + +-- !query +WITH cte AS ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +) +SELECT * FROM cte +-- !query schema +struct<col1:int> +-- !query output +1 +1 + + +-- !query +SELECT * FROM ( + WITH cte AS (SELECT 1) + SELECT + * + FROM VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + UNION ALL ( + SELECT + * + FROM + VALUES (1) + WHERE EXISTS ( + WITH cte AS (SELECT 1) + SELECT * FROM cte + ) + ) +) +-- !query schema +struct<col1:int> +-- !query output +1 +1 + + +-- !query +WITH `a.b.c` AS ( + SELECT 1 +) +SELECT * FROM `a.b.c` +-- !query schema +struct<1:int> +-- !query output +1 + + -- !query DROP VIEW IF EXISTS t -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out index fd740c88bc55..052b3bc9782b 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-alias.sql.out @@ -287,6 +287,46 @@ struct<max(col1):int,abc:int> 4 3 +-- !query +SELECT 1 AS a FROM testData GROUP BY `a` +-- !query schema +struct<a:int> +-- !query output +1 +1 +1 +1 + + +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +-- !query schema +struct<subq1:int> +-- !query output +1 + + +-- !query +SELECT ( + SELECT a FROM testData LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + subq1 +-- !query schema +struct<subq1:int> +-- !query output +1 + + -- !query set spark.sql.groupByAliases=false -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-all.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-all.sql.out index bf3c1a3c21b8..a82366ae4d10 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-all.sql.out @@ -288,3 +288,88 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "fragment" : "non_exist" } ] } + + +-- !query +SELECT * FROM data GROUP BY ALL +-- !query schema +struct<country:string,city:string,name:string,id:int,power:decimal(3,1)> +-- !query output +China Hangzhou Wenchen 4 14.0 +China Shanghai Shanghaiese 5 15.0 +Korea Seoul Hyukjin 6 16.0 +UK London Sean 7 17.0 +USA Berkeley Xiao 3 13.0 +USA San Francisco Matei 2 12.0 +USA San Francisco Reynold 1 11.0 + + +-- !query +SELECT 4 AS a, 5 AS b, 6 AS c, * FROM data GROUP BY ALL +-- !query schema +struct<a:int,b:int,c:int,country:string,city:string,name:string,id:int,power:decimal(3,1)> +-- !query output +4 5 6 China Hangzhou Wenchen 4 14.0 +4 5 6 China Shanghai Shanghaiese 5 15.0 +4 5 6 Korea Seoul Hyukjin 6 16.0 +4 5 6 UK London Sean 7 17.0 +4 5 6 USA Berkeley Xiao 3 13.0 +4 5 6 USA San Francisco Matei 2 12.0 +4 5 6 USA San Francisco Reynold 1 11.0 + + +-- !query +SELECT *, 4 AS a, 5 AS b, 6 AS c FROM data GROUP BY ALL +-- !query schema +struct<country:string,city:string,name:string,id:int,power:decimal(3,1),a:int,b:int,c:int> +-- !query output +China Hangzhou Wenchen 4 14.0 4 5 6 +China Shanghai Shanghaiese 5 15.0 4 5 6 +Korea Seoul Hyukjin 6 16.0 4 5 6 +UK London Sean 7 17.0 4 5 6 +USA Berkeley Xiao 3 13.0 4 5 6 +USA San Francisco Matei 2 12.0 4 5 6 +USA San Francisco Reynold 1 11.0 4 5 6 + + +-- !query +SELECT 4 AS a, 5 AS b, *, 6 AS c FROM data GROUP BY ALL +-- !query schema +struct<a:int,b:int,country:string,city:string,name:string,id:int,power:decimal(3,1),c:int> +-- !query output +4 5 China Hangzhou Wenchen 4 14.0 6 +4 5 China Shanghai Shanghaiese 5 15.0 6 +4 5 Korea Seoul Hyukjin 6 16.0 6 +4 5 UK London Sean 7 17.0 6 +4 5 USA Berkeley Xiao 3 13.0 6 +4 5 USA San Francisco Matei 2 12.0 6 +4 5 USA San Francisco Reynold 1 11.0 6 + + +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +-- !query schema +struct<subq1:int> +-- !query output +1 + + +-- !query +SELECT ( + SELECT country FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + ALL +-- !query schema +struct<subq1:string> +-- !query output +USA diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index 0f29c27268c8..e13c4b13899a 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -508,6 +508,35 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT ( + WITH cte AS (SELECT 1) + SELECT * FROM cte +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +-- !query schema +struct<subq1:int> +-- !query output +1 + + +-- !query +SELECT ( + SELECT a FROM data LIMIT 1 +) AS subq1 +FROM + VALUES (1) +GROUP BY + 1 +-- !query schema +struct<subq1:int> +-- !query output +1 + + -- !query set spark.sql.groupByOrdinal=false -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index b05b69715938..efcb0409bf93 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -313,3 +313,11 @@ SET spark.sql.legacy.raiseErrorWithoutErrorClass=false struct<key:string,value:string> -- !query output spark.sql.legacy.raiseErrorWithoutErrorClass false + + +-- !query +SELECT assert_true(col1 <= col2) FROM VALUES ('2025-03-01', '2025-03-10') +-- !query schema +struct<assert_true((col1 <= col2), '(col1 <= col2)' is not true!):void> +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/order-by.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by.sql.out index 786564fbc3d6..2b9862964a64 100644 --- a/sql/core/src/test/resources/sql-tests/results/order-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/order-by.sql.out @@ -9,6 +9,22 @@ struct<> +-- !query +CREATE OR REPLACE TEMPORARY VIEW v1 AS SELECT 1 AS col1 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v2 AS SELECT 1 AS col1 +-- !query schema +struct<> +-- !query output + + + -- !query SELECT a FROM testData ORDER BY a -- !query schema @@ -186,9 +202,75 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT col1 FROM VALUES (1, 2) GROUP BY col1 ORDER BY MAX(col2), RAND() +-- !query schema +struct<col1:int> +-- !query output +1 + + +-- !query +SELECT 1 AS col1, col1 FROM VALUES (10) ORDER BY col1 +-- !query schema +struct<col1:int,col1:int> +-- !query output +1 10 + + +-- !query +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v2 +GROUP BY + col1 +ORDER BY + col1 +-- !query schema +struct<count(DISTINCT col1):bigint> +-- !query output +1 + + +-- !query +SELECT + COUNT(DISTINCT col1) +FROM + v1 +NATURAL JOIN + v1 +GROUP BY + col1 +ORDER BY + col1 +-- !query schema +struct<count(DISTINCT col1):bigint> +-- !query output +1 + + -- !query DROP VIEW IF EXISTS testData -- !query schema struct<> -- !query output + + +-- !query +DROP VIEW IF EXISTS v1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS v2 +-- !query schema +struct<> +-- !query output + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org