This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new 5f24babefb2d [SPARK-51979][SQL][TESTS] Add more SQL Query tests for SQL TVF return columns 5f24babefb2d is described below commit 5f24babefb2d1b0fe3cdc9ea8c40455630e2ba25 Author: Allison Wang <allison.w...@databricks.com> AuthorDate: Fri May 2 12:59:29 2025 +0800 [SPARK-51979][SQL][TESTS] Add more SQL Query tests for SQL TVF return columns ### What changes were proposed in this pull request? This PR adds more SQL query tests for SQL User-defined table function with various valid and invalid return columns. ### Why are the changes needed? To improve test coverage for SQL UDFs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This PR is test only ### Was this patch authored or co-authored using generative AI tooling? No Closes #50776 from allisonwang-db/spark-51979-more-tvf-tests. Authored-by: Allison Wang <allison.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit fd56ef68efdac66ab51aa376bd753bb04d715635) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql-tests/analyzer-results/sql-udf.sql.out | 187 +++++++++++++++++++++ .../test/resources/sql-tests/inputs/sql-udf.sql | 37 ++++ .../resources/sql-tests/results/sql-udf.sql.out | 176 +++++++++++++++++++ 3 files changed, 400 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out index 76252a4db1b5..ae47ab805b8d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out @@ -592,6 +592,193 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE FUNCTION foo2a0() RETURNS TABLE() RETURN SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +CREATE FUNCTION foo2a2() RETURNS TABLE(c1 INT, c2 INT) RETURN SELECT 1, 2 +-- !query analysis 
+org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2a2`" + } +} + + +-- !query +SELECT * FROM foo2a2() +-- !query analysis +Project [c1#x, c2#x] ++- SQLFunctionNode spark_catalog.default.foo2a2 + +- SubqueryAlias foo2a2 + +- Project [cast(1#x as int) AS c1#x, cast(2#x as int) AS c2#x] + +- Project [1 AS 1#x, 2 AS 2#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2a4() RETURNS TABLE(c1 INT, c2 INT, c3 INT, c4 INT) RETURN SELECT 1, 2, 3, 4 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2a4`" + } +} + + +-- !query +SELECT * FROM foo2a2() +-- !query analysis +Project [c1#x, c2#x] ++- SQLFunctionNode spark_catalog.default.foo2a2 + +- SubqueryAlias foo2a2 + +- Project [cast(1#x as int) AS c1#x, cast(2#x as int) AS c2#x] + +- Project [1 AS 1#x, 2 AS 2#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2b1() RETURNS TABLE(DuPLiCatE INT, duplicate INT) RETURN SELECT 1, 2 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_ROUTINE_RETURNS_COLUMNS", + "sqlState" : "42711", + "messageParameters" : { + "columns" : "`duplicate`", + "routineName" : "foo2b1" + } +} + + +-- !query +CREATE FUNCTION foo2b2() RETURNS TABLE(a INT, b INT, duplicate INT, c INT, d INT, e INT, DUPLICATE INT) +RETURN SELECT 1, 2, 3, 4, 5, 6, 7 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_ROUTINE_RETURNS_COLUMNS", + "sqlState" : "42711", + "messageParameters" : { + "columns" : "`duplicate`", + "routineName" : "foo2b2" + } +} + + +-- !query +CREATE FUNCTION 
foo2c1() RETURNS TABLE(c1 INT DEFAULT 5) RETURN SELECT 1, 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'DEFAULT'", + "hint" : "" + } +} + + +-- !query +CREATE FUNCTION foo31() RETURNS INT RETURN (SELECT 1, 2) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "INVALID_SUBQUERY_EXPRESSION.SCALAR_SUBQUERY_RETURN_MORE_THAN_ONE_OUTPUT_COLUMN", + "sqlState" : "42823", + "messageParameters" : { + "number" : "2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 56, + "fragment" : "CREATE FUNCTION foo31() RETURNS INT RETURN (SELECT 1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo32() RETURNS TABLE(a INT) RETURN SELECT 1, 2 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.RETURN_COLUMN_COUNT_MISMATCH", + "sqlState" : "42601", + "messageParameters" : { + "name" : "spark_catalog.default.foo32", + "outputSize" : "2", + "returnParamSize" : "1" + } +} + + +-- !query +CREATE FUNCTION foo33() RETURNS TABLE(a INT, b INT) RETURN SELECT 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.RETURN_COLUMN_COUNT_MISMATCH", + "sqlState" : "42601", + "messageParameters" : { + "name" : "spark_catalog.default.foo33", + "outputSize" : "1", + "returnParamSize" : "2" + } +} + + +-- !query +CREATE FUNCTION foo41() RETURNS INT RETURN SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "errorClass" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo41`" + } +} + + +-- !query +CREATE FUNCTION foo42() RETURNS TABLE(a INT) RETURN 1 +-- !query analysis 
+org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.SQL_TABLE_UDF_BODY_MUST_BE_A_QUERY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "foo42" + } +} + + -- !query CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql index f71cca3fe5d3..a436d2c5c627 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql @@ -146,6 +146,43 @@ CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN SELEC CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1; CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1; +-- 1.2 Returns Columns +-- 1.2.a A table function with various numbers of returns columns +-- Expect error: Cannot have an empty RETURNS +CREATE FUNCTION foo2a0() RETURNS TABLE() RETURN SELECT 1; + +CREATE FUNCTION foo2a2() RETURNS TABLE(c1 INT, c2 INT) RETURN SELECT 1, 2; +-- Expect (1, 2) +SELECT * FROM foo2a2(); + +CREATE FUNCTION foo2a4() RETURNS TABLE(c1 INT, c2 INT, c3 INT, c4 INT) RETURN SELECT 1, 2, 3, 4; +-- Expect (1, 2, 3, 4) +SELECT * FROM foo2a2(); + +-- 1.2.b Duplicates in RETURNS clause +-- Expect failure +CREATE FUNCTION foo2b1() RETURNS TABLE(DuPLiCatE INT, duplicate INT) RETURN SELECT 1, 2; + +-- Expect failure +CREATE FUNCTION foo2b2() RETURNS TABLE(a INT, b INT, duplicate INT, c INT, d INT, e INT, DUPLICATE INT) +RETURN SELECT 1, 2, 3, 4, 5, 6, 7; + +-- 1.2.c No DEFAULT allowed in RETURNS +CREATE FUNCTION foo2c1() RETURNS TABLE(c1 INT DEFAULT 5) RETURN SELECT 1, 2; + +-- 1.3 Mismatched RETURN +-- Expect Failure +CREATE FUNCTION foo31() RETURNS INT RETURN (SELECT 1, 2); + +CREATE FUNCTION foo32() RETURNS TABLE(a INT) RETURN SELECT 1, 2; + +CREATE FUNCTION foo33() RETURNS TABLE(a INT, b INT) RETURN 
SELECT 1; + +-- 1.4 Table function returns expression and vice versa +CREATE FUNCTION foo41() RETURNS INT RETURN SELECT 1; +-- Expect failure +CREATE FUNCTION foo42() RETURNS TABLE(a INT) RETURN 1; + ------------------------------- -- 2. Scalar SQL UDF -- 2.1 deterministic simple expressions diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out index b15a6ec2551d..cfb57b847a74 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out @@ -589,6 +589,182 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE FUNCTION foo2a0() RETURNS TABLE() RETURN SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "')'", + "hint" : "" + } +} + + +-- !query +CREATE FUNCTION foo2a2() RETURNS TABLE(c1 INT, c2 INT) RETURN SELECT 1, 2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM foo2a2() +-- !query schema +struct<c1:int,c2:int> +-- !query output +1 2 + + +-- !query +CREATE FUNCTION foo2a4() RETURNS TABLE(c1 INT, c2 INT, c3 INT, c4 INT) RETURN SELECT 1, 2, 3, 4 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM foo2a2() +-- !query schema +struct<c1:int,c2:int> +-- !query output +1 2 + + +-- !query +CREATE FUNCTION foo2b1() RETURNS TABLE(DuPLiCatE INT, duplicate INT) RETURN SELECT 1, 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_ROUTINE_RETURNS_COLUMNS", + "sqlState" : "42711", + "messageParameters" : { + "columns" : "`duplicate`", + "routineName" : "foo2b1" + } +} + + +-- !query +CREATE FUNCTION foo2b2() RETURNS TABLE(a INT, b INT, duplicate INT, c INT, d INT, e INT, DUPLICATE INT) +RETURN SELECT 1, 2, 3, 4, 5, 
6, 7 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_ROUTINE_RETURNS_COLUMNS", + "sqlState" : "42711", + "messageParameters" : { + "columns" : "`duplicate`", + "routineName" : "foo2b2" + } +} + + +-- !query +CREATE FUNCTION foo2c1() RETURNS TABLE(c1 INT DEFAULT 5) RETURN SELECT 1, 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'DEFAULT'", + "hint" : "" + } +} + + +-- !query +CREATE FUNCTION foo31() RETURNS INT RETURN (SELECT 1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "INVALID_SUBQUERY_EXPRESSION.SCALAR_SUBQUERY_RETURN_MORE_THAN_ONE_OUTPUT_COLUMN", + "sqlState" : "42823", + "messageParameters" : { + "number" : "2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 56, + "fragment" : "CREATE FUNCTION foo31() RETURNS INT RETURN (SELECT 1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo32() RETURNS TABLE(a INT) RETURN SELECT 1, 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.RETURN_COLUMN_COUNT_MISMATCH", + "sqlState" : "42601", + "messageParameters" : { + "name" : "spark_catalog.default.foo32", + "outputSize" : "2", + "returnParamSize" : "1" + } +} + + +-- !query +CREATE FUNCTION foo33() RETURNS TABLE(a INT, b INT) RETURN SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.RETURN_COLUMN_COUNT_MISMATCH", + "sqlState" : "42601", + "messageParameters" : { + "name" : "spark_catalog.default.foo33", + "outputSize" : "1", + "returnParamSize" : "2" + } +} + + +-- !query +CREATE FUNCTION foo41() RETURNS INT RETURN SELECT 1 +-- !query schema 
+struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo42() RETURNS TABLE(a INT) RETURN 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "USER_DEFINED_FUNCTIONS.SQL_TABLE_UDF_BODY_MUST_BE_A_QUERY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "foo42" + } +} + + -- !query CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org