This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 6df5cb777527 [SPARK-51439][SQL] Support SQL UDF with DEFAULT argument
6df5cb777527 is described below

commit 6df5cb7775270ed566e9a964e54772d371d2c785
Author: Haoyu Weng <wengh...@gmail.com>
AuthorDate: Thu Apr 24 08:01:36 2025 +0800

    [SPARK-51439][SQL] Support SQL UDF with DEFAULT argument

    Continuing allisonwang-db's work on #50373 and #49471

    ### What changes were proposed in this pull request?

    This PR adds support for DEFAULT arguments in SQL UDFs. Examples:

    ```sql
    CREATE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a;
    SELECT foo1d1();    -- 10
    SELECT foo1d1(20);  -- 20

    CREATE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b;
    SELECT * FROM foo1d6(5);    -- 5, 7
    SELECT * FROM foo1d6(5, 2); -- 5, 2
    ```

    See sql-udf.sql for more valid and invalid examples.

    ### Why are the changes needed?

    To support default arguments in SQL UDFs.

    ### Does this PR introduce _any_ user-facing change?

    Yes. SQL UDFs now support DEFAULT arguments.

    A side effect of the grammar change is that some invalid function parameter
    definitions are no longer rejected by the grammar itself, but by the parser
    logic instead. Examples:

    ```sql
    -- multiple COMMENT or multiple NOT NULL
    CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world') RETURNS INT RETURN a;

    -- before:
    [PARSE_SYNTAX_ERROR] Syntax error at or near 'COMMENT'. SQLSTATE: 42601
    == SQL (line 2, position 1) ==
    CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world') RETURNS INT RETURN a;
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    -- after:
    [CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE] CREATE TABLE column a specifies descriptor "COMMENT" more than once, which is invalid. SQLSTATE: 42710
    == SQL (line 1, position 1) ==
    CREATE TEMPORARY FUNCTION foo(a INT COMMENT 'hello' COMMENT 'world')...
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ```

    ```sql
    -- GENERATED ALWAYS AS
    CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT RETURN a;

    -- before:
    [PARSE_SYNTAX_ERROR] Syntax error at or near 'GENERATED'. SQLSTATE: 42601
    == SQL (line 2, position 1) ==
    CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT RETURN a;
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    -- after:
    [INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS] Invalid SQL syntax: CREATE FUNCTION with generated columns as parameters is not allowed. SQLSTATE: 42000
    == SQL (line 2, position 1) ==
    CREATE TEMPORARY FUNCTION foo(a INT GENERATED ALWAYS AS (1)) RETURNS INT RETURN a;
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ```

    This does not change the behavior of existing valid SQL.

    ### How was this patch tested?

    End-to-end regression tests in `sql-udf.sql` and simple tests in `SQLFunctionSuite`.

    ### Was this patch authored or co-authored using generative AI tooling?

    No

    Closes #50408 from wengh/sql-udf-default.
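[Editor's sketch: a condensed illustration of how omitted arguments bind, drawn from the `foo1d2` cases in this patch's `sql-udf.sql` tests; the expected values and error classes below come from the new golden files in the diff.]

```sql
CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT DEFAULT 9)
  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;

-- Arguments bind positionally; omitted trailing parameters fall back to their defaults.
SELECT foo1d2(1, 2, 3, 4);    -- 1 2 3 4
SELECT foo1d2(1, 2);          -- 1 2 8 9
SELECT foo1d2(1);             -- 1 7 8 9
SELECT foo1d2();              -- error: WRONG_NUM_ARGS (`a` has no default)
SELECT foo1d2(1, 2, 3, 4, 5); -- error: WRONG_NUM_ARGS (too many arguments)
```

Defaults must also be trailing: a parameter with a default may not be followed by one without, which is rejected with `USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION`.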
    Lead-authored-by: Haoyu Weng <wengh...@gmail.com>
    Co-authored-by: Allison Wang <allison.w...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../src/main/resources/error/error-conditions.json |  10 +
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     |   4 +
 .../spark/sql/catalyst/util/SparkParserUtils.scala |   8 +
 .../spark/sql/errors/QueryParsingErrors.scala      |  10 +
 .../org/apache/spark/sql/types/StructField.scala   |  13 +-
 .../org/apache/spark/sql/types/StructType.scala    |   1 +
 .../spark/sql/catalyst/catalog/SQLFunction.scala   |   2 +-
 .../sql/catalyst/catalog/UserDefinedFunction.scala |   5 +
 .../sql/catalyst/parser/AbstractSqlParser.scala    |   8 +
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  36 +-
 .../sql/catalyst/parser/ParserInterface.scala      |   7 +
 .../spark/sql/catalyst/parser/ParserUtils.scala    |   6 -
 .../SparkConnectWithSessionExtensionSuite.scala    |   3 +
 .../spark/sql/execution/SparkSqlParser.scala       |  14 +
 .../command/CreateSQLFunctionCommand.scala         |   4 +-
 .../sql-tests/analyzer-results/sql-udf.sql.out     | 391 ++++++++++++++++++-
 .../test/resources/sql-tests/inputs/sql-udf.sql    | 108 +++++-
 .../resources/sql-tests/results/sql-udf.sql.out    | 431 ++++++++++++++++++++-
 .../spark/sql/SparkSessionExtensionSuite.scala     |   4 +
 .../spark/sql/execution/SQLFunctionSuite.scala     |  13 +
 20 files changed, 1025 insertions(+), 53 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index 6591a62151d4..462daaa3fe74 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3444,6 +3444,16 @@
         "ANALYZE TABLE(S) ... COMPUTE STATISTICS ... <ctx> must be either NOSCAN or empty."
       ]
     },
+    "CREATE_FUNC_WITH_COLUMN_CONSTRAINTS" : {
+      "message" : [
+        "CREATE FUNCTION with constraints on parameters is not allowed."
+      ]
+    },
+    "CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS" : {
+      "message" : [
+        "CREATE FUNCTION with generated columns as parameters is not allowed."
+      ]
+    },
     "CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE" : {
       "message" : [
         "Cannot create a routine with both IF NOT EXISTS and REPLACE specified."

diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 51daf86388c4..e15441e14e42 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -177,6 +177,10 @@ singleTableSchema
     : colTypeList EOF
     ;
 
+singleRoutineParamList
+    : colDefinitionList EOF
+    ;
+
 statement
     : query                                                            #statementDefault
     | executeImmediate                                                 #visitExecuteImmediate

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
index 01ee89908570..9c9e623e0339 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala
@@ -127,6 +127,14 @@ trait SparkParserUtils {
     }
   }
 
+  /** Get the code that creates the given node. */
+  def source(ctx: ParserRuleContext): String = {
+    // Note: `exprCtx.getText` returns a string without spaces, so we need to
+    // get the text from the underlying char stream instead.
+    val stream = ctx.getStart.getInputStream
+    stream.getText(Interval.of(ctx.getStart.getStartIndex, ctx.getStop.getStopIndex))
+  }
+
   /** Convert a string token into a string. */
   def string(token: Token): String = unescapeSQLString(token.getText)
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index b04b5e71d098..3f6c20679e21 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -654,6 +654,16 @@
       ctx)
   }
 
+  def createFuncWithGeneratedColumnsError(ctx: ParserRuleContext): Throwable = {
+    new ParseException(
+      errorClass = "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+      ctx)
+  }
+
+  def createFuncWithConstraintError(ctx: ParserRuleContext): Throwable = {
+    new ParseException(errorClass = "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS", ctx)
+  }
+
   def defineTempFuncWithIfNotExistsError(ctx: ParserRuleContext): Throwable = {
     new ParseException(errorClass = "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_IF_NOT_EXISTS", ctx)
   }

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
index 83f4e4d459c2..4c51980d4e6c 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -150,10 +150,13 @@ case class StructField(
   /**
    * Return the default value of this StructField. This is used for storing the default value of a
    * function parameter.
+   *
+   * It is present when the field represents a function parameter with a default value, such as
+   * `CREATE FUNCTION f(arg INT DEFAULT 42) RETURN ...`.
    */
   private[sql] def getDefault(): Option[String] = {
-    if (metadata.contains("default")) {
-      Option(metadata.getString("default"))
+    if (metadata.contains(StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY)) {
+      Option(metadata.getString(StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY))
     } else {
       None
     }
@@ -183,6 +186,9 @@ case class StructField(
 
   /**
    * Return the current default value of this StructField.
+   *
+   * It is present only when the field represents a table column with a default value, such as:
+   * `ALTER TABLE t ALTER COLUMN c SET DEFAULT 42`.
    */
   def getCurrentDefaultValue(): Option[String] = {
     if (metadata.contains(CURRENT_DEFAULT_COLUMN_METADATA_KEY)) {
@@ -218,7 +224,8 @@ case class StructField(
     metadata.contains(EXISTS_DEFAULT_COLUMN_METADATA_KEY)
   }
 
-  private def getDDLDefault = getCurrentDefaultValue()
+  private def getDDLDefault = getDefault()
+    .orElse(getCurrentDefaultValue())
     .map(" DEFAULT " + _)
     .getOrElse("")
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
index cc95d8ee94b0..4c49d3a58f4f 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -521,6 +521,7 @@
  */
 @Stable
 object StructType extends AbstractDataType {
+  private[sql] val SQL_FUNCTION_DEFAULT_METADATA_KEY = "default"
 
   override private[sql] def defaultConcreteType: DataType = new StructType
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
index 923373c1856a..f2fd3b90f646 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala
@@ -207,7 +207,7 @@ object SQLFunction {
     val returnType = parseReturnTypeText(props(RETURN_TYPE), isTableFunc, parser)
     SQLFunction(
       name = function.identifier,
-      inputParam = props.get(INPUT_PARAM).map(parseTableSchema(_, parser)),
+      inputParam = props.get(INPUT_PARAM).map(parseRoutineParam(_, parser)),
       returnType = returnType.get,
       exprText = props.get(EXPRESSION),
       queryText = props.get(QUERY),

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
index a76ca7b15c27..8ed241468352 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala
@@ -86,6 +86,11 @@ object UserDefinedFunction {
   // The default Hive Metastore SQL schema length for function resource uri.
   private val HIVE_FUNCTION_RESOURCE_URI_LENGTH_THRESHOLD: Int = 4000
 
+  def parseRoutineParam(text: String, parser: ParserInterface): StructType = {
+    val parsed = parser.parseRoutineParam(text)
+    CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType]
+  }
+
   def parseTableSchema(text: String, parser: ParserInterface): StructType = {
     val parsed = parser.parseTableSchema(text)
     CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType]

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
index c17409a68c96..216136d8a7c8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala
@@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, Logic
 import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.errors.QueryParsingErrors
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.StructType
 
 /**
  * Base class for all ANTLR4 [[ParserInterface]] implementations.
@@ -102,6 +103,13 @@
     }
   }
 
+  override def parseRoutineParam(sqlText: String): StructType = parse(sqlText) { parser =>
+    val ctx = parser.singleRoutineParamList()
+    withErrorHandling(ctx, Some(sqlText)) {
+      astBuilder.visitSingleRoutineParamList(ctx)
+    }
+  }
+
   def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = {
     withOrigin(ctx, sqlText) {
       try {

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index d7d8f9d3d866..b15c5b17332c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -25,7 +25,6 @@ import scala.jdk.CollectionConverters._
 import scala.util.{Left, Right}
 
 import org.antlr.v4.runtime.{ParserRuleContext, RuleContext, Token}
-import org.antlr.v4.runtime.misc.Interval
 import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode}
 
 import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkThrowable, SparkThrowableHelper}
@@ -129,9 +128,7 @@ class AstBuilder extends DataTypeAstBuilder
    * @return The original input text, including all whitespaces and formatting.
    */
   private def getOriginalText(ctx: ParserRuleContext): String = {
-    ctx.getStart.getInputStream.getText(
-      new Interval(ctx.getStart.getStartIndex, ctx.getStop.getStopIndex)
-    )
+    SparkParserUtils.source(ctx)
   }
 
   /**
@@ -3897,6 +3894,30 @@
    * DataType parsing
    * ******************************************************************************************** */
 
+  override def visitSingleRoutineParamList(
+      ctx: SingleRoutineParamListContext): StructType = withOrigin(ctx) {
+    val (cols, constraints) = visitColDefinitionList(ctx.colDefinitionList())
+    // Constraints and generated columns should have been rejected by the parser.
+    assert(constraints.isEmpty)
+    for (col <- cols) {
+      assert(col.generationExpression.isEmpty)
+      assert(col.identityColumnSpec.isEmpty)
+    }
+    // Build fields from the columns, converting comments and default values
+    val fields = for (col <- cols) yield {
+      val metadataBuilder = new MetadataBuilder().withMetadata(col.metadata)
+      col.comment.foreach { c =>
+        metadataBuilder.putString("comment", c)
+      }
+      col.defaultValue.foreach { default =>
+        metadataBuilder.putString(
+          StructType.SQL_FUNCTION_DEFAULT_METADATA_KEY, default.originalSQL)
+      }
+      StructField(col.name, col.dataType, col.nullable, metadataBuilder.build())
+    }
+    StructType(fields.toArray)
+  }
+
   /**
    * Create top level table schema.
    */
@@ -4061,12 +4082,7 @@
     if (expr.containsPattern(PARAMETER)) {
       throw QueryParsingErrors.parameterMarkerNotAllowed(place, expr.origin)
     }
-    // Extract the raw expression text so that we can save the user provided text. We don't
-    // use `Expression.sql` to avoid storing incorrect text caused by bugs in any expression's
-    // `sql` method. Note: `exprCtx.getText` returns a string without spaces, so we need to
-    // get the text from the underlying char stream instead.
-    val originalSQL = getOriginalText(exprCtx)
-    DefaultValueExpression(expr, originalSQL)
+    DefaultValueExpression(expr, getOriginalText(exprCtx))
   }
 
   /**

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
index 3aec1dd43113..f549f440596e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala
@@ -21,6 +21,7 @@ import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.types.StructType
 
 /**
  * Interface for a parser.
@@ -62,4 +63,10 @@
    */
   @throws[ParseException]("Text cannot be parsed to a LogicalPlan")
   def parseQuery(sqlText: String): LogicalPlan
+
+  /**
+   * Parse a string to a [[StructType]] as routine parameters, handling default values and comments.
+   */
+  @throws[ParseException]("Text cannot be parsed to routine parameters")
+  def parseRoutineParam(sqlText: String): StructType
 }

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 4377f6b5bc0c..fe5bdcc00d30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -62,12 +62,6 @@ object ParserUtils extends SparkParserUtils {
     }
   }
 
-  /** Get the code that creates the given node. */
-  def source(ctx: ParserRuleContext): String = {
-    val stream = ctx.getStart.getInputStream
-    stream.getText(Interval.of(ctx.getStart.getStartIndex, ctx.getStop.getStopIndex))
-  }
-
   /** Get all the text which comes after the given rule. */
   def remainder(ctx: ParserRuleContext): String = remainder(ctx.getStop)

diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
index 63d623cd2779..82c8192fe070 100644
--- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
+++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala
@@ -55,6 +55,9 @@ class SparkConnectWithSessionExtensionSuite extends SparkFunSuite {
 
     override def parseQuery(sqlText: String): LogicalPlan =
       delegate.parseQuery(sqlText)
+
+    override def parseRoutineParam(sqlText: String): StructType =
+      delegate.parseRoutineParam(sqlText)
   }
 
   test("Parse table name with test parser") {

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 6823c5ad7ca4..e797ba0392be 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -689,6 +689,20 @@ class SparkSqlAstBuilder extends AstBuilder {
       throw QueryParsingErrors.createFuncWithBothIfNotExistsAndReplaceError(ctx)
     }
 
+    // Reject invalid options
+    for {
+      parameters <- Option(ctx.parameters)
+      colDefinition <- parameters.colDefinition().asScala
+      option <- colDefinition.colDefinitionOption().asScala
+    } {
+      if (option.generationExpression() != null) {
+        throw QueryParsingErrors.createFuncWithGeneratedColumnsError(ctx.parameters)
+      }
+      if (option.columnConstraintDefinition() != null) {
+        throw QueryParsingErrors.createFuncWithConstraintError(ctx.parameters)
+      }
+    }
+
     val inputParamText = Option(ctx.parameters).map(source)
     val returnTypeText: String =
       if (ctx.RETURNS != null &&

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
index fe4e6f121f57..09b2c8697075 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala
@@ -21,7 +21,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, SQLFunctionNode, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation}
-import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, SQLFunction, UserDefinedFunctionErrors}
+import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, SQLFunction, UserDefinedFunction, UserDefinedFunctionErrors}
 import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Generator, LateralSubquery, Literal, ScalarSubquery, SubqueryExpression, WindowExpression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans.Inner
@@ -70,7 +70,7 @@ case class CreateSQLFunctionCommand(
     val catalog = sparkSession.sessionState.catalog
     val conf = sparkSession.sessionState.conf
 
-    val inputParam = inputParamText.map(parser.parseTableSchema)
+    val inputParam = inputParamText.map(UserDefinedFunction.parseRoutineParam(_, parser))
     val returnType = parseReturnTypeText(returnTypeText, isTableFunc, parser)
 
     val function = SQLFunction(

diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
index 8a110190bb02..4fae5417d738 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out
@@ -224,41 +224,404 @@ org.apache.spark.sql.AnalysisException
 -- !query
 CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a
 -- !query analysis
-org.apache.spark.sql.catalyst.parser.ParseException
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT NULL, INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(null as int) as int) AS a#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT 10, INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(10 as int) as int) AS a#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT length(substr(current_database(), 1, 1))) RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT length(substr(current_database(), 1, 1)), INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(length(substr(current_schema(), 1, 1)) as int) as int) AS a#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' || length(substr(current_database(), 1, 1)))
+  RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT '5' || length(substr(current_database(), 1, 1)), INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+Project [spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1(5)#x, spark_catalog.default.foo1d1(a#x) AS spark_catalog.default.foo1d1()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(concat(5, cast(length(substr(current_schema(), 1, 1)) as string)) as int) as int) AS a#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d1, a INT DEFAULT RAND()::INT, INT, a, false, false, false, true
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query analysis
+[Analyzer test output redacted due to nondeterminism]
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+  RETURNS INT RETURN a
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
-  "errorClass" : "PARSE_SYNTAX_ERROR",
-  "sqlState" : "42601",
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
   "messageParameters" : {
-    "error" : "'DEFAULT'",
-    "hint" : ""
+    "sqlExpr" : "\"array(55, 17)\"",
+    "srcType" : "\"ARRAY<INT>\"",
+    "targetType" : "\"INT\""
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 1,
-    "stopIndex" : 74,
-    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a"
+    "stopIndex" : 85,
+    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))\n  RETURNS INT RETURN a"
   } ]
 }
 
 
 -- !query
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES (1) AS T(c1)))
+  RETURNS INT RETURN a
 -- !query analysis
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION",
   "sqlState" : "42601",
   "messageParameters" : {
-    "error" : "'DEFAULT'",
-    "hint" : ""
+    "functionName" : "foo1d1",
+    "parameterName" : "a"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d2, a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test', STRING, a || ' ' || b || ' ' || c || ' ' || d, false, false, false, true
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1)
+-- !query analysis
+Project [spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1d2(1, 2, 3, 4)#x, spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1d2(1, 2, 3)#x, spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1d2(1, 2)#x, spark_catalog.default.foo1d2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1d2(1)#x]
++- Project [cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(3 as int) AS c#x, cast(4 as int) AS d#x, cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(3 as int) AS c#x, cast(cast(9 as int) as int) AS d#x, cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(cast(8 as int) as int) AS c#x, cast(cast(9 as int) as int) AS d#x, cast(1 as int) AS a#x, cast(cast(7 as int) as int) AS b#x, cast(cast(8 as int) as int) AS c#x, cast(cast(9 as int) as int) AS d#x]
+   +- OneRowRelation
+
+
+-- !query
+SELECT foo1d2()
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "0",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "4",
+    "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 15,
+    "fragment" : "foo1d2()"
+  } ]
+}
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4, 5)
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "5",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "4",
+    "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 28,
+    "fragment" : "foo1d2(1, 2, 3, 4, 5)"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "functionName" : "foo1d2",
+    "nextParameterName" : "b",
+    "parameterName" : "a"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "functionName" : "foo1d2",
+    "nextParameterName" : "d",
+    "parameterName" : "c"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello') RETURNS INT RETURN a
+-- !query analysis
+CreateSQLFunctionCommand foo1d3, a INT DEFAULT 7 COMMENT 'hello', INT, a, false, true, false, true
+
+
+-- !query
+SELECT foo1d3(5), foo1d3()
+-- !query analysis
+Project [foo1d3(a#x) AS foo1d3(5)#x, foo1d3(a#x) AS foo1d3()#x]
++- Project [cast(5 as int) AS a#x, cast(cast(7 as int) as int) AS a#x]
+   +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a + b
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`a`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 1,
+    "fragment" : "a"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a + b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d4, a INT, b INT DEFAULT 3, INT, a + b, false, false, false, true
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT RETURN a + b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d5, a INT, b INT DEFAULT foo1d4(6), INT, a + b, false, false, false, true
+
+
+-- !query
+SELECT foo1d5(10), foo1d5(10, 2)
+-- !query analysis
+Project [spark_catalog.default.foo1d5(a#x, b#x) AS spark_catalog.default.foo1d5(10)#x, spark_catalog.default.foo1d5(a#x, b#x) AS spark_catalog.default.foo1d5(10, 2)#x]
++- Project [cast(10 as int) AS a#x, cast(cast(spark_catalog.default.foo1d4(a#x, b#x) as int) as int) AS b#x, cast(10 as int) AS a#x, cast(2 as int) AS b#x]
+   +- Project [cast(6 as int) AS a#x, cast(cast(3 as int) as int) AS b#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a + foo1d4(b)
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d5, a INT, b INT, INT, a + foo1d4(b), false, false, false, true
+
+
+-- !query
+SELECT foo1d5(10, 2)
+-- !query analysis
+Project [spark_catalog.default.foo1d5(a#x, b#x) AS spark_catalog.default.foo1d5(10, 2)#x]
++- Project [a#x, b#x, cast(b#x as int) AS a#x, cast(cast(3 as int) as int) AS b#x]
+   +- Project [cast(10 as int) AS a#x, cast(2 as int) AS b#x]
+      +- OneRowRelation
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b
+-- !query analysis
+CreateSQLFunctionCommand spark_catalog.default.foo1d6, a INT, b INT DEFAULT 7, a INT, b INT, SELECT a, b, true, false, false, true
+
+
+-- !query
+SELECT * FROM foo1d6(5)
+-- !query analysis
+Project [a#x, b#x]
++- SQLFunctionNode spark_catalog.default.foo1d6
+   +- SubqueryAlias foo1d6
+      +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+         +- Project [cast(5 as int) AS a#x, cast(7 as int) AS b#x]
+            +- OneRowRelation
+
+
+-- !query
+SELECT * FROM foo1d6(5, 2)
+-- !query analysis
+Project [a#x, b#x]
++- SQLFunctionNode spark_catalog.default.foo1d6
+   +- SubqueryAlias foo1d6
+      +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+         +- Project [cast(5 as int) AS a#x, cast(2 as int) AS b#x]
+            +- OneRowRelation
+
+
+-- !query
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT NOT NULL, y INT"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN SELECT 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT, y INT NOT NULL"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN SELECT 1
+-- !query analysis
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT NOT NULL"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 90,
+    "fragment" : "CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 91,
+    "fragment" : "CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1g1(x INT, y INT UNIQUE) RETURNS INT RETURN y + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 68,
+    "fragment" : "CREATE FUNCTION foo1g1(x INT, y INT UNIQUE) RETURNS INT RETURN y + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1g2(id BIGINT CHECK (true)) RETURNS BIGINT RETURN id + 1
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS",
+  "sqlState" : "42000",
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 1,
-    "stopIndex" : 104,
-    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b"
+    "stopIndex" : 75,
+    "fragment" : "CREATE FUNCTION foo1g2(id BIGINT CHECK (true)) RETURNS BIGINT RETURN id + 1"
   } ]
 }

diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
index 989fb7c4fae3..1ae75380abb0 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql
@@ -42,12 +42,114 @@ CREATE FUNCTION foo1c2(a INT, b INT, thisisaduplicate INT, c INT, d INT, e INT,
 RETURNS TABLE (a INT) RETURN SELECT 1;
 
 -- 1.1.d DEFAULT parameters
--- TODO(SPARK-51439): Support default parameters in SQL UDF
--- DEFAULT in scalar function
+-- A NULL default
 CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a;
 
+-- Expect 5, NULL
+SELECT foo1d1(5), foo1d1();
+
+-- A literal default
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a;
+
+-- Expect 5, 10
+SELECT foo1d1(5), foo1d1();
+
+-- A constant expression
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT length(substr(current_database(), 1, 1))) RETURNS INT RETURN a;
+
+-- Expect 5, 1
+SELECT foo1d1(5), foo1d1();
+
+-- An expression that needs a cast
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' || length(substr(current_database(), 1, 1)))
+  RETURNS INT RETURN a;
+
+-- Expect 5, 51
+SELECT foo1d1(5), foo1d1();
+
+-- A non deterministic default
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT RETURN a;
+
+-- Expect 5, 0
+SELECT foo1d1(5), foo1d1();
+
+-- Cannot cast
+-- Expect error
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+  RETURNS INT RETURN a;
+
+-- A subquery
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES (1) AS T(c1)))
+  RETURNS INT RETURN a;
+
+-- Multiple parameters
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+-- Expect: (1 2 3 4), (1 2 3 9), (1 2 8 9), (1 7 8 9)
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1);
+
+-- Expect error a has no default
+SELECT foo1d2();
+
+-- Expect error, too many parameters
+SELECT foo1d2(1, 2, 3, 4, 5);
+
+-- Sparse default, expect error
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d;
+
+-- Temporary function
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello') RETURNS INT RETURN a;
+
+-- Expect 5, 7
+SELECT foo1d3(5), foo1d3();
+
+-- Dependent default
+-- Expect error
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a + b;
+
+-- Defaults with SQL UDF
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a + b;
+
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT RETURN a + b;
+
+-- Expect 19, 12
+SELECT foo1d5(10), foo1d5(10, 2);
+
+-- Function invocation with default in SQL UDF
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a + foo1d4(b);
+
+-- Expect 15
+SELECT foo1d5(10, 2);
+
 -- DEFAULT in table function
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b;
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b;
+
+-- Expect (5, 7)
+SELECT * FROM foo1d6(5);
+
+-- Expect (5, 2)
+SELECT * FROM foo1d6(5, 2);
+
+-- 1.1.e NOT NULL
+-- Expect failure
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1;
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN SELECT 1;
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN SELECT 1;
+
+-- 1.1.f GENERATED ALWAYS AS
+-- Expect failure
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1;
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1;
+
+-- 1.1.g Constraint
+-- Expect failure
+CREATE FUNCTION foo1g1(x INT, y INT UNIQUE) RETURNS INT RETURN y + 1;
+CREATE FUNCTION foo1g2(id BIGINT CHECK (true)) RETURNS BIGINT RETURN id + 1;
 
 -------------------------------
 -- 2. Scalar SQL UDF

diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
index 184489190d0e..10db7b18f04a 100644
--- a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out
@@ -183,43 +183,446 @@ CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5	NULL
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT 10) RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5	10
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT length(substr(current_database(), 1, 1))) RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5	1
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT '5' || length(substr(current_database(), 1, 1)))
+  RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5	51
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT RAND()::INT) RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d1(5), foo1d1()
+-- !query schema
+struct<spark_catalog.default.foo1d1(5):int,spark_catalog.default.foo1d1():int>
+-- !query output
+5	0
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))
+  RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
-  "errorClass" : "PARSE_SYNTAX_ERROR",
-  "sqlState" : "42601",
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
   "messageParameters" : {
-    "error" : "'DEFAULT'",
-    "hint" : ""
+    "sqlExpr" : "\"array(55, 17)\"",
+    "srcType" : "\"ARRAY<INT>\"",
+    "targetType" : "\"INT\""
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 1,
-    "stopIndex" : 74,
-    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT NULL) RETURNS INT RETURN a"
+    "stopIndex" : 85,
+    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT array(55, 17))\n  RETURNS INT RETURN a"
   } ]
 }
 
 
 -- !query
-CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b
+CREATE OR REPLACE FUNCTION foo1d1(a INT DEFAULT (SELECT max(c1) FROM VALUES (1) AS T(c1)))
+  RETURNS INT RETURN a
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
+org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION",
   "sqlState" : "42601",
   "messageParameters" : {
-    "error" : "'DEFAULT'",
-    "hint" : ""
+    "functionName" : "foo1d1",
+    "parameterName" : "a"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4), foo1d2(1, 2, 3), foo1d2(1, 2), foo1d2(1)
+-- !query schema
+struct<spark_catalog.default.foo1d2(1, 2, 3, 4):string,spark_catalog.default.foo1d2(1, 2, 3):string,spark_catalog.default.foo1d2(1, 2):string,spark_catalog.default.foo1d2(1):string>
+-- !query output
+1 2 3 4	1 2 3 9	1 2 8 9	1 7 8 9
+
+
+-- !query
+SELECT foo1d2()
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "0",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "4",
+    "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 15,
+    "fragment" : "foo1d2()"
+  } ]
+}
+
+
+-- !query
+SELECT foo1d2(1, 2, 3, 4, 5)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+  "sqlState" : "42605",
+  "messageParameters" : {
+    "actualNum" : "5",
+    "docroot" : "https://spark.apache.org/docs/latest",
+    "expectedNum" : "4",
+    "functionName" : "`spark_catalog`.`default`.`foo1d2`"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 28,
+    "fragment" : "foo1d2(1, 2, 3, 4, 5)"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT DEFAULT 5, b INT , c INT DEFAULT 8, d INT DEFAULT 9 COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "functionName" : "foo1d2",
+    "nextParameterName" : "b",
+    "parameterName" : "a"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d2(a INT, b INT DEFAULT 7, c INT DEFAULT 8, d INT COMMENT 'test')
+  RETURNS STRING RETURN a || ' ' || b || ' ' || c || ' ' || d
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "functionName" : "foo1d2",
+    "nextParameterName" : "d",
+    "parameterName" : "c"
+  }
+}
+
+
+-- !query
+CREATE OR REPLACE TEMPORARY FUNCTION foo1d3(a INT DEFAULT 7 COMMENT 'hello') RETURNS INT RETURN a
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d3(5), foo1d3()
+-- !query schema
+struct<foo1d3(5):int,foo1d3():int>
+-- !query output
+5	7
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT a) RETURNS INT RETURN a + b
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
+  "sqlState" : "42703",
+  "messageParameters" : {
+    "objectName" : "`a`"
   },
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
     "startIndex" : 1,
-    "stopIndex" : 104,
-    "fragment" : "CREATE OR REPLACE FUNCTION foo1d1(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b"
+    "stopIndex" : 1,
+    "fragment" : "a"
+  } ]
+}
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d4(a INT, b INT DEFAULT 3) RETURNS INT RETURN a + b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT DEFAULT foo1d4(6)) RETURNS INT RETURN a + b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d5(10), foo1d5(10, 2)
+-- !query schema
+struct<spark_catalog.default.foo1d5(10):int,spark_catalog.default.foo1d5(10, 2):int>
+-- !query output
+19	12
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d5(a INT, b INT) RETURNS INT RETURN a + foo1d4(b)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT foo1d5(10, 2)
+-- !query schema
+struct<spark_catalog.default.foo1d5(10, 2):int>
+-- !query output
+15
+
+
+-- !query
+CREATE OR REPLACE FUNCTION foo1d6(a INT, b INT DEFAULT 7) RETURNS TABLE(a INT, b INT) RETURN SELECT a, b
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT * FROM foo1d6(5)
+-- !query schema
+struct<a:int,b:int>
+-- !query output
+5	7
+
+
+-- !query
+SELECT * FROM foo1d6(5, 2)
+-- !query schema
+struct<a:int,b:int>
+-- !query output
+5	2
+
+
+-- !query
+CREATE FUNCTION foo1e1(x INT NOT NULL, y INT) RETURNS INT RETURN 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT NOT NULL, y INT"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e2(x INT, y INT NOT NULL) RETURNS TABLE (x INT) RETURN SELECT 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT, y INT NOT NULL"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1e3(x INT, y INT) RETURNS TABLE (x INT NOT NULL) RETURN SELECT 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "input" : "x INT NOT NULL"
+  }
+}
+
+
+-- !query
+CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 90,
+    "fragment" : "CREATE FUNCTION foo1f1(x INT, y INT GENERATED ALWAYS AS (x + 10)) RETURNS INT RETURN y + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_GENERATED_COLUMNS_AS_PARAMETERS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 91,
+    "fragment" : "CREATE FUNCTION foo1f2(id BIGINT GENERATED ALWAYS AS IDENTITY) RETURNS BIGINT RETURN id + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1g1(x INT, y INT UNIQUE) RETURNS INT RETURN y + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 68,
+    "fragment" : "CREATE FUNCTION foo1g1(x INT, y INT UNIQUE) RETURNS INT RETURN y + 1"
+  } ]
+}
+
+
+-- !query
+CREATE FUNCTION foo1g2(id BIGINT CHECK (true)) RETURNS BIGINT RETURN id + 1
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_COLUMN_CONSTRAINTS",
+  "sqlState" : "42000",
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 1,
+    "stopIndex" : 75,
+    "fragment" : "CREATE FUNCTION foo1g2(id BIGINT CHECK (true)) RETURNS BIGINT RETURN id + 1"
   } ]
 }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
index 8f0a62e210d8..c1c041509c35 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
@@ -640,6 +640,10 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars
 
   override def parseQuery(sqlText: String): LogicalPlan =
     delegate.parseQuery(sqlText)
+
+  override def parseRoutineParam(sqlText: String): StructType = {
+    delegate.parseRoutineParam(sqlText)
+  }
 }
 
 object MyExtensions {

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
index 0878a0cb54d2..721bfe002969 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala
@@ -74,4 +74,17 @@ class SQLFunctionSuite extends QueryTest with SharedSparkSession {
         |""".stripMargin), Seq(Row(2), Row(4)))
     }
   }
+
+  test("SQL scalar function with default value") {
+    withUserDefinedFunction("bar" -> false) {
+      sql(
+        """
+          |CREATE FUNCTION bar(x INT DEFAULT 7)
+          |RETURNS INT
+          |RETURN x + 1
+          |""".stripMargin)
+      checkAnswer(sql("SELECT bar()"), Row(8))
+      checkAnswer(sql("SELECT bar(1)"), Row(2))
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org