This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1d8c11120fde [SPARK-54652][SQL] Complete conversion of IDENTIFIER()
1d8c11120fde is described below
commit 1d8c11120fdee04fe88b536bf4cf11d1da5141eb
Author: Aleksandr Chernousov <[email protected]>
AuthorDate: Fri Dec 19 11:43:10 2025 +0800
[SPARK-54652][SQL] Complete conversion of IDENTIFIER()
### What changes were proposed in this pull request?
This PR replaces incorrect usages of `ctx.getText()` with `getIdentifierText(ctx)`, so that identifiers written as `IDENTIFIER()` clauses are evaluated rather than read back as raw source text.
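A minimal sketch of the pattern, assuming `getIdentifierText` evaluates an `IDENTIFIER()` clause when present and otherwise falls back to the plain identifier text (the fallback behavior is inferred from the diff below, not confirmed):

```scala
// Hypothetical visitor snippet; ctx.identifier follows the grammar in the diff below.

// Before: getText() returns the raw source text, so a column written as
// IDENTIFIER('c1') surfaced as the literal string "IDENTIFIER('c1')".
val rawName = ctx.identifier.getText

// After: the IDENTIFIER() clause is evaluated, yielding "c1".
// (Fallback to plain identifier text is an assumption, per the lead-in above.)
val name = getIdentifierText(ctx.identifier)
```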
### Why are the changes needed?
The incorrect usages block `IDENTIFIER()` from working in the remaining places where it is expected to be supported.
### Does this PR introduce _any_ user-facing change?
Yes. `IDENTIFIER()` is now accepted in the remaining expected places (for example, `DESCRIBE describe_col_test IDENTIFIER('c1')`), so some queries that previously failed with a parse or analysis error now succeed.
### How was this patch tested?
Added new tests in identifier-clause.sql and a new parser suite, IdentifierClauseParserSuite.scala.
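For example, the new DESCRIBE-column coverage added to `identifier-clause.sql` (shown verbatim in the diff below):

```sql
-- DESCRIBE column with IDENTIFIER()
CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV;
DESCRIBE describe_col_test IDENTIFIER('c1');
DESCRIBE describe_col_test IDENTIFIER('c2');
DROP TABLE describe_col_test;
```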
### Was this patch authored or co-authored using generative AI tooling?
Claude Sonnet 4.5
Closes #53407 from aleksandr-chernousov-db/SPARK-54652.
Lead-authored-by: Aleksandr Chernousov <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 10 +-
.../sql/catalyst/parser/DataTypeAstBuilder.scala | 2 +-
.../spark/sql/errors/QueryParsingErrors.scala | 2 +-
.../spark/sql/catalyst/parser/AstBuilder.scala | 46 ++--
.../spark/sql/execution/SparkSqlParser.scala | 12 +-
.../identifier-clause-legacy.sql.out | 41 +++
.../analyzer-results/identifier-clause.sql.out | 46 ++--
.../sql-tests/inputs/identifier-clause.sql | 6 +
.../results/identifier-clause-legacy.sql.out | 48 ++++
.../sql-tests/results/identifier-clause.sql.out | 57 +++--
.../command/IdentifierClauseParserSuite.scala | 282 +++++++++++++++++++++
11 files changed, 483 insertions(+), 69 deletions(-)
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 9b7eaece945b..c29452cab180 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -259,9 +259,9 @@ statement
createTableClauses
(AS? query)?
#replaceTable
| ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS
- (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
+ (simpleIdentifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
| ANALYZE TABLES ((FROM | IN) identifierReference)? COMPUTE STATISTICS
- (identifier)? #analyzeTables
+ (simpleIdentifier)? #analyzeTables
| ALTER TABLE identifierReference
ADD (COLUMN | COLUMNS)
columns=qualifiedColTypeWithPositionList
#addTableColumns
@@ -391,7 +391,7 @@ statement
| TRUNCATE TABLE identifierReference partitionSpec?
#truncateTable
| (MSCK)? REPAIR TABLE identifierReference
(option=(ADD|DROP|SYNC) PARTITIONS)?
#repairTable
- | op=(ADD | LIST) identifier .*? #manageResource
+ | op=(ADD | LIST) simpleIdentifier .*? #manageResource
| CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE?
identifierReference (USING indexType=identifier)?
LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN
@@ -695,7 +695,7 @@ createFileFormat
fileFormat
: INPUTFORMAT inFmt=stringLit OUTPUTFORMAT outFmt=stringLit
#tableFileFormat
- | identifier #genericFileFormat
+ | simpleIdentifier #genericFileFormat
;
storageHandler
@@ -703,7 +703,7 @@ storageHandler
;
resource
- : identifier stringLit
+ : simpleIdentifier stringLit
;
dmlStatementNoWith
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
index 212c80a3cb43..51c846f93c1e 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -497,7 +497,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
override def visitComplexColType(ctx: ComplexColTypeContext): StructField = withOrigin(ctx) {
import ctx._
val structField = StructField(
- name = errorCapturingIdentifier.getText,
+ name = getIdentifierText(errorCapturingIdentifier),
dataType = typedVisit(dataType()),
nullable = NULL == null)
Option(commentSpec).map(visitCommentSpec).map(structField.withComment).getOrElse(structField)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 696ef78a1a97..cd302ef6fcca 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -461,7 +461,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
ctx)
}
- def computeStatisticsNotExpectedError(ctx: IdentifierContext): Throwable = {
+ def computeStatisticsNotExpectedError(ctx: ParserRuleContext): Throwable = {
new ParseException(
errorClass = "INVALID_SQL_SYNTAX.ANALYZE_TABLE_UNEXPECTED_NOSCAN",
messageParameters = Map("ctx" -> toSQLStmt(ctx.getText)),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 563eacab244d..c3571ad70962 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1235,7 +1235,7 @@ class AstBuilder extends DataTypeAstBuilder
if (pVal.DEFAULT != null) {
throw QueryParsingErrors.defaultColumnReferencesNotAllowedInPartitionSpec(ctx)
}
- val name = pVal.identifier.getText
+ val name = getIdentifierText(pVal.identifier)
val value = Option(pVal.constant).map(v => {
visitStringConstant(v, legacyNullAsString, keepPartitionSpecAsString)
})
@@ -1958,11 +1958,11 @@ class AstBuilder extends DataTypeAstBuilder
.flatMap(_.namedExpression.asScala)
.map(typedVisit[Expression])
val pivotColumn = if (ctx.pivotColumn.identifiers.size == 1) {
- UnresolvedAttribute.quoted(ctx.pivotColumn.errorCapturingIdentifier.getText)
+ UnresolvedAttribute.quoted(getIdentifierText(ctx.pivotColumn.errorCapturingIdentifier))
} else {
CreateStruct(
ctx.pivotColumn.identifiers.asScala.map(
- identifier => UnresolvedAttribute.quoted(identifier.getText)).toSeq)
+ identifier => UnresolvedAttribute.quoted(getIdentifierText(identifier))).toSeq)
}
val pivotValues = ctx.pivotValues.asScala.map(visitPivotValue)
Pivot(None, pivotColumn, pivotValues.toSeq, aggregates, query)
@@ -1974,7 +1974,7 @@ class AstBuilder extends DataTypeAstBuilder
override def visitPivotValue(ctx: PivotValueContext): Expression = withOrigin(ctx) {
val e = expression(ctx.expression)
if (ctx.errorCapturingIdentifier != null) {
- Alias(e, ctx.errorCapturingIdentifier.getText)()
+ Alias(e, getIdentifierText(ctx.errorCapturingIdentifier))()
} else {
e
}
@@ -2039,7 +2039,7 @@ class AstBuilder extends DataTypeAstBuilder
// alias unpivot result
if (ctx.errorCapturingIdentifier() != null) {
- val alias = ctx.errorCapturingIdentifier().getText
+ val alias = getIdentifierText(ctx.errorCapturingIdentifier())
SubqueryAlias(alias, filtered)
} else {
filtered
@@ -2541,7 +2541,7 @@ class AstBuilder extends DataTypeAstBuilder
*/
private def mayApplyAliasPlan(tableAlias: TableAliasContext, plan: LogicalPlan): LogicalPlan = {
if (tableAlias.strictIdentifier != null) {
- val alias = tableAlias.strictIdentifier.getText
+ val alias = getIdentifierText(tableAlias.strictIdentifier)
if (tableAlias.identifierList != null) {
val columnNames = visitIdentifierList(tableAlias.identifierList)
SubqueryAlias(alias, UnresolvedSubqueryColumnAliases(columnNames, plan))
@@ -3229,7 +3229,7 @@ class AstBuilder extends DataTypeAstBuilder
*/
override def visitLambda(ctx: LambdaContext): Expression = withOrigin(ctx) {
val arguments = ctx.identifier().asScala.map { name =>
- UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(name.getText).nameParts)
+ UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(getIdentifierText(name)).nameParts)
}
val function = expression(ctx.expression).transformUp {
case a: UnresolvedAttribute => UnresolvedNamedLambdaVariable(a.nameParts)
@@ -4271,7 +4271,7 @@ class AstBuilder extends DataTypeAstBuilder
if (!SQLConf.get.objectLevelCollationsEnabled) {
throw QueryCompilationErrors.objectLevelCollationsNotEnabledError()
}
- val collationName = ctx.identifier.getText
+ val collationName = getIdentifierText(ctx.identifier)
CollationFactory.fetchCollation(collationName).collationName
}
@@ -4510,7 +4510,7 @@ class AstBuilder extends DataTypeAstBuilder
def getFieldReference(
ctx: ApplyTransformContext,
arg: V2Expression): FieldReference = {
- lazy val name: String = ctx.identifier.getText
+ lazy val name: String = getIdentifierText(ctx.identifier)
arg match {
case ref: FieldReference =>
ref
@@ -4522,7 +4522,7 @@ class AstBuilder extends DataTypeAstBuilder
def getSingleFieldReference(
ctx: ApplyTransformContext,
arguments: Seq[V2Expression]): FieldReference = {
- lazy val name: String = ctx.identifier.getText
+ lazy val name: String = getIdentifierText(ctx.identifier)
if (arguments.size > 1) {
throw QueryParsingErrors.wrongNumberArgumentsForTransformError(name, arguments.size, ctx)
} else if (arguments.isEmpty) {
@@ -4807,7 +4807,7 @@ class AstBuilder extends DataTypeAstBuilder
string(visitStringLit(c.outFmt)))))
// Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO
case (c: GenericFileFormatContext, null) =>
- SerdeInfo(storedAs = Some(c.identifier.getText))
+ SerdeInfo(storedAs = Some(c.simpleIdentifier.getText))
case (null, storageHandler) =>
invalidStatement("STORED BY", ctx)
case _ =>
@@ -4897,7 +4897,7 @@ class AstBuilder extends DataTypeAstBuilder
(rowFormatCtx, createFileFormatCtx.fileFormat) match {
case (_, ffTable: TableFileFormatContext) => // OK
case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ffGeneric.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case ("sequencefile" | "textfile" | "rcfile") => // OK
case fmt =>
operationNotAllowed(
@@ -4905,7 +4905,7 @@ class AstBuilder extends DataTypeAstBuilder
parentCtx)
}
case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ffGeneric.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "textfile" => // OK
case fmt => operationNotAllowed(
s"ROW FORMAT DELIMITED is only compatible with 'textfile', not
'$fmt'", parentCtx)
@@ -5847,9 +5847,9 @@ class AstBuilder extends DataTypeAstBuilder
log"${MDC(PARTITION_SPECIFICATION, ctx.partitionSpec.getText)}")
}
}
- if (ctx.identifier != null &&
- ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
- throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.identifier())
+ if (ctx.simpleIdentifier != null &&
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
+ throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.simpleIdentifier)
}
if (ctx.ALL() != null) {
@@ -5870,7 +5870,7 @@ class AstBuilder extends DataTypeAstBuilder
"ANALYZE TABLE",
allowTempView = false),
partitionSpec,
- noScan = ctx.identifier != null)
+ noScan = ctx.simpleIdentifier != null)
} else {
checkPartitionSpec()
AnalyzeColumn(
@@ -5888,16 +5888,16 @@ class AstBuilder extends DataTypeAstBuilder
* }}}
*/
override def visitAnalyzeTables(ctx: AnalyzeTablesContext): LogicalPlan = withOrigin(ctx) {
- if (ctx.identifier != null &&
- ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
- throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.identifier())
+ if (ctx.simpleIdentifier != null &&
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
+ throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.simpleIdentifier())
}
val ns = if (ctx.identifierReference() != null) {
withIdentClause(ctx.identifierReference, UnresolvedNamespace(_))
} else {
CurrentNamespace
}
- AnalyzeTables(ns, noScan = ctx.identifier != null)
+ AnalyzeTables(ns, noScan = ctx.simpleIdentifier != null)
}
/**
@@ -6443,7 +6443,7 @@ class AstBuilder extends DataTypeAstBuilder
* }}}
*/
override def visitDropIndex(ctx: DropIndexContext): LogicalPlan = withOrigin(ctx) {
- val indexName = ctx.identifier.getText
+ val indexName = getIdentifierText(ctx.identifier)
DropIndex(
createUnresolvedTable(ctx.identifierReference, "DROP INDEX"),
indexName,
@@ -6665,7 +6665,7 @@ class AstBuilder extends DataTypeAstBuilder
target = None, excepts = ids.map(s => Seq(s)), replacements = None))
Project(projectList, left)
}.getOrElse(Option(ctx.AS).map { _ =>
- SubqueryAlias(ctx.errorCapturingIdentifier().getText, left)
+ SubqueryAlias(getIdentifierText(ctx.errorCapturingIdentifier()), left)
}.getOrElse(Option(ctx.whereClause).map { c =>
if (ctx.windowClause() != null) {
throw QueryParsingErrors.windowClauseInPipeOperatorWhereClauseNotAllowedError(ctx)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 2885d215ee34..be4dcc550a83 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -187,7 +187,7 @@ class SparkSqlAstBuilder extends AstBuilder {
(ident, _) => builder(ident))
} else if (ctx.errorCapturingIdentifier() != null) {
// resolve immediately
- builder.apply(Seq(ctx.errorCapturingIdentifier().getText))
+ builder.apply(Seq(getIdentifierText(ctx.errorCapturingIdentifier())))
} else if (ctx.stringLit() != null) {
// resolve immediately
builder.apply(Seq(string(visitStringLit(ctx.stringLit()))))
@@ -567,7 +567,7 @@ class SparkSqlAstBuilder extends AstBuilder {
* - '/path/to/fileOrJar'
*/
override def visitManageResource(ctx: ManageResourceContext): LogicalPlan = withOrigin(ctx) {
- val rawArg = remainder(ctx.identifier).trim
+ val rawArg = remainder(ctx.simpleIdentifier).trim
val maybePaths = strLiteralDef.findAllIn(rawArg).toSeq.map {
case p if p.startsWith("\"") || p.startsWith("'") => unescapeSQLString(p)
case p => p
@@ -575,14 +575,14 @@ class SparkSqlAstBuilder extends AstBuilder {
ctx.op.getType match {
case SqlBaseParser.ADD =>
- ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "files" | "file" => AddFilesCommand(maybePaths)
case "jars" | "jar" => AddJarsCommand(maybePaths)
case "archives" | "archive" => AddArchivesCommand(maybePaths)
case other => operationNotAllowed(s"ADD with resource type '$other'", ctx)
}
case SqlBaseParser.LIST =>
- ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "files" | "file" =>
if (maybePaths.length > 0) {
ListFilesCommand(maybePaths)
@@ -785,7 +785,7 @@ class SparkSqlAstBuilder extends AstBuilder {
*/
override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) {
val resources = ctx.resource.asScala.map { resource =>
- val resourceType = resource.identifier.getText.toLowerCase(Locale.ROOT)
+ val resourceType = resource.simpleIdentifier.getText.toLowerCase(Locale.ROOT)
resourceType match {
case "jar" | "file" | "archive" =>
FunctionResource(FunctionResourceType.fromString(resourceType),
@@ -1358,7 +1358,7 @@ class SparkSqlAstBuilder extends AstBuilder {
} else {
DescribeColumn(
relation,
- UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq),
+ UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(getIdentifierText).toSeq),
isExtended)
}
} else {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
index e53a8153e829..94fff8f58697 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
@@ -2436,6 +2436,47 @@ DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query analysis
+CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, false
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.describe_col_test
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
index abc6cc625b6a..e6a406072c48 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
@@ -2021,22 +2021,11 @@ Project [map(mykey, 42)[mykey] AS result#x]
EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL'
USING 't' AS alias
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`t`.`c1`",
- "proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`,
`IDENTIFIER('t')`.`c4`"
- },
- "queryContext" : [ {
- "objectType" : "EXECUTE IMMEDIATE",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 31,
- "fragment" : "IDENTIFIER(:alias '.c1')"
- } ]
-}
+Sort [c1#x ASC NULLS FIRST], true
++- Project [c1#x]
+ +- SubqueryAlias t
+ +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test
+ +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x,c4#x] csv
-- !query
@@ -2182,6 +2171,31 @@ DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query analysis
+CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, false
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query analysis
+DescribeColumnCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, [spark_catalog, identifier_clause_test_schema, describe_col_test, c1], false, [info_name#x, info_value#x]
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query analysis
+DescribeColumnCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, [spark_catalog, identifier_clause_test_schema, describe_col_test, c2], false, [info_name#x, info_value#x]
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.describe_col_test
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
index bb6c7107062d..d9bafe7cc607 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
@@ -367,6 +367,12 @@ SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b
SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL;
DROP TABLE unpivot_test;
+-- DESCRIBE column with IDENTIFIER()
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV;
+DESCRIBE describe_col_test IDENTIFIER('c1');
+DESCRIBE describe_col_test IDENTIFIER('c2');
+DROP TABLE describe_col_test;
+
-- All the following tests fail because they are not about "true" identifiers
-- This should fail - named parameters don't support IDENTIFIER()
diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
index 272d9bce8165..6a99be057010 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
@@ -2740,6 +2740,54 @@ struct<>
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
index b398c07b14e1..0c0473791201 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
@@ -2227,24 +2227,11 @@ struct<result:int>
EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL'
USING 't' AS alias
-- !query schema
-struct<>
+struct<c1:int>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`t`.`c1`",
- "proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`,
`IDENTIFIER('t')`.`c4`"
- },
- "queryContext" : [ {
- "objectType" : "EXECUTE IMMEDIATE",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 31,
- "fragment" : "IDENTIFIER(:alias '.c1')"
- } ]
-}
+1
+2
+3
-- !query
@@ -2396,6 +2383,42 @@ struct<>
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c1
+data_type int
+comment NULL
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c2
+data_type string
+comment NULL
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query schema
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala
new file mode 100644
index 000000000000..c2efc43ae576
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, ExpressionWithUnresolvedIdentifier, UnresolvedAttribute, UnresolvedExtractValue, UnresolvedFunction, UnresolvedInlineTable, UnresolvedStar}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, LambdaFunction, Literal, UnresolvedNamedLambdaVariable}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, OneRowRelation, Pivot, Project, SubqueryAlias, Unpivot}
+import org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
+
+class IdentifierClauseParserSuite extends AnalysisTest {
+
+ import org.apache.spark.sql.catalyst.dsl.expressions._
+ import org.apache.spark.sql.catalyst.dsl.plans._
+
+ private def intercept(sqlCommand: String): ParseException = {
+ intercept[ParseException](parsePlan(sqlCommand))
+ }
+
+ test("UNPIVOT column alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM unpivot_test UNPIVOT (val FOR col IN " +
+ "(a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b')))"),
+ Unpivot(
+ None,
+ Some(Seq(Seq($"a"), Seq($"b"))),
+ Some(Seq(Some("col_a"), Some("col_b"))),
+ "col",
+ Seq("val"),
+ table("unpivot_test"))
+ .where(coalesce($"val").isNotNull)
+ .select(star())
+ )
+ }
+
+ test("UNPIVOT multi-value column alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN " +
+ "((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS
IDENTIFIER('cols_bc')))"),
+ Unpivot(
+ None,
+ Some(Seq(Seq($"a", $"b"), Seq($"b", $"c"))),
+ Some(Seq(Some("cols_ab"), Some("cols_bc"))),
+ "col",
+ Seq("v1", "v2"),
+ table("unpivot_test"))
+ .where(coalesce($"v1", $"v2").isNotNull)
+ .select(star())
+ )
+ }
+
+ test("PIVOT column with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM pivot_test PIVOT (SUM(revenue) FOR
IDENTIFIER('quarter') IN ('Q1', 'Q2'))"),
+ Pivot(
+ None,
+ UnresolvedAttribute.quoted("quarter"),
+ Seq(Literal("Q1"), Literal("Q2")),
+ Seq(UnresolvedFunction("SUM", Seq($"revenue"), isDistinct = false)),
+ table("pivot_test"))
+ .select(star())
+ )
+ }
+
+ test("PIVOT value alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM pivot_test PIVOT (SUM(revenue) AS IDENTIFIER('total')
FOR quarter IN " +
+ "('Q1' AS IDENTIFIER('first_quarter'), 'Q2' AS
IDENTIFIER('second_quarter')))"),
+ Pivot(
+ None,
+ $"quarter",
+ Seq(
+ Alias(Literal("Q1"), "first_quarter")(),
+ Alias(Literal("Q2"), "second_quarter")()
+ ),
+ Seq(Alias(UnresolvedFunction("SUM", Seq($"revenue"), isDistinct = false), "total")()),
+ table("pivot_test"))
+ .select(star())
+ )
+ }
+
+ test("Lambda variable name with IDENTIFIER()") {
+ val lambdaVar = UnresolvedNamedLambdaVariable(Seq("x"))
+ comparePlans(
+ parsePlan("SELECT transform(array(1, 2, 3), IDENTIFIER('x') -> x + 1)"),
+ OneRowRelation()
+ .select(
+ UnresolvedFunction(
+ "transform",
+ Seq(
+ UnresolvedFunction(
+ "array",
+ Seq(Literal(1), Literal(2), Literal(3)),
+ isDistinct = false),
+ LambdaFunction(
+ lambdaVar + Literal(1),
+ Seq(lambdaVar)
+ )
+ ),
+ isDistinct = false
+ )
+ )
+ )
+ }
+
+ test("Struct field names with IDENTIFIER() in CAST") {
+ val structType = StructType(Seq(
+ StructField("field1", IntegerType),
+ StructField("field2", StringType)
+ ))
+ comparePlans(
+ parsePlan(
+ "SELECT CAST(named_struct('field1', 1, 'field2', 'a') AS " +
+ "STRUCT<IDENTIFIER('field1'): INT, IDENTIFIER('field2'): STRING>)"),
+ OneRowRelation()
+ .select(
+ Cast(
+ UnresolvedFunction(
+ "named_struct",
+ Seq(Literal("field1"), Literal(1), Literal("field2"),
Literal("a")),
+ isDistinct = false),
+ structType
+ )
+ )
+ )
+ }
+
+ test("Struct field access with IDENTIFIER()") {
+ val plan = parsePlan("SELECT IDENTIFIER('data').IDENTIFIER('field1') FROM
struct_field_test")
+ val resolvedPlan = plan.transformAllExpressions {
+ case e: ExpressionWithUnresolvedIdentifier =>
+ e.exprBuilder(Seq(e.identifierExpr.eval().toString), e.otherExprs)
+ }
+
+ comparePlans(
+ resolvedPlan,
+ table("struct_field_test").select(UnresolvedExtractValue($"data",
Literal("field1")))
+ )
+ }
+
+ test("Struct field access with multiple IDENTIFIER() parts") {
+ val plan = parsePlan("SELECT
IDENTIFIER('a').IDENTIFIER('b').IDENTIFIER('c') FROM t")
+ val resolvedPlan = plan.transformAllExpressions {
+ case e: ExpressionWithUnresolvedIdentifier =>
+ e.exprBuilder(Seq(e.identifierExpr.eval().toString), e.otherExprs)
+ }
+
+ comparePlans(
+ resolvedPlan,
+ table("t").select(
+ UnresolvedExtractValue(
+ UnresolvedExtractValue($"a", Literal("b")),
+ Literal("c")
+ )
+ )
+ )
+ }
+
+ test("Partition spec with IDENTIFIER() for partition column name") {
+ val plan = parsePlan(
+ "INSERT INTO partition_spec_test PARTITION (IDENTIFIER('c2') = 'value1')
VALUES (1)")
+ .asInstanceOf[InsertIntoStatement]
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("col1"), Seq(Seq(Literal(1)))))
+
+ comparePlans(
+ plan,
+ InsertIntoStatement(
+ plan.table,
+ Map("c2" -> Some("value1")),
+ Nil,
+ values,
+ overwrite = false,
+ ifPartitionNotExists = false
+ )
+ )
+ }
+
+ test("Pipe operator alias with IDENTIFIER()") {
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Literal(1), Literal(2)))))
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM VALUES(1, 2) AS T(c1, c2) |> AS
IDENTIFIER('pipe_alias') |> SELECT c1, c2"),
+ Project(
+ Seq($"c1", $"c2"),
+ SubqueryAlias(
+ "pipe_alias",
+ Project(
+ Seq(UnresolvedStar(None)),
+ SubqueryAlias("T", values)
+ )
+ )
+ )
+ )
+ }
+
+ test("Pipe operator alias with IDENTIFIER() - second variant") {
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Literal(1), Literal(2)))))
+ comparePlans(
+ parsePlan(
+ "SELECT c1, c2 FROM VALUES(1, 2) AS T(c1, c2) |> AS
IDENTIFIER('my_result') |> SELECT *"),
+ Project(
+ Seq(UnresolvedStar(None)),
+ SubqueryAlias(
+ "my_result",
+ Project(
+ Seq($"c1", $"c2"),
+ SubqueryAlias("T", values)
+ )
+ )
+ )
+ )
+ }
+
+ test("Resource type ADD is a keyword - should fail") {
+ checkError(
+ exception = intercept("ADD IDENTIFIER('file') '/tmp/test.txt'"),
+ condition = "INVALID_SQL_SYNTAX.UNSUPPORTED_SQL_STATEMENT",
+ parameters = Map("sqlText" -> "ADD IDENTIFIER('file') '/tmp/test.txt'"),
+ context = ExpectedContext(
+ fragment = "ADD IDENTIFIER('file') '/tmp/test.txt'",
+ start = 0,
+ stop = 37
+ )
+ )
+ }
+
+ test("Resource type LIST is a keyword - should fail") {
+ checkError(
+ exception = intercept("LIST IDENTIFIER('files')"),
+ condition = "INVALID_SQL_SYNTAX.UNSUPPORTED_SQL_STATEMENT",
+ parameters = Map("sqlText" -> "LIST IDENTIFIER('files')"),
+ context = ExpectedContext(
+ fragment = "LIST IDENTIFIER('files')",
+ start = 0,
+ stop = 23
+ )
+ )
+ }
+
+ test("CREATE FUNCTION USING resource type is a keyword - should fail") {
+ checkError(
+ exception = intercept(
+ "CREATE FUNCTION keyword_test_func AS 'com.example.Test' " +
+ "USING IDENTIFIER('jar') '/path/to.jar'"),
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'('", "hint" -> "")
+ )
+ }
+
+ test("ANALYZE TABLE NOSCAN is a keyword - should fail") {
+ checkError(
+ exception = intercept(
+ "ANALYZE TABLE analyze_keyword_test COMPUTE STATISTICS
IDENTIFIER('noscan')"),
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'('", "hint" -> "")
+ )
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]