This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1d8c11120fde [SPARK-54652][SQL] Complete conversion of IDENTIFIER()
1d8c11120fde is described below
commit 1d8c11120fdee04fe88b536bf4cf11d1da5141eb
Author: Aleksandr Chernousov <[email protected]>
AuthorDate: Fri Dec 19 11:43:10 2025 +0800
[SPARK-54652][SQL] Complete conversion of IDENTIFIER()
### What changes were proposed in this pull request?
This PR replaces incorrect usages of `ctx.getText()` with `getIdentifierText(ctx)`, so that identifiers written as `IDENTIFIER()` clauses are evaluated rather than read back as raw source text.
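A minimal sketch of the pattern, assuming `getIdentifierText` evaluates an `IDENTIFIER()` clause when present and otherwise falls back to the plain identifier text (the fallback behavior is inferred from the diff below, not confirmed):

```scala
// Hypothetical visitor snippet; ctx.identifier follows the grammar in the diff below.

// Before: getText() returns the raw source text, so a column written as
// IDENTIFIER('c1') surfaced as the literal string "IDENTIFIER('c1')".
val rawName = ctx.identifier.getText

// After: the IDENTIFIER() clause is evaluated, yielding "c1".
// (Fallback to plain identifier text is an assumption, per the lead-in above.)
val name = getIdentifierText(ctx.identifier)
```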
### Why are the changes needed?
The incorrect usages block `IDENTIFIER()` from working in the remaining places where it is expected to be supported.
### Does this PR introduce _any_ user-facing change?
Yes. `IDENTIFIER()` is now accepted in the remaining expected places (for example, `DESCRIBE describe_col_test IDENTIFIER('c1')`), so some queries that previously failed with a parse or analysis error now succeed.
### How was this patch tested?
Added new tests in identifier-clause.sql and a new parser suite, IdentifierClauseParserSuite.scala.
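For example, the new DESCRIBE-column coverage added to `identifier-clause.sql` (shown verbatim in the diff below):

```sql
-- DESCRIBE column with IDENTIFIER()
CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV;
DESCRIBE describe_col_test IDENTIFIER('c1');
DESCRIBE describe_col_test IDENTIFIER('c2');
DROP TABLE describe_col_test;
```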
### Was this patch authored or co-authored using generative AI tooling?
Claude Sonnet 4.5
Closes #53407 from aleksandr-chernousov-db/SPARK-54652.
Lead-authored-by: Aleksandr Chernousov <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 10 +-
.../sql/catalyst/parser/DataTypeAstBuilder.scala | 2 +-
.../spark/sql/errors/QueryParsingErrors.scala | 2 +-
.../spark/sql/catalyst/parser/AstBuilder.scala | 46 ++--
.../spark/sql/execution/SparkSqlParser.scala | 12 +-
.../identifier-clause-legacy.sql.out | 41 +++
.../analyzer-results/identifier-clause.sql.out | 46 ++--
.../sql-tests/inputs/identifier-clause.sql | 6 +
.../results/identifier-clause-legacy.sql.out | 48 ++++
.../sql-tests/results/identifier-clause.sql.out | 57 +++--
.../command/IdentifierClauseParserSuite.scala | 282 +++++++++++++++++++++
11 files changed, 483 insertions(+), 69 deletions(-)
diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 9b7eaece945b..c29452cab180 100644
--- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -259,9 +259,9 @@ statement
createTableClauses
(AS? query)?
#replaceTable
| ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS
- (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
+ (simpleIdentifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
| ANALYZE TABLES ((FROM | IN) identifierReference)? COMPUTE STATISTICS
- (identifier)? #analyzeTables
+ (simpleIdentifier)? #analyzeTables
| ALTER TABLE identifierReference
ADD (COLUMN | COLUMNS)
columns=qualifiedColTypeWithPositionList
#addTableColumns
@@ -391,7 +391,7 @@ statement
| TRUNCATE TABLE identifierReference partitionSpec?
#truncateTable
| (MSCK)? REPAIR TABLE identifierReference
(option=(ADD|DROP|SYNC) PARTITIONS)?
#repairTable
- | op=(ADD | LIST) identifier .*? #manageResource
+ | op=(ADD | LIST) simpleIdentifier .*? #manageResource
| CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE?
identifierReference (USING indexType=identifier)?
LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN
@@ -695,7 +695,7 @@ createFileFormat
fileFormat
: INPUTFORMAT inFmt=stringLit OUTPUTFORMAT outFmt=stringLit
#tableFileFormat
- | identifier #genericFileFormat
+ | simpleIdentifier #genericFileFormat
;
storageHandler
@@ -703,7 +703,7 @@ storageHandler
;
resource
- : identifier stringLit
+ : simpleIdentifier stringLit
;
dmlStatementNoWith
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
index 212c80a3cb43..51c846f93c1e 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -497,7 +497,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
override def visitComplexColType(ctx: ComplexColTypeContext): StructField = withOrigin(ctx) {
import ctx._
val structField = StructField(
- name = errorCapturingIdentifier.getText,
+ name = getIdentifierText(errorCapturingIdentifier),
dataType = typedVisit(dataType()),
nullable = NULL == null)
Option(commentSpec).map(visitCommentSpec).map(structField.withComment).getOrElse(structField)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 696ef78a1a97..cd302ef6fcca 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -461,7 +461,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
ctx)
}
- def computeStatisticsNotExpectedError(ctx: IdentifierContext): Throwable = {
+ def computeStatisticsNotExpectedError(ctx: ParserRuleContext): Throwable = {
new ParseException(
errorClass = "INVALID_SQL_SYNTAX.ANALYZE_TABLE_UNEXPECTED_NOSCAN",
messageParameters = Map("ctx" -> toSQLStmt(ctx.getText)),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 563eacab244d..c3571ad70962 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1235,7 +1235,7 @@ class AstBuilder extends DataTypeAstBuilder
if (pVal.DEFAULT != null) {
throw QueryParsingErrors.defaultColumnReferencesNotAllowedInPartitionSpec(ctx)
}
- val name = pVal.identifier.getText
+ val name = getIdentifierText(pVal.identifier)
val value = Option(pVal.constant).map(v => {
visitStringConstant(v, legacyNullAsString, keepPartitionSpecAsString)
})
@@ -1958,11 +1958,11 @@ class AstBuilder extends DataTypeAstBuilder
.flatMap(_.namedExpression.asScala)
.map(typedVisit[Expression])
val pivotColumn = if (ctx.pivotColumn.identifiers.size == 1) {
- UnresolvedAttribute.quoted(ctx.pivotColumn.errorCapturingIdentifier.getText)
+ UnresolvedAttribute.quoted(getIdentifierText(ctx.pivotColumn.errorCapturingIdentifier))
} else {
CreateStruct(
ctx.pivotColumn.identifiers.asScala.map(
- identifier => UnresolvedAttribute.quoted(identifier.getText)).toSeq)
+ identifier => UnresolvedAttribute.quoted(getIdentifierText(identifier))).toSeq)
}
val pivotValues = ctx.pivotValues.asScala.map(visitPivotValue)
Pivot(None, pivotColumn, pivotValues.toSeq, aggregates, query)
@@ -1974,7 +1974,7 @@ class AstBuilder extends DataTypeAstBuilder
override def visitPivotValue(ctx: PivotValueContext): Expression = withOrigin(ctx) {
val e = expression(ctx.expression)
if (ctx.errorCapturingIdentifier != null) {
- Alias(e, ctx.errorCapturingIdentifier.getText)()
+ Alias(e, getIdentifierText(ctx.errorCapturingIdentifier))()
} else {
e
}
@@ -2039,7 +2039,7 @@ class AstBuilder extends DataTypeAstBuilder
// alias unpivot result
if (ctx.errorCapturingIdentifier() != null) {
- val alias = ctx.errorCapturingIdentifier().getText
+ val alias = getIdentifierText(ctx.errorCapturingIdentifier())
SubqueryAlias(alias, filtered)
} else {
filtered
@@ -2541,7 +2541,7 @@ class AstBuilder extends DataTypeAstBuilder
*/
private def mayApplyAliasPlan(tableAlias: TableAliasContext, plan: LogicalPlan): LogicalPlan = {
if (tableAlias.strictIdentifier != null) {
- val alias = tableAlias.strictIdentifier.getText
+ val alias = getIdentifierText(tableAlias.strictIdentifier)
if (tableAlias.identifierList != null) {
val columnNames = visitIdentifierList(tableAlias.identifierList)
SubqueryAlias(alias, UnresolvedSubqueryColumnAliases(columnNames, plan))
@@ -3229,7 +3229,7 @@ class AstBuilder extends DataTypeAstBuilder
*/
override def visitLambda(ctx: LambdaContext): Expression = withOrigin(ctx) {
val arguments = ctx.identifier().asScala.map { name =>
- UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(name.getText).nameParts)
+ UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(getIdentifierText(name)).nameParts)
}
val function = expression(ctx.expression).transformUp {
case a: UnresolvedAttribute => UnresolvedNamedLambdaVariable(a.nameParts)
@@ -4271,7 +4271,7 @@ class AstBuilder extends DataTypeAstBuilder
if (!SQLConf.get.objectLevelCollationsEnabled) {
throw QueryCompilationErrors.objectLevelCollationsNotEnabledError()
}
- val collationName = ctx.identifier.getText
+ val collationName = getIdentifierText(ctx.identifier)
CollationFactory.fetchCollation(collationName).collationName
}
@@ -4510,7 +4510,7 @@ class AstBuilder extends DataTypeAstBuilder
def getFieldReference(
ctx: ApplyTransformContext,
arg: V2Expression): FieldReference = {
- lazy val name: String = ctx.identifier.getText
+ lazy val name: String = getIdentifierText(ctx.identifier)
arg match {
case ref: FieldReference =>
ref
@@ -4522,7 +4522,7 @@ class AstBuilder extends DataTypeAstBuilder
def getSingleFieldReference(
ctx: ApplyTransformContext,
arguments: Seq[V2Expression]): FieldReference = {
- lazy val name: String = ctx.identifier.getText
+ lazy val name: String = getIdentifierText(ctx.identifier)
if (arguments.size > 1) {
throw QueryParsingErrors.wrongNumberArgumentsForTransformError(name, arguments.size, ctx)
} else if (arguments.isEmpty) {
@@ -4807,7 +4807,7 @@ class AstBuilder extends DataTypeAstBuilder
string(visitStringLit(c.outFmt)))))
// Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO
case (c: GenericFileFormatContext, null) =>
- SerdeInfo(storedAs = Some(c.identifier.getText))
+ SerdeInfo(storedAs = Some(c.simpleIdentifier.getText))
case (null, storageHandler) =>
invalidStatement("STORED BY", ctx)
case _ =>
@@ -4897,7 +4897,7 @@ class AstBuilder extends DataTypeAstBuilder
(rowFormatCtx, createFileFormatCtx.fileFormat) match {
case (_, ffTable: TableFileFormatContext) => // OK
case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ffGeneric.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case ("sequencefile" | "textfile" | "rcfile") => // OK
case fmt =>
operationNotAllowed(
@@ -4905,7 +4905,7 @@ class AstBuilder extends DataTypeAstBuilder
parentCtx)
}
case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ffGeneric.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "textfile" => // OK
case fmt => operationNotAllowed(
s"ROW FORMAT DELIMITED is only compatible with 'textfile', not
'$fmt'", parentCtx)
@@ -5847,9 +5847,9 @@ class AstBuilder extends DataTypeAstBuilder
log"${MDC(PARTITION_SPECIFICATION, ctx.partitionSpec.getText)}")
}
}
- if (ctx.identifier != null &&
- ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
- throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.identifier())
+ if (ctx.simpleIdentifier != null &&
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
+ throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.simpleIdentifier)
}
if (ctx.ALL() != null) {
@@ -5870,7 +5870,7 @@ class AstBuilder extends DataTypeAstBuilder
"ANALYZE TABLE",
allowTempView = false),
partitionSpec,
- noScan = ctx.identifier != null)
+ noScan = ctx.simpleIdentifier != null)
} else {
checkPartitionSpec()
AnalyzeColumn(
@@ -5888,16 +5888,16 @@ class AstBuilder extends DataTypeAstBuilder
* }}}
*/
override def visitAnalyzeTables(ctx: AnalyzeTablesContext): LogicalPlan = withOrigin(ctx) {
- if (ctx.identifier != null &&
- ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
- throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.identifier())
+ if (ctx.simpleIdentifier != null &&
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
+ throw QueryParsingErrors.computeStatisticsNotExpectedError(ctx.simpleIdentifier())
}
val ns = if (ctx.identifierReference() != null) {
withIdentClause(ctx.identifierReference, UnresolvedNamespace(_))
} else {
CurrentNamespace
}
- AnalyzeTables(ns, noScan = ctx.identifier != null)
+ AnalyzeTables(ns, noScan = ctx.simpleIdentifier != null)
}
/**
@@ -6443,7 +6443,7 @@ class AstBuilder extends DataTypeAstBuilder
* }}}
*/
override def visitDropIndex(ctx: DropIndexContext): LogicalPlan = withOrigin(ctx) {
- val indexName = ctx.identifier.getText
+ val indexName = getIdentifierText(ctx.identifier)
DropIndex(
createUnresolvedTable(ctx.identifierReference, "DROP INDEX"),
indexName,
@@ -6665,7 +6665,7 @@ class AstBuilder extends DataTypeAstBuilder
target = None, excepts = ids.map(s => Seq(s)), replacements = None))
Project(projectList, left)
}.getOrElse(Option(ctx.AS).map { _ =>
- SubqueryAlias(ctx.errorCapturingIdentifier().getText, left)
+ SubqueryAlias(getIdentifierText(ctx.errorCapturingIdentifier()), left)
}.getOrElse(Option(ctx.whereClause).map { c =>
if (ctx.windowClause() != null) {
throw QueryParsingErrors.windowClauseInPipeOperatorWhereClauseNotAllowedError(ctx)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 2885d215ee34..be4dcc550a83 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -187,7 +187,7 @@ class SparkSqlAstBuilder extends AstBuilder {
(ident, _) => builder(ident))
} else if (ctx.errorCapturingIdentifier() != null) {
// resolve immediately
- builder.apply(Seq(ctx.errorCapturingIdentifier().getText))
+ builder.apply(Seq(getIdentifierText(ctx.errorCapturingIdentifier())))
} else if (ctx.stringLit() != null) {
// resolve immediately
builder.apply(Seq(string(visitStringLit(ctx.stringLit()))))
@@ -567,7 +567,7 @@ class SparkSqlAstBuilder extends AstBuilder {
* - '/path/to/fileOrJar'
*/
override def visitManageResource(ctx: ManageResourceContext): LogicalPlan = withOrigin(ctx) {
- val rawArg = remainder(ctx.identifier).trim
+ val rawArg = remainder(ctx.simpleIdentifier).trim
val maybePaths = strLiteralDef.findAllIn(rawArg).toSeq.map {
case p if p.startsWith("\"") || p.startsWith("'") => unescapeSQLString(p)
case p => p
@@ -575,14 +575,14 @@ class SparkSqlAstBuilder extends AstBuilder {
ctx.op.getType match {
case SqlBaseParser.ADD =>
- ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "files" | "file" => AddFilesCommand(maybePaths)
case "jars" | "jar" => AddJarsCommand(maybePaths)
case "archives" | "archive" => AddArchivesCommand(maybePaths)
case other => operationNotAllowed(s"ADD with resource type '$other'", ctx)
}
case SqlBaseParser.LIST =>
- ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
+ ctx.simpleIdentifier.getText.toLowerCase(Locale.ROOT) match {
case "files" | "file" =>
if (maybePaths.length > 0) {
ListFilesCommand(maybePaths)
@@ -785,7 +785,7 @@ class SparkSqlAstBuilder extends AstBuilder {
*/
override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) {
val resources = ctx.resource.asScala.map { resource =>
- val resourceType = resource.identifier.getText.toLowerCase(Locale.ROOT)
+ val resourceType = resource.simpleIdentifier.getText.toLowerCase(Locale.ROOT)
resourceType match {
case "jar" | "file" | "archive" =>
FunctionResource(FunctionResourceType.fromString(resourceType),
@@ -1358,7 +1358,7 @@ class SparkSqlAstBuilder extends AstBuilder {
} else {
DescribeColumn(
relation,
- UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq),
+ UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(getIdentifierText).toSeq),
isExtended)
}
} else {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
index e53a8153e829..94fff8f58697 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out
@@ -2436,6 +2436,47 @@ DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query analysis
+CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, false
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.describe_col_test
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
index abc6cc625b6a..e6a406072c48 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
@@ -2021,22 +2021,11 @@ Project [map(mykey, 42)[mykey] AS result#x]
EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL'
USING 't' AS alias
-- !query analysis
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`t`.`c1`",
- "proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`,
`IDENTIFIER('t')`.`c4`"
- },
- "queryContext" : [ {
- "objectType" : "EXECUTE IMMEDIATE",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 31,
- "fragment" : "IDENTIFIER(:alias '.c1')"
- } ]
-}
+Sort [c1#x ASC NULLS FIRST], true
++- Project [c1#x]
+ +- SubqueryAlias t
+ +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test
+ +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x,c4#x] csv
-- !query
@@ -2182,6 +2171,31 @@ DropTable false, false
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query analysis
+CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, false
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query analysis
+DescribeColumnCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, [spark_catalog, identifier_clause_test_schema, describe_col_test, c1], false, [info_name#x, info_value#x]
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query analysis
+DescribeColumnCommand `spark_catalog`.`identifier_clause_test_schema`.`describe_col_test`, [spark_catalog, identifier_clause_test_schema, describe_col_test, c2], false, [info_name#x, info_value#x]
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.describe_col_test
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
index bb6c7107062d..d9bafe7cc607 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
@@ -367,6 +367,12 @@ SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b
SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL;
DROP TABLE unpivot_test;
+-- DESCRIBE column with IDENTIFIER()
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV;
+DESCRIBE describe_col_test IDENTIFIER('c1');
+DESCRIBE describe_col_test IDENTIFIER('c2');
+DROP TABLE describe_col_test;
+
-- All the following tests fail because they are not about "true" identifiers
-- This should fail - named parameters don't support IDENTIFIER()
diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
index 272d9bce8165..6a99be057010 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out
@@ -2740,6 +2740,54 @@ struct<>
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'('",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
index b398c07b14e1..0c0473791201 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
@@ -2227,24 +2227,11 @@ struct<result:int>
EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL'
USING 't' AS alias
-- !query schema
-struct<>
+struct<c1:int>
-- !query output
-org.apache.spark.sql.catalyst.ExtendedAnalysisException
-{
- "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
- "sqlState" : "42703",
- "messageParameters" : {
- "objectName" : "`t`.`c1`",
- "proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`,
`IDENTIFIER('t')`.`c4`"
- },
- "queryContext" : [ {
- "objectType" : "EXECUTE IMMEDIATE",
- "objectName" : "",
- "startIndex" : 8,
- "stopIndex" : 31,
- "fragment" : "IDENTIFIER(:alias '.c1')"
- } ]
-}
+1
+2
+3
-- !query
@@ -2396,6 +2383,42 @@ struct<>
+-- !query
+CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c1')
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c1
+data_type int
+comment NULL
+
+
+-- !query
+DESCRIBE describe_col_test IDENTIFIER('c2')
+-- !query schema
+struct<info_name:string,info_value:string>
+-- !query output
+col_name c2
+data_type string
+comment NULL
+
+
+-- !query
+DROP TABLE describe_col_test
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
-- !query schema
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala
new file mode 100644
index 000000000000..c2efc43ae576
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/IdentifierClauseParserSuite.scala
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, ExpressionWithUnresolvedIdentifier, UnresolvedAttribute, UnresolvedExtractValue, UnresolvedFunction, UnresolvedInlineTable, UnresolvedStar}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, LambdaFunction, Literal, UnresolvedNamedLambdaVariable}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, OneRowRelation, Pivot, Project, SubqueryAlias, Unpivot}
+import org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
+
+class IdentifierClauseParserSuite extends AnalysisTest {
+
+ import org.apache.spark.sql.catalyst.dsl.expressions._
+ import org.apache.spark.sql.catalyst.dsl.plans._
+
+ private def intercept(sqlCommand: String): ParseException = {
+ intercept[ParseException](parsePlan(sqlCommand))
+ }
+
+ test("UNPIVOT column alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM unpivot_test UNPIVOT (val FOR col IN " +
+ "(a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b')))"),
+ Unpivot(
+ None,
+ Some(Seq(Seq($"a"), Seq($"b"))),
+ Some(Seq(Some("col_a"), Some("col_b"))),
+ "col",
+ Seq("val"),
+ table("unpivot_test"))
+ .where(coalesce($"val").isNotNull)
+ .select(star())
+ )
+ }
+
+ test("UNPIVOT multi-value column alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN " +
+ "((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS
IDENTIFIER('cols_bc')))"),
+ Unpivot(
+ None,
+ Some(Seq(Seq($"a", $"b"), Seq($"b", $"c"))),
+ Some(Seq(Some("cols_ab"), Some("cols_bc"))),
+ "col",
+ Seq("v1", "v2"),
+ table("unpivot_test"))
+ .where(coalesce($"v1", $"v2").isNotNull)
+ .select(star())
+ )
+ }
+
+ test("PIVOT column with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM pivot_test PIVOT (SUM(revenue) FOR
IDENTIFIER('quarter') IN ('Q1', 'Q2'))"),
+ Pivot(
+ None,
+ UnresolvedAttribute.quoted("quarter"),
+ Seq(Literal("Q1"), Literal("Q2")),
+ Seq(UnresolvedFunction("SUM", Seq($"revenue"), isDistinct = false)),
+ table("pivot_test"))
+ .select(star())
+ )
+ }
+
+ test("PIVOT value alias with IDENTIFIER()") {
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM pivot_test PIVOT (SUM(revenue) AS IDENTIFIER('total')
FOR quarter IN " +
+ "('Q1' AS IDENTIFIER('first_quarter'), 'Q2' AS
IDENTIFIER('second_quarter')))"),
+ Pivot(
+ None,
+ $"quarter",
+ Seq(
+ Alias(Literal("Q1"), "first_quarter")(),
+ Alias(Literal("Q2"), "second_quarter")()
+ ),
+ Seq(Alias(UnresolvedFunction("SUM", Seq($"revenue"), isDistinct = false), "total")()),
+ table("pivot_test"))
+ .select(star())
+ )
+ }
+
+ test("Lambda variable name with IDENTIFIER()") {
+ val lambdaVar = UnresolvedNamedLambdaVariable(Seq("x"))
+ comparePlans(
+ parsePlan("SELECT transform(array(1, 2, 3), IDENTIFIER('x') -> x + 1)"),
+ OneRowRelation()
+ .select(
+ UnresolvedFunction(
+ "transform",
+ Seq(
+ UnresolvedFunction(
+ "array",
+ Seq(Literal(1), Literal(2), Literal(3)),
+ isDistinct = false),
+ LambdaFunction(
+ lambdaVar + Literal(1),
+ Seq(lambdaVar)
+ )
+ ),
+ isDistinct = false
+ )
+ )
+ )
+ }
+
+ test("Struct field names with IDENTIFIER() in CAST") {
+ val structType = StructType(Seq(
+ StructField("field1", IntegerType),
+ StructField("field2", StringType)
+ ))
+ comparePlans(
+ parsePlan(
+ "SELECT CAST(named_struct('field1', 1, 'field2', 'a') AS " +
+ "STRUCT<IDENTIFIER('field1'): INT, IDENTIFIER('field2'): STRING>)"),
+ OneRowRelation()
+ .select(
+ Cast(
+ UnresolvedFunction(
+ "named_struct",
+ Seq(Literal("field1"), Literal(1), Literal("field2"),
Literal("a")),
+ isDistinct = false),
+ structType
+ )
+ )
+ )
+ }
+
+ test("Struct field access with IDENTIFIER()") {
+ val plan = parsePlan("SELECT IDENTIFIER('data').IDENTIFIER('field1') FROM
struct_field_test")
+ val resolvedPlan = plan.transformAllExpressions {
+ case e: ExpressionWithUnresolvedIdentifier =>
+ e.exprBuilder(Seq(e.identifierExpr.eval().toString), e.otherExprs)
+ }
+
+ comparePlans(
+ resolvedPlan,
+ table("struct_field_test").select(UnresolvedExtractValue($"data",
Literal("field1")))
+ )
+ }
+
+ test("Struct field access with multiple IDENTIFIER() parts") {
+ val plan = parsePlan("SELECT
IDENTIFIER('a').IDENTIFIER('b').IDENTIFIER('c') FROM t")
+ val resolvedPlan = plan.transformAllExpressions {
+ case e: ExpressionWithUnresolvedIdentifier =>
+ e.exprBuilder(Seq(e.identifierExpr.eval().toString), e.otherExprs)
+ }
+
+ comparePlans(
+ resolvedPlan,
+ table("t").select(
+ UnresolvedExtractValue(
+ UnresolvedExtractValue($"a", Literal("b")),
+ Literal("c")
+ )
+ )
+ )
+ }
+
+ test("Partition spec with IDENTIFIER() for partition column name") {
+ val plan = parsePlan(
+ "INSERT INTO partition_spec_test PARTITION (IDENTIFIER('c2') = 'value1')
VALUES (1)")
+ .asInstanceOf[InsertIntoStatement]
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("col1"), Seq(Seq(Literal(1)))))
+
+ comparePlans(
+ plan,
+ InsertIntoStatement(
+ plan.table,
+ Map("c2" -> Some("value1")),
+ Nil,
+ values,
+ overwrite = false,
+ ifPartitionNotExists = false
+ )
+ )
+ }
+
+ test("Pipe operator alias with IDENTIFIER()") {
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Literal(1), Literal(2)))))
+ comparePlans(
+ parsePlan(
+ "SELECT * FROM VALUES(1, 2) AS T(c1, c2) |> AS
IDENTIFIER('pipe_alias') |> SELECT c1, c2"),
+ Project(
+ Seq($"c1", $"c2"),
+ SubqueryAlias(
+ "pipe_alias",
+ Project(
+ Seq(UnresolvedStar(None)),
+ SubqueryAlias("T", values)
+ )
+ )
+ )
+ )
+ }
+
+ test("Pipe operator alias with IDENTIFIER() - second variant") {
+ val values = EvaluateUnresolvedInlineTable.evaluate(
+ UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Literal(1), Literal(2)))))
+ comparePlans(
+ parsePlan(
+ "SELECT c1, c2 FROM VALUES(1, 2) AS T(c1, c2) |> AS
IDENTIFIER('my_result') |> SELECT *"),
+ Project(
+ Seq(UnresolvedStar(None)),
+ SubqueryAlias(
+ "my_result",
+ Project(
+ Seq($"c1", $"c2"),
+ SubqueryAlias("T", values)
+ )
+ )
+ )
+ )
+ }
+
+ test("Resource type ADD is a keyword - should fail") {
+ checkError(
+ exception = intercept("ADD IDENTIFIER('file') '/tmp/test.txt'"),
+ condition = "INVALID_SQL_SYNTAX.UNSUPPORTED_SQL_STATEMENT",
+ parameters = Map("sqlText" -> "ADD IDENTIFIER('file') '/tmp/test.txt'"),
+ context = ExpectedContext(
+ fragment = "ADD IDENTIFIER('file') '/tmp/test.txt'",
+ start = 0,
+ stop = 37
+ )
+ )
+ }
+
+ test("Resource type LIST is a keyword - should fail") {
+ checkError(
+ exception = intercept("LIST IDENTIFIER('files')"),
+ condition = "INVALID_SQL_SYNTAX.UNSUPPORTED_SQL_STATEMENT",
+ parameters = Map("sqlText" -> "LIST IDENTIFIER('files')"),
+ context = ExpectedContext(
+ fragment = "LIST IDENTIFIER('files')",
+ start = 0,
+ stop = 23
+ )
+ )
+ }
+
+ test("CREATE FUNCTION USING resource type is a keyword - should fail") {
+ checkError(
+ exception = intercept(
+ "CREATE FUNCTION keyword_test_func AS 'com.example.Test' " +
+ "USING IDENTIFIER('jar') '/path/to.jar'"),
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'('", "hint" -> "")
+ )
+ }
+
+ test("ANALYZE TABLE NOSCAN is a keyword - should fail") {
+ checkError(
+ exception = intercept(
+ "ANALYZE TABLE analyze_keyword_test COMPUTE STATISTICS
IDENTIFIER('noscan')"),
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'('", "hint" -> "")
+ )
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]