This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new f4febd077631 [SPARK-51849][SQL] Refactoring `ResolveDDLCommandStringTypes`
f4febd077631 is described below

commit f4febd0776319e1b51513ee1760c736928fb6643
Author: ilicmarkodb <marko.i...@databricks.com>
AuthorDate: Tue Apr 22 23:51:32 2025 +0800

    [SPARK-51849][SQL] Refactoring `ResolveDDLCommandStringTypes`
    
    ### What changes were proposed in this pull request?
    Renamed `ResolveDDLCommandStringTypes` to `ApplyDefaultCollationToStringType`.
    
    ### Why are the changes needed?
    The rename is needed because this rule also applies to non-DDL plans (for example, when querying a view), so the DDL-specific name was misleading.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50609 from ilicmarkodb/split_resolve_ddl_and_view.
    
    Authored-by: ilicmarkodb <marko.i...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit c86f617b2f1a2813c099f5a98eb5732b9c3f86b2)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  2 +-
 ...ala => ApplyDefaultCollationToStringType.scala} | 80 +++++++++-------------
 2 files changed, 33 insertions(+), 49 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ca84d02430b2..f50bbc79e70d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -396,7 +396,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
       ResolveAliases ::
       ResolveSubquery ::
       ResolveSubqueryColumnAliases ::
-      ResolveDDLCommandStringTypes ::
+      ApplyDefaultCollationToStringType ::
       ResolveWindowOrder ::
       ResolveWindowFrame ::
       ResolveNaturalAndUsingJoin ::
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
similarity index 74%
rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
index 2f7156a80389..cea2988badf4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
@@ -24,88 +24,72 @@ import org.apache.spark.sql.connector.catalog.TableCatalog
 import org.apache.spark.sql.types.{DataType, StringType}
 
 /**
- * Resolves string types in DDL commands, where the string type inherits the
- * collation from the corresponding object (table/view -> schema -> catalog).
+ * Resolves string types in logical plans by assigning them the appropriate collation. The
+ * collation is inherited from the relevant object in the hierarchy (e.g., table/view -> schema ->
+ * catalog). This rule is primarily applied to DDL commands, but it can also be triggered in other
+ * scenarios. For example, when querying a view, its query is re-resolved each time, and that query
+ * can take various forms.
  */
-object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
+object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    if (isDDLCommand(plan)) {
-      transformDDL(plan)
-    } else {
-      // For non-DDL commands no need to do any further resolution of string types
-      plan
+    fetchDefaultCollation(plan) match {
+      case Some(collation) =>
+        transform(plan, StringType(collation))
+      case None => plan
     }
   }
 
-  /** Default collation used, if object level collation is not provided */
-  private def defaultCollation: String = "UTF8_BINARY"
-
-  /** Returns the string type that should be used in a given DDL command */
-  private def stringTypeForDDLCommand(table: LogicalPlan): StringType = {
-    table match {
-      case createTable: CreateTable if 
createTable.tableSpec.collation.isDefined =>
-        StringType(createTable.tableSpec.collation.get)
+  /** Returns the default collation that should be applied to the plan
+   * if specified; otherwise, returns None.
+   */
+  private def fetchDefaultCollation(plan: LogicalPlan): Option[String] = {
+    plan match {
+      case createTable: CreateTable =>
+        createTable.tableSpec.collation
 
       // CreateView also handles CREATE OR REPLACE VIEW
       // Unlike for tables, CreateView also handles CREATE OR REPLACE VIEW
-      case createView: CreateView if createView.collation.isDefined =>
-        StringType(createView.collation.get)
+      case createView: CreateView =>
+        createView.collation
 
-      case replaceTable: ReplaceTable if 
replaceTable.tableSpec.collation.isDefined =>
-        StringType(replaceTable.tableSpec.collation.get)
+      case replaceTable: ReplaceTable =>
+        replaceTable.tableSpec.collation
 
       case alterTable: AlterTableCommand if alterTable.table.resolved =>
         alterTable.table match {
-          case resolvedTbl: ResolvedTable =>
-            val collation = resolvedTbl.table.properties.getOrDefault(
-              TableCatalog.PROP_COLLATION, defaultCollation)
-            StringType(collation)
-
-          case _ =>
-            // As a safeguard, use the default collation for unknown cases.
-            StringType(defaultCollation)
+          case resolvedTbl: ResolvedTable
+            if 
resolvedTbl.table.properties.containsKey(TableCatalog.PROP_COLLATION ) =>
+              
Some(resolvedTbl.table.properties.get(TableCatalog.PROP_COLLATION))
+          case _ => None
         }
 
       case alterViewAs: AlterViewAs =>
         alterViewAs.child match {
           case resolvedPersistentView: ResolvedPersistentView =>
-            val collation = 
resolvedPersistentView.metadata.collation.getOrElse(defaultCollation)
-            StringType(collation)
+            resolvedPersistentView.metadata.collation
           case resolvedTempView: ResolvedTempView =>
-            val collation = 
resolvedTempView.metadata.collation.getOrElse(defaultCollation)
-            StringType(collation)
-          case _ =>
-            // As a safeguard, use the default collation for unknown cases.
-            StringType(defaultCollation)
+            resolvedTempView.metadata.collation
+          case _ => None
         }
 
       // Check if view has default collation
       case _ if AnalysisContext.get.collation.isDefined =>
-        StringType(AnalysisContext.get.collation.get)
+        AnalysisContext.get.collation
 
-      case _ => StringType(defaultCollation)
+      case _ => None
     }
   }
 
-  private def isDDLCommand(plan: LogicalPlan): Boolean = plan exists {
-    case _: AddColumns | _: ReplaceColumns | _: AlterColumns => true
-    case _ => isCreateOrAlterPlan(plan)
-  }
-
   private def isCreateOrAlterPlan(plan: LogicalPlan): Boolean = plan match {
     // For CREATE TABLE, only v2 CREATE TABLE command is supported.
     // Also, table DEFAULT COLLATION cannot be specified through CREATE TABLE AS SELECT command.
     case _: V2CreateTablePlan | _: ReplaceTable | _: CreateView | _: 
AlterViewAs => true
-    // Check if view has default collation
-    case _ if AnalysisContext.get.collation.isDefined => true
     case _ => false
   }
 
-  private def transformDDL(plan: LogicalPlan): LogicalPlan = {
-    val newType = stringTypeForDDLCommand(plan)
-
+  private def transform(plan: LogicalPlan, newType: StringType): LogicalPlan = 
{
     plan resolveOperators {
-      case p if isCreateOrAlterPlan(p) =>
+      case p if isCreateOrAlterPlan(p) || 
AnalysisContext.get.collation.isDefined =>
         transformPlan(p, newType)
 
       case addCols: AddColumns =>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to