This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new f4febd077631 [SPARK-51849][SQL] Refactoring `ResolveDDLCommandStringTypes`
f4febd077631 is described below

commit f4febd0776319e1b51513ee1760c736928fb6643
Author: ilicmarkodb <marko.i...@databricks.com>
AuthorDate: Tue Apr 22 23:51:32 2025 +0800

    [SPARK-51849][SQL] Refactoring `ResolveDDLCommandStringTypes`

    ### What changes were proposed in this pull request?
    `ResolveDDLCommandStringTypes` is renamed to `ApplyDefaultCollationToStringType`.

    ### Why are the changes needed?
    The rename is needed because this rule also applies to non-DDL plans (for example, when querying a view).

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #50609 from ilicmarkodb/split_resolve_ddl_and_view.

    Authored-by: ilicmarkodb <marko.i...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit c86f617b2f1a2813c099f5a98eb5732b9c3f86b2)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  2 +-
 ...ala => ApplyDefaultCollationToStringType.scala} | 80 +++++++++-------------
 2 files changed, 33 insertions(+), 49 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ca84d02430b2..f50bbc79e70d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -396,7 +396,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
       ResolveAliases ::
       ResolveSubquery ::
       ResolveSubqueryColumnAliases ::
-      ResolveDDLCommandStringTypes ::
+      ApplyDefaultCollationToStringType ::
       ResolveWindowOrder ::
       ResolveWindowFrame ::
       ResolveNaturalAndUsingJoin ::
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
similarity index 74%
rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
index 2f7156a80389..cea2988badf4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
@@ -24,88 +24,72 @@ import org.apache.spark.sql.connector.catalog.TableCatalog
 import org.apache.spark.sql.types.{DataType, StringType}
 
 /**
- * Resolves string types in DDL commands, where the string type inherits the
- * collation from the corresponding object (table/view -> schema -> catalog).
+ * Resolves string types in logical plans by assigning them the appropriate collation. The
+ * collation is inherited from the relevant object in the hierarchy (e.g., table/view -> schema ->
+ * catalog). This rule is primarily applied to DDL commands, but it can also be triggered in other
+ * scenarios. For example, when querying a view, its query is re-resolved each time, and that query
+ * can take various forms.
  */
-object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
+object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    if (isDDLCommand(plan)) {
-      transformDDL(plan)
-    } else {
-      // For non-DDL commands no need to do any further resolution of string types
-      plan
+    fetchDefaultCollation(plan) match {
+      case Some(collation) =>
+        transform(plan, StringType(collation))
+      case None => plan
     }
   }
 
-  /** Default collation used, if object level collation is not provided */
-  private def defaultCollation: String = "UTF8_BINARY"
-
-  /** Returns the string type that should be used in a given DDL command */
-  private def stringTypeForDDLCommand(table: LogicalPlan): StringType = {
-    table match {
-      case createTable: CreateTable if createTable.tableSpec.collation.isDefined =>
-        StringType(createTable.tableSpec.collation.get)
+  /** Returns the default collation that should be applied to the plan
+   * if specified; otherwise, returns None.
+   */
+  private def fetchDefaultCollation(plan: LogicalPlan): Option[String] = {
+    plan match {
+      case createTable: CreateTable =>
+        createTable.tableSpec.collation
 
       // CreateView also handles CREATE OR REPLACE VIEW
       // Unlike for tables, CreateView also handles CREATE OR REPLACE VIEW
-      case createView: CreateView if createView.collation.isDefined =>
-        StringType(createView.collation.get)
+      case createView: CreateView =>
+        createView.collation
 
-      case replaceTable: ReplaceTable if replaceTable.tableSpec.collation.isDefined =>
-        StringType(replaceTable.tableSpec.collation.get)
+      case replaceTable: ReplaceTable =>
+        replaceTable.tableSpec.collation
 
       case alterTable: AlterTableCommand if alterTable.table.resolved =>
         alterTable.table match {
-          case resolvedTbl: ResolvedTable =>
-            val collation = resolvedTbl.table.properties.getOrDefault(
-              TableCatalog.PROP_COLLATION, defaultCollation)
-            StringType(collation)
-
-          case _ =>
-            // As a safeguard, use the default collation for unknown cases.
-            StringType(defaultCollation)
+          case resolvedTbl: ResolvedTable
+              if resolvedTbl.table.properties.containsKey(TableCatalog.PROP_COLLATION) =>
+            Some(resolvedTbl.table.properties.get(TableCatalog.PROP_COLLATION))
+          case _ => None
         }
 
       case alterViewAs: AlterViewAs =>
         alterViewAs.child match {
           case resolvedPersistentView: ResolvedPersistentView =>
-            val collation = resolvedPersistentView.metadata.collation.getOrElse(defaultCollation)
-            StringType(collation)
+            resolvedPersistentView.metadata.collation
           case resolvedTempView: ResolvedTempView =>
-            val collation = resolvedTempView.metadata.collation.getOrElse(defaultCollation)
-            StringType(collation)
-          case _ =>
-            // As a safeguard, use the default collation for unknown cases.
-            StringType(defaultCollation)
+            resolvedTempView.metadata.collation
+          case _ => None
         }
 
       // Check if view has default collation
       case _ if AnalysisContext.get.collation.isDefined =>
-        StringType(AnalysisContext.get.collation.get)
+        AnalysisContext.get.collation
 
-      case _ => StringType(defaultCollation)
+      case _ => None
     }
   }
 
-  private def isDDLCommand(plan: LogicalPlan): Boolean = plan exists {
-    case _: AddColumns | _: ReplaceColumns | _: AlterColumns => true
-    case _ => isCreateOrAlterPlan(plan)
-  }
-
   private def isCreateOrAlterPlan(plan: LogicalPlan): Boolean = plan match {
     // For CREATE TABLE, only v2 CREATE TABLE command is supported.
     // Also, table DEFAULT COLLATION cannot be specified through CREATE TABLE AS SELECT command.
     case _: V2CreateTablePlan | _: ReplaceTable | _: CreateView | _: AlterViewAs => true
-    // Check if view has default collation
-    case _ if AnalysisContext.get.collation.isDefined => true
     case _ => false
   }
 
-  private def transformDDL(plan: LogicalPlan): LogicalPlan = {
-    val newType = stringTypeForDDLCommand(plan)
-
+  private def transform(plan: LogicalPlan, newType: StringType): LogicalPlan = {
     plan resolveOperators {
-      case p if isCreateOrAlterPlan(p) =>
+      case p if isCreateOrAlterPlan(p) || AnalysisContext.get.collation.isDefined =>
         transformPlan(p, newType)
 
       case addCols: AddColumns =>
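The remainder of the rule (transformPlan and the helpers it calls) is not shown in the diff above. As a rough orientation only, the sketch below illustrates the general idea of pushing a default collation into the plain string types of a schema. It is a hypothetical, self-contained example and not the rule itself: the object name CollationSketch and the helper applyDefaultCollation are invented for illustration, and it assumes only the catalyst types already used in the diff (DataType, StructType, ArrayType, MapType, and the StringType(collationName) constructor). Unlike the real rule, it rewrites every StringType it finds, without special handling for strings that already carry an explicit collation.

import org.apache.spark.sql.types._

// Hypothetical sketch: recursively replace string types in a schema with a
// collated StringType. Names here are invented for illustration and are not
// part of the Spark rule shown in the diff.
object CollationSketch {
  def applyDefaultCollation(dt: DataType, collation: String): DataType = dt match {
    // Any string type picks up the given collation (the real rule is more
    // selective about strings that already have an explicit collation).
    case _: StringType => StringType(collation)
    // Recurse into nested types so struct fields, array elements, and map
    // keys/values are rewritten as well.
    case st: StructType =>
      StructType(st.fields.map(f =>
        f.copy(dataType = applyDefaultCollation(f.dataType, collation))))
    case ArrayType(elementType, containsNull) =>
      ArrayType(applyDefaultCollation(elementType, collation), containsNull)
    case MapType(keyType, valueType, valueContainsNull) =>
      MapType(
        applyDefaultCollation(keyType, collation),
        applyDefaultCollation(valueType, collation),
        valueContainsNull)
    // Non-string atomic types are left untouched.
    case other => other
  }
}

Under this shape, a schema like StructType(Seq(StructField("c", StringType))) processed with a collation name such as "UNICODE_CI" would come back with c as a collated string type, which mirrors what the renamed rule does when a CREATE TABLE or CREATE VIEW carries a default collation, or when a view defined under a default collation is re-resolved at query time.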