This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 222385da5481 [SPARK-51645][SQL] Fix `CREATE OR REPLACE TABLE ...
DEFAULT COLLATION ...` query
222385da5481 is described below
commit 222385da5481f63d54f1594a6426bf87ec08ed71
Author: ilicmarkodb <[email protected]>
AuthorDate: Sun Mar 30 20:39:43 2025 +0800
[SPARK-51645][SQL] Fix `CREATE OR REPLACE TABLE ... DEFAULT COLLATION ...`
query
### What changes were proposed in this pull request?
Fixed `CREATE OR REPLACE TABLE ... DEFAULT COLLATION ...`. The problem was that
the `ResolveDDLCommandStringTypes` rule did not apply to `ReplaceTable`, so the
default collation was not propagated to columns.
### Why are the changes needed?
Bug fix.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tests added to `DefaultCollationTestSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50435 from
ilicmarkodb/fix_create_or_replace_table_with_default_collation.
Authored-by: ilicmarkodb <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit c9733e0460a943d6feef7a6f5280f49f55787aaa)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../analysis/ResolveDDLCommandStringTypes.scala | 9 +++--
.../sql/collation/DefaultCollationTestSuite.scala | 41 ++++++++++++++++++++++
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
index 9ac04236a1b1..2945bab57ad3 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.expressions.{Cast,
DefaultStringProducingExpression, Expression, Literal}
-import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumns,
AlterColumnSpec, AlterTableCommand, AlterViewAs, ColumnDefinition, CreateTable,
CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, V2CreateTablePlan}
+import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumns,
AlterColumnSpec, AlterTableCommand, AlterViewAs, ColumnDefinition, CreateTable,
CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, ReplaceTable,
V2CreateTablePlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.TableCatalog
import org.apache.spark.sql.types.{DataType, StringType}
@@ -46,9 +46,14 @@ object ResolveDDLCommandStringTypes extends
Rule[LogicalPlan] {
case createTable: CreateTable if
createTable.tableSpec.collation.isDefined =>
StringType(createTable.tableSpec.collation.get)
+ // CreateView also handles CREATE OR REPLACE VIEW
+ // Unlike for tables, CreateView also handles CREATE OR REPLACE VIEW
case createView: CreateView if createView.collation.isDefined =>
StringType(createView.collation.get)
+ case replaceTable: ReplaceTable if
replaceTable.tableSpec.collation.isDefined =>
+ StringType(replaceTable.tableSpec.collation.get)
+
case alterTable: AlterTableCommand if alterTable.table.resolved =>
alterTable.table match {
case resolvedTbl: ResolvedTable =>
@@ -73,7 +78,7 @@ object ResolveDDLCommandStringTypes extends Rule[LogicalPlan]
{
private def isCreateOrAlterPlan(plan: LogicalPlan): Boolean = plan match {
// For CREATE TABLE, only v2 CREATE TABLE command is supported.
// Also, table DEFAULT COLLATION cannot be specified through CREATE TABLE
AS SELECT command.
- case _: V2CreateTablePlan | _: CreateView | _: AlterViewAs => true
+ case _: V2CreateTablePlan | _: ReplaceTable | _: CreateView | _:
AlterViewAs => true
case _ => false
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
index 9679ecb55438..1c41fe6f9dd1 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala
@@ -388,6 +388,29 @@ class DefaultCollationTestSuiteV1 extends
DefaultCollationTestSuite {
assertTableColumnCollation(testTable, "c4", "UTF8_BINARY")
}
}
+
+ test("CREATE OR REPLACE VIEW with DEFAULT COLLATION") {
+ withTable(testTable) {
+ sql(s"CREATE TABLE $testTable (c1 STRING, c2 STRING COLLATE UTF8_LCASE)")
+ sql(s"INSERT INTO $testTable VALUES ('a', 'a'), ('A', 'A'), ('b', 'b')")
+ withView(testView) {
+ // scalastyle:off
+ sql(
+ s"""CREATE OR REPLACE VIEW $testView
+ | DEFAULT COLLATION sr_ci_ai
+ | AS SELECT *, 'ć' AS c3 FROM $testTable
+ |""".stripMargin)
+ val prefix = "SYSTEM.BUILTIN"
+ checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $testView"),
Row(s"$prefix.UTF8_BINARY"))
+ checkAnswer(sql(s"SELECT DISTINCT COLLATION(c2) FROM $testView"),
Row(s"$prefix.UTF8_LCASE"))
+ checkAnswer(sql(s"SELECT DISTINCT COLLATION(c3) FROM $testView"),
Row(s"$prefix.sr_CI_AI"))
+ checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"),
Row(1))
+ checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c2 = 'a'"),
Row(2))
+ checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c3 = 'Č'"),
Row(3))
+ // scalastyle:on
+ }
+ }
+ }
}
class DefaultCollationTestSuiteV2 extends DefaultCollationTestSuite with
DatasourceV2SQLBase {
@@ -409,4 +432,22 @@ class DefaultCollationTestSuiteV2 extends
DefaultCollationTestSuite with Datasou
checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c2"),
Seq(Row(0)))
}
}
+
+ test("CREATE OR REPLACE TABLE with DEFAULT COLLATION") {
+ withTable(testTable) {
+ sql(
+ s"""CREATE OR REPLACE TABLE $testTable
+ | (c1 STRING, c2 STRING COLLATE UTF8_LCASE)
+ | DEFAULT COLLATION sr_ai
+ |""".stripMargin)
+ // scalastyle:off
+ sql(s"INSERT INTO $testTable VALUES ('Ć', 'a'), ('Č', 'A'), ('C', 'b')")
+ checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c1 = 'Ć'"),
Row(3))
+ // scalastyle:on
+ checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c2 = 'a'"),
Row(2))
+ val prefix = "SYSTEM.BUILTIN"
+ checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $testTable"),
Row(s"$prefix.sr_AI"))
+ checkAnswer(sql(s"SELECT DISTINCT COLLATION(c2) FROM $testTable"),
Row(s"$prefix.UTF8_LCASE"))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]