This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6320b5cdbb1d [SPARK-50779][SQL] Adding feature flag for object level
collations
6320b5cdbb1d is described below
commit 6320b5cdbb1d7058b288f495e10a645c86a8d037
Author: Dejan Krakovic <[email protected]>
AuthorDate: Fri Jan 10 13:19:10 2025 +0800
[SPARK-50779][SQL] Adding feature flag for object level collations
### What changes were proposed in this pull request?
As a follow up from https://github.com/apache/spark/pull/49084 and
associated JIRA issue https://issues.apache.org/jira/browse/SPARK-50675, adding
an internal feature SQL conf flag around object level collations, to be able to
enable/disable the feature.
### Why are the changes needed?
The object level collations feature is still in the development phase, and so
it should be disabled by default until it is completed, other than in unit
tests that validate the functionality.
### Does this PR introduce _any_ user-facing change?
Just explicitly throwing a query compilation error stating that object level
collations are an unsupported feature, in case the newly added feature flag is
disabled.
### How was this patch tested?
Dedicated unit tests are added to verify that when the feature flag is
disabled, attempting to set an object level collation (on a table or view
object) results in a query compilation error stating that the feature is not
supported. The existing unit tests already added as part of the previous PR
linked above validate that when the feature flag is enabled, the object level
collation can be properly set (the flag is enabled by default in the testing
environment).
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #49431 from dejankrak-db/conf-flag-object-level-collations.
Authored-by: Dejan Krakovic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 5 +++++
.../apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +++
.../spark/sql/errors/QueryCompilationErrors.scala | 7 +++++++
.../scala/org/apache/spark/sql/internal/SQLConf.scala | 14 ++++++++++++++
.../spark/sql/errors/QueryCompilationErrorsSuite.scala | 17 +++++++++++++++++
5 files changed, 46 insertions(+)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index f7ca1eae0ef2..e9b32b5f9cbe 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -5381,6 +5381,11 @@
"The target JDBC server hosting table <tableName> does not support
ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual
actions to avoid this error."
]
},
+ "OBJECT_LEVEL_COLLATIONS" : {
+ "message" : [
+ "Default collation for the specified object."
+ ]
+ },
"ORC_TYPE_CAST" : {
"message" : [
"Unable to convert <orcType> of Orc to data type <toType>."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index f4f6d2b310f4..5858425f6646 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -3875,6 +3875,9 @@ class AstBuilder extends DataTypeAstBuilder
}
override def visitCollationSpec(ctx: CollationSpecContext): String =
withOrigin(ctx) {
+ if (!SQLConf.get.objectLevelCollationsEnabled) {
+ throw QueryCompilationErrors.objectLevelCollationsNotEnabledError()
+ }
val collationName = ctx.identifier.getText
CollationFactory.fetchCollation(collationName).collationName
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 0d5fe7bc1459..afae0565133b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -357,6 +357,13 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
messageParameters = Map.empty)
}
+ def objectLevelCollationsNotEnabledError(): Throwable = {
+ new AnalysisException(
+ errorClass = "UNSUPPORTED_FEATURE.OBJECT_LEVEL_COLLATIONS",
+ messageParameters = Map.empty
+ )
+ }
+
def trimCollationNotEnabledError(): Throwable = {
new AnalysisException(
errorClass = "UNSUPPORTED_FEATURE.TRIM_COLLATION",
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 875e9543c472..6077e55561e6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -859,6 +859,18 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ lazy val OBJECT_LEVEL_COLLATIONS_ENABLED =
+ buildConf("spark.sql.collation.objectLevel.enabled")
+ .internal()
+ .doc(
+ "Object level collations feature is under development and its use
should be done " +
+ "under this feature flag. The feature allows setting default collation
for all " +
+ "underlying columns within that object, except the ones that were
previously created."
+ )
+ .version("4.0.0")
+ .booleanConf
+ .createWithDefault(Utils.isTesting)
+
lazy val TRIM_COLLATION_ENABLED =
buildConf("spark.sql.collation.trim.enabled")
.internal()
@@ -5770,6 +5782,8 @@ class SQLConf extends Serializable with Logging with
SqlApiConf {
def allowCollationsInMapKeys: Boolean = getConf(ALLOW_COLLATIONS_IN_MAP_KEYS)
+ def objectLevelCollationsEnabled: Boolean =
getConf(OBJECT_LEVEL_COLLATIONS_ENABLED)
+
def trimCollationEnabled: Boolean = getConf(TRIM_COLLATION_ENABLED)
override def defaultStringType: StringType = {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 92c175fe2f94..779b5ba530aa 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -901,6 +901,23 @@ class QueryCompilationErrorsSuite
}
}
+ test("SPARK-50779: the object level collations feature is unsupported when
flag is disabled") {
+ withSQLConf(SQLConf.OBJECT_LEVEL_COLLATIONS_ENABLED.key -> "false") {
+ Seq(
+ "CREATE TABLE t (c STRING) USING parquet DEFAULT COLLATION UNICODE",
+ "REPLACE TABLE t (c STRING) USING parquet DEFAULT COLLATION
UNICODE_CI",
+ "ALTER TABLE t DEFAULT COLLATION sr_CI_AI",
+ "CREATE VIEW v DEFAULT COLLATION UNICODE as SELECT * FROM t",
+ "CREATE TEMPORARY VIEW v DEFAULT COLLATION UTF8_LCASE as SELECT * FROM
t"
+ ).foreach { sqlText =>
+ checkError(
+ exception = intercept[AnalysisException](sql(sqlText)),
+ condition = "UNSUPPORTED_FEATURE.OBJECT_LEVEL_COLLATIONS"
+ )
+ }
+ }
+ }
+
test("UNSUPPORTED_CALL: call the unsupported method update()") {
checkError(
exception = intercept[SparkUnsupportedOperationException] {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]