This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 8914bc00d7ae [SPARK-51616][SQL] Run CollationTypeCasts before
ResolveAliases and ResolveAggregateFunctions
8914bc00d7ae is described below
commit 8914bc00d7aec26767371aa0383f5539eaa25ad6
Author: Vladimir Golubev <[email protected]>
AuthorDate: Fri Mar 28 23:29:36 2025 +0800
[SPARK-51616][SQL] Run CollationTypeCasts before ResolveAliases and
ResolveAggregateFunctions
Run `CollationTypeCasts` before `ResolveAliases` and
`ResolveAggregateFunctions` so that the alias names generated for collated
expression trees are correct.
This is a better (alternative) solution for
https://github.com/apache/spark/pull/50192.
`ReassignAliasNamesWithCollations` changes `Alias` names only cosmetically.
Name resolution is still done in the main Analyzer batch based on the old
names. For example, users would still be able to reference expressions using
old alias names without `collate` information.
A better solution would be to run `CollationTypeCasts` manually in the
rules that generate `Alias`es.
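Does this PR introduce any user-facing change? No, just a different (better)
way to do the same thing.
How was this patch tested? New golden file tests.
Was this patch authored or co-authored using generative AI tooling? No.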
Closes #50410 from
vladimirg-db/vladimir-golubev_data/run-collation-type-coercion-before-regenerating-aliases.
Authored-by: Vladimir Golubev <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit c9fbcb1c74cedda9400d9730d60428056ac0ef75)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 27 +-
.../ReassignAliasNamesWithCollations.scala | 78 ------
.../sql/catalyst/rules/RuleIdCollection.scala | 1 +
.../org/apache/spark/sql/internal/SQLConf.scala | 12 +
.../sql-tests/analyzer-results/collations.sql.out | 203 +++++++++++++++
.../test/resources/sql-tests/inputs/collations.sql | 77 ++++++
.../resources/sql-tests/results/collations.sql.out | 273 +++++++++++++++++++++
7 files changed, 589 insertions(+), 82 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index a2f42158dc45..fa6a6a90005a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -455,8 +455,6 @@ class Analyzer(override val catalogManager: CatalogManager)
extends RuleExecutor
RewriteMergeIntoTable),
Batch("Subquery", Once,
UpdateOuterReferences),
- Batch("ReassignAliasNamesWithCollations", Once,
- ReassignAliasNamesWithCollations),
Batch("Cleanup", fixedPoint,
CleanupAliases),
Batch("HandleSpecialCommand", Once,
@@ -526,7 +524,17 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
* Replaces [[UnresolvedAlias]]s with concrete aliases.
*/
object ResolveAliases extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan =
+ def apply(plan: LogicalPlan): LogicalPlan = {
+ val collatedPlan =
+ if
(conf.getConf(SQLConf.RUN_COLLATION_TYPE_CASTS_BEFORE_ALIAS_ASSIGNMENT)) {
+ CollationTypeCasts(plan)
+ } else {
+ plan
+ }
+ doApply(collatedPlan)
+ }
+
+ private def doApply(plan: LogicalPlan): LogicalPlan = {
plan.resolveOperatorsUpWithPruning(_.containsPattern(UNRESOLVED_ALIAS),
ruleId) {
case Aggregate(groups, aggs, child, _)
if child.resolved && AliasResolution.hasUnresolvedAlias(aggs) =>
@@ -562,6 +570,7 @@ class Analyzer(override val catalogManager: CatalogManager)
extends RuleExecutor
if c.child.resolved &&
AliasResolution.hasUnresolvedAlias(c.metrics) =>
c.copy(metrics = AliasResolution.assignAliases(c.metrics))
}
+ }
}
object ResolveGroupingAnalytics extends Rule[LogicalPlan] {
@@ -2785,7 +2794,17 @@ class Analyzer(override val catalogManager:
CatalogManager) extends RuleExecutor
* and group by expressions from them.
*/
object ResolveAggregateFunctions extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan =
plan.resolveOperatorsUpWithPruning(
+ def apply(plan: LogicalPlan): LogicalPlan = {
+ val collatedPlan =
+ if
(conf.getConf(SQLConf.RUN_COLLATION_TYPE_CASTS_BEFORE_ALIAS_ASSIGNMENT)) {
+ CollationTypeCasts(plan)
+ } else {
+ plan
+ }
+ doApply(collatedPlan)
+ }
+
+ def doApply(plan: LogicalPlan): LogicalPlan =
plan.resolveOperatorsUpWithPruning(
_.containsPattern(AGGREGATE), ruleId) {
case UnresolvedHaving(cond, agg: Aggregate) if agg.resolved &&
cond.resolved =>
resolveOperatorWithAggregate(Seq(cond), agg, (newExprs, newChild) => {
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ReassignAliasNamesWithCollations.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ReassignAliasNamesWithCollations.scala
deleted file mode 100644
index b9d7d7a24d48..000000000000
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ReassignAliasNamesWithCollations.scala
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.catalyst.analysis
-
-import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Expression,
Literal}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.TreePattern.ALIAS
-import org.apache.spark.sql.catalyst.util.{toPrettySQL, AUTO_GENERATED_ALIAS}
-import org.apache.spark.sql.types.StringType
-
-/**
- * Reassign [[Alias]] names for expression trees with collations. We need this
rule because
- * [[AliasResolution]] cannot easily detect if the expression nodes are
properly casted to collated
- * types or not, and sometimes assigns alias names before
[[CollationTypeCoercion]] is run.
- *
- * For example, if we didn't have this rule:
- *
- * {{{
- * -- The output alias name is "(collate('a', UTF8_LCASE) < 'A' collate
UTF8_LCASE)"
- * SELECT 'a' COLLATE UTF8_LCASE < 'A';
- *
- * -- The output alias name is "concat_ws(a, col1, col1)"
- * SELECT CONCAT_WS('a', col1, col1) FROM VALUES ('a' COLLATE UTF8_LCASE);
- * }}}
- *
- * In the second case literal 'a' does not have "collate" information after it
because
- * [[ResolveAliases]] runs before [[AnsiCombinedTypeCoercionRule]].
- */
-object ReassignAliasNamesWithCollations extends Rule[LogicalPlan] {
- override def apply(plan: LogicalPlan): LogicalPlan = {
- plan.resolveExpressionsWithPruning(_.containsPattern(ALIAS)) {
- case a: Alias
- if a.resolved &&
- a.metadata.contains(AUTO_GENERATED_ALIAS) &&
- hasNonDefaultCollationInTheSubtree(a.child) =>
- val newName = toPrettySQL(a.child)
- if (newName != a.name) {
- a.withName(newName)
- } else {
- a
- }
- }
- }
-
- /**
- * Detect if we have a non-default collation in the subtree under [[Alias]].
We only need to check
- * [[Cast]] and [[Literal]], because only those expressions are affected by
- * [[CollationTypeCoercion]].
- */
- private def hasNonDefaultCollationInTheSubtree(rootExpression: Expression) =
{
- rootExpression.exists { expression =>
- expression match {
- case _: Cast | _: Literal =>
- expression.dataType match {
- case stringType: StringType => !stringType.isUTF8BinaryCollation
- case _ => false
- }
- case _ => false
- }
- }
- }
-}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
index ee5245054bcc..8cf13168466f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -82,6 +82,7 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.Analyzer$WrapLateralColumnAliasReference"
::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$AnsiCombinedTypeCoercionRule"
::
"org.apache.spark.sql.catalyst.analysis.ApplyCharTypePadding" ::
+ "org.apache.spark.sql.catalyst.analysis.CollationTypeCasts" ::
"org.apache.spark.sql.catalyst.analysis.DeduplicateRelations" ::
"org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases" ::
"org.apache.spark.sql.catalyst.analysis.EliminateUnions" ::
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index aabe40005fc2..97b36385395d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -5554,6 +5554,18 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ val RUN_COLLATION_TYPE_CASTS_BEFORE_ALIAS_ASSIGNMENT =
+ buildConf("spark.sql.runCollationTypeCastsBeforeAliasAssignment.enabled")
+ .internal()
+ .doc(
+ "When set to true, rules like ResolveAliases or
ResolveAggregateFunctions will run " +
+ "CollationTypeCasts before alias assignment. This is necessary for
correct alias " +
+ "generation."
+ )
+ .version("4.0.0")
+ .booleanConf
+ .createWithDefault(true)
+
/**
* Holds information about keys that have been deprecated.
*
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out
index e9287bca8b80..957bcabb078f 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out
@@ -3058,6 +3058,209 @@ Project [rtrim(cast(utf8_binary#x as string collate
UTF8_LCASE), Some(collate(AB
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+-- !query
+select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+-- !query analysis
+Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' ' collate
UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias spark_catalog.default.t5
+ +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias __auto_generated_subquery_name
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x]
parquet
+
+
+-- !query
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias __auto_generated_subquery_name
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x]
parquet
+
+
+-- !query
+select subq1.* from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+) AS subq1
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias subq1
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x]
parquet
+
+
+-- !query
+with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+select * from cte
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias cte
+: +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+: +- SubqueryAlias spark_catalog.default.t5
+: +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
++- Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias cte
+ +- CTERelationRef xxxx, true, [concat_ws(' ' collate UTF8_LCASE,
utf8_lcase, utf8_lcase)#x], false, false
+
+
+-- !query
+select * from values (1) where exists (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query analysis
+Project [col1#x]
++- Filter exists#x []
+ : +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ : +- SubqueryAlias spark_catalog.default.t5
+ : +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+ +- LocalRelation [col1#x]
+
+
+-- !query
+select (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+: +- GlobalLimit 1
+: +- LocalLimit 1
+: +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+: +- SubqueryAlias spark_catalog.default.t5
+: +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
++- OneRowRelation
+
+
+-- !query
+select (
+ with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ )
+ select * from cte limit 1
+)
+-- !query analysis
+Project [scalar-subquery#x [] AS scalarsubquery()#x]
+: +- WithCTE
+: :- CTERelationDef xxxx, false
+: : +- SubqueryAlias cte
+: : +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS
concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+: : +- SubqueryAlias spark_catalog.default.t5
+: : +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+: +- GlobalLimit 1
+: +- LocalLimit 1
+: +- Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x]
+: +- SubqueryAlias cte
+: +- CTERelationRef xxxx, true, [concat_ws(' ' collate
UTF8_LCASE, utf8_lcase, utf8_lcase)#x], false, false
++- OneRowRelation
+
+
+-- !query
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+where (
+ `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` == 'aaa'
+)
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- Filter (concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x = aaa)
+ +- SubqueryAlias __auto_generated_subquery_name
+ +- GlobalLimit 1
+ +- LocalLimit 1
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS
concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by 1
+)
+-- !query analysis
+Project [lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x) AS
lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase))#x]
++- SubqueryAlias __auto_generated_subquery_name
+ +- Sort [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x ASC
NULLS FIRST], true
+ +- Aggregate [concat_ws( , utf8_lcase#x, utf8_lcase#x)], [concat_ws( ,
utf8_lcase#x, utf8_lcase#x) AS concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by max(concat_ws(' ', utf8_lcase, utf8_lcase))
+)
+-- !query analysis
+Project [lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x) AS
lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase))#x]
++- SubqueryAlias __auto_generated_subquery_name
+ +- Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- Sort [max(concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase))#x ASC NULLS FIRST], true
+ +- Aggregate [concat_ws( , utf8_lcase#x, utf8_lcase#x)], [concat_ws(
, utf8_lcase#x, utf8_lcase#x) AS concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x, max(concat_ws( , utf8_lcase#x, utf8_lcase#x)) AS max(concat_ws('
' collate UTF8_LCASE, utf8_lcase, utf8_lcase))#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+create temporary view v1 as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query analysis
+CreateViewCommand `v1`, (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+), false, false, LocalTempView, UNSUPPORTED, true
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x]
parquet
+
+
+-- !query
+select * from v1
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias v1
+ +- View (`v1`, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x])
+ +- Project [cast(concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x as string collate UTF8_LCASE) AS concat_ws(' ' collate
UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from v1
+-- !query analysis
+Project [concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
++- SubqueryAlias v1
+ +- View (`v1`, [concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x])
+ +- Project [cast(concat_ws(' ' collate UTF8_LCASE, utf8_lcase,
utf8_lcase)#x as string collate UTF8_LCASE) AS concat_ws(' ' collate
UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws(' '
collate UTF8_LCASE, utf8_lcase, utf8_lcase)#x]
+ +- SubqueryAlias spark_catalog.default.t5
+ +- Relation
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
+
+
+-- !query
+drop view v1
+-- !query analysis
+DropTempViewCommand v1
+
+
-- !query
drop table t5
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/collations.sql
b/sql/core/src/test/resources/sql-tests/inputs/collations.sql
index df15adf2f8fe..17815ed5dde6 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/collations.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/collations.sql
@@ -442,6 +442,83 @@ select RTRIM(utf8_binary collate utf8_binary_rtrim,
utf8_lcase collate utf8_bina
select RTRIM('ABc', utf8_binary), RTRIM('ABc', utf8_lcase) from t5;
select RTRIM('ABc' collate utf8_lcase, utf8_binary), RTRIM('AAa' collate
utf8_binary, utf8_lcase) from t5;
+-- Implicit aliases to collated expression trees are correctly generated
+
+-- Simple select
+select concat_ws(' ', utf8_lcase, utf8_lcase) from t5;
+
+-- Select by implicit alias
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Select by star
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Select by qualified star
+select subq1.* from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+) AS subq1;
+
+-- Implicit alias in CTE output
+with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+select * from cte;
+
+-- Implicit alias in EXISTS subquery output
+select * from values (1) where exists (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+-- Implicit alias in scalar subquery output
+select (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+);
+
+-- Scalar subquery with CTE with implicit alias
+select (
+ with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ )
+ select * from cte limit 1
+);
+
+-- Outer reference to implicit alias
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+where (
+ `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` == 'aaa'
+);
+
+-- Implicit alias reference in Sort
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by 1
+);
+
+-- Implicit alias from aggregate in Sort
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by max(concat_ws(' ', utf8_lcase, utf8_lcase))
+);
+
+-- Implicit alias in view schema
+create temporary view v1 as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+);
+
+select * from v1;
+
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from v1;
+
+drop view v1;
+
drop table t5;
drop table t6;
drop table t7;
diff --git a/sql/core/src/test/resources/sql-tests/results/collations.sql.out
b/sql/core/src/test/resources/sql-tests/results/collations.sql.out
index 5dbc9195ba8b..2055ec0c2c80 100644
--- a/sql/core/src/test/resources/sql-tests/results/collations.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/collations.sql.out
@@ -5513,6 +5513,279 @@ kitten sitTing
İo İo
+-- !query
+select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+select subq1.* from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+) AS subq1
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+select * from cte
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+select * from values (1) where exists (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query schema
+struct<col1:int>
+-- !query output
+1
+
+
+-- !query
+select (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+-- !query schema
+struct<scalarsubquery():string collate UTF8_LCASE>
+-- !query output
+Something else. Nothing here. Something else. Nothing here.
+
+
+-- !query
+select (
+ with cte as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ )
+ select * from cte limit 1
+)
+-- !query schema
+struct<scalarsubquery():string collate UTF8_LCASE>
+-- !query output
+Something else. Nothing here. Something else. Nothing here.
+
+
+-- !query
+select * from (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 limit 1
+)
+where (
+ `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` == 'aaa'
+)
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+
+
+
+-- !query
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by 1
+)
+-- !query schema
+struct<lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)):string
collate UTF8_LCASE>
+-- !query output
+a a
+aaaaaaaa aaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaa
+abc abc
+abcdcba abcdcba
+efd2 efd2
+hello, world! nice day. hello, world! nice day.
+i̇o i̇o
+i̇o i̇o
+sitting sitting
+something else. nothing here. something else. nothing here.
+sql sql
+
+
+-- !query
+select lower(`concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)`) from
(
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+ group by 1
+ order by max(concat_ws(' ', utf8_lcase, utf8_lcase))
+)
+-- !query schema
+struct<lower(concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)):string
collate UTF8_LCASE>
+-- !query output
+a a
+aaaaaaaa aaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaa
+abc abc
+abcdcba abcdcba
+efd2 efd2
+hello, world! nice day. hello, world! nice day.
+i̇o i̇o
+i̇o i̇o
+sitting sitting
+something else. nothing here. something else. nothing here.
+sql sql
+
+
+-- !query
+create temporary view v1 as (
+ select concat_ws(' ', utf8_lcase, utf8_lcase) from t5
+)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+select * from v1
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+select `concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase)` from v1
+-- !query schema
+struct<concat_ws(' ' collate UTF8_LCASE, utf8_lcase, utf8_lcase):string
collate UTF8_LCASE>
+-- !query output
+Hello, world! Nice day. Hello, world! Nice day.
+SQL SQL
+Something else. Nothing here. Something else. Nothing here.
+a a
+aBcDCbA aBcDCbA
+aaAaAAaA aaAaAAaA
+aaAaaAaA aaAaaAaA
+aaAaaAaAaaAaaAaAaaAaaAaA aaAaaAaAaaAaaAaAaaAaaAaA
+abc abc
+efd2 efd2
+i̇o i̇o
+sitTing sitTing
+İo İo
+İo İo
+İo İo
+
+
+-- !query
+drop view v1
+-- !query schema
+struct<>
+-- !query output
+
+
+
-- !query
drop table t5
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]