This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9353d67f929 [SPARK-43851][SQL] Support LCA in grouping expressions
9353d67f929 is described below
commit 9353d67f9290bae1e7d7e16a2caf5256cc4e2f92
Author: Jia Fan <[email protected]>
AuthorDate: Sat Jul 1 08:48:10 2023 +0300
[SPARK-43851][SQL] Support LCA in grouping expressions
### What changes were proposed in this pull request?
This PR adds support for lateral column alias (LCA) references in grouping
expressions.
### Why are the changes needed?
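To extend LCA support to grouping expressions, i.e. GROUP BY alias and GROUP BY ALL, which previously failed with UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY.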
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests; the golden files for column-resolution-aggregate.sql were updated to reflect the new results.
Closes #41804 from Hisoka-X/SPARK-43851_LCA_in_group.
Authored-by: Jia Fan <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/resources/error/error-classes.json | 5 -----
...r-conditions-unsupported-feature-error-class.md | 4 ----
.../analysis/ResolveReferencesInAggregate.scala | 22 ++++++++----------
.../column-resolution-aggregate.sql.out | 26 +++++++++++++---------
.../results/column-resolution-aggregate.sql.out | 16 ++++---------
5 files changed, 29 insertions(+), 44 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 3cc35d668e0..eabd5533e13 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -2530,11 +2530,6 @@
"Referencing lateral column alias <lca> in the aggregate query both
with window expressions and with having clause. Please rewrite the aggregate
query by removing the having clause or removing lateral alias reference in the
SELECT list."
]
},
- "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : {
- "message" : [
- "Referencing a lateral column alias via GROUP BY alias/ALL is not
supported yet."
- ]
- },
"LATERAL_COLUMN_ALIAS_IN_WINDOW" : {
"message" : [
"Referencing a lateral column alias <lca> in window expression
<windowExpr>."
diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md
b/docs/sql-error-conditions-unsupported-feature-error-class.md
index 64d7eb347e5..78bf301c49d 100644
--- a/docs/sql-error-conditions-unsupported-feature-error-class.md
+++ b/docs/sql-error-conditions-unsupported-feature-error-class.md
@@ -65,10 +65,6 @@ Referencing a lateral column alias `<lca>` in the aggregate
function `<aggFunc>`
Referencing lateral column alias `<lca>` in the aggregate query both with
window expressions and with having clause. Please rewrite the aggregate query
by removing the having clause or removing lateral alias reference in the SELECT
list.
-## LATERAL_COLUMN_ALIAS_IN_GROUP_BY
-
-Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet.
-
## LATERAL_COLUMN_ALIAS_IN_WINDOW
Referencing a lateral column alias `<lca>` in window expression `<windowExpr>`.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
index 09ae87b071f..41bcb337c67 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala
@@ -17,9 +17,8 @@
package org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SQLConfHelper
-import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute,
Expression, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute,
Expression, LateralColumnAliasReference, NamedExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, AppendColumns,
LogicalPlan}
import
org.apache.spark.sql.catalyst.trees.TreePattern.{LATERAL_COLUMN_ALIAS_REFERENCE,
UNRESOLVED_ATTRIBUTE}
@@ -74,12 +73,6 @@ object ResolveReferencesInAggregate extends SQLConfHelper
resolvedAggExprsWithOuter,
resolveGroupByAlias(resolvedAggExprsWithOuter,
resolvedGroupExprsNoOuter)
).map(resolveOuterRef)
- // TODO: currently we don't support LCA in `groupingExpressions` yet.
- if (resolved.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE))) {
- throw new AnalysisException(
- errorClass = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
- messageParameters = Map.empty)
- }
resolved
} else {
// Do not resolve columns in grouping expressions to outer references
here, as the aggregate
@@ -112,9 +105,11 @@ object ResolveReferencesInAggregate extends SQLConfHelper
assert(selectList.forall(_.resolved))
if (conf.groupByAliases) {
groupExprs.map { g =>
- g.transformWithPruning(_.containsPattern(UNRESOLVED_ATTRIBUTE)) {
- case u: UnresolvedAttribute =>
- selectList.find(ne => conf.resolver(ne.name, u.name)).getOrElse(u)
+ g.transformWithPruning(_.containsAnyPattern(UNRESOLVED_ATTRIBUTE,
+ LATERAL_COLUMN_ALIAS_REFERENCE)) {
+ case u @ (_: UnresolvedAttribute | _: LateralColumnAliasReference) =>
+ selectList.find(ne => conf.resolver(ne.name,
u.asInstanceOf[NamedExpression].name))
+ .getOrElse(u)
}
}
} else {
@@ -133,8 +128,9 @@ object ResolveReferencesInAggregate extends SQLConfHelper
// tell the user in checkAnalysis that we cannot resolve the all in
group by.
groupExprs
} else {
- // This is a valid GROUP BY ALL aggregate.
- expandedGroupExprs.get
+ // This is a valid GROUP BY ALL aggregate, resolve group by alias
again to transform the
+ // LCA reference
+ resolveGroupByAlias(selectList, expandedGroupExprs.get)
}
} else {
groupExprs
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
index eb30443cbae..3dab6c386cb 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out
@@ -94,21 +94,27 @@ org.apache.spark.sql.AnalysisException
-- !query
SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col
-- !query analysis
-org.apache.spark.sql.AnalysisException
-{
- "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
- "sqlState" : "0A000"
-}
+Project [lca#x, (lca#x + 1) AS col#x]
++- Project [k#x, k#x AS lca#x]
+ +- Aggregate [k#x, (k#x + 1)], [k#x]
+ +- SubqueryAlias v1
+ +- View (`v1`, [a#x,b#x,k#x])
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x,
cast(k#x as int) AS k#x]
+ +- SubqueryAlias t
+ +- LocalRelation [a#x, b#x, k#x]
-- !query
SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all
-- !query analysis
-org.apache.spark.sql.AnalysisException
-{
- "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
- "sqlState" : "0A000"
-}
+Project [lca#x, (lca#x + 1) AS col#x]
++- Project [k#x, k#x AS lca#x]
+ +- Aggregate [k#x, (k#x + 1)], [k#x]
+ +- SubqueryAlias v1
+ +- View (`v1`, [a#x,b#x,k#x])
+ +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x,
cast(k#x as int) AS k#x]
+ +- SubqueryAlias t
+ +- LocalRelation [a#x, b#x, k#x]
-- !query
diff --git
a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
index e8ab766751c..e0bbcae91f1 100644
---
a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out
@@ -91,25 +91,17 @@ org.apache.spark.sql.AnalysisException
-- !query
SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col
-- !query schema
-struct<>
+struct<lca:int,col:int>
-- !query output
-org.apache.spark.sql.AnalysisException
-{
- "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
- "sqlState" : "0A000"
-}
+1 2
-- !query
SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all
-- !query schema
-struct<>
+struct<lca:int,col:int>
-- !query output
-org.apache.spark.sql.AnalysisException
-{
- "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY",
- "sqlState" : "0A000"
-}
+1 2
-- !query
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]