This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 46ac78ea367c [SPARK-53734][SQL] Prefer table column over LCA when resolving array index
46ac78ea367c is described below
commit 46ac78ea367cfa9a7acc04482770aaca33f5a575
Author: Mihailo Timotic <[email protected]>
AuthorDate: Tue Sep 30 13:23:45 2025 +0800
[SPARK-53734][SQL] Prefer table column over LCA when resolving array index
### What changes were proposed in this pull request?
Prefer the table column over the lateral column alias (LCA) when resolving an array index in `UnresolvedExtractValue`.
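In short (a condensed, commented sketch of the `ColumnResolutionHelper` hunk below; all names come from this patch), the extraction expression of `UnresolvedExtractValue` is now resolved eagerly, gated behind a new internal conf, so that a matching table column wins before the lateral column alias fallback:
```
case u @ UnresolvedExtractValue(child, field) =>
  val newChild = innerResolve(child, isTopLevel = false)
  // New: resolve the index/field expression itself so that a name like `col1`
  // binds to the underlying table column (if one exists) instead of falling
  // through to the lateral column alias resolution that runs afterwards.
  val resolvedField = if (conf.getConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX)) {
    innerResolve(field, isTopLevel = false)
  } else {
    field
  }
  if (newChild.resolved) {
    ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver)
  } else {
    u.copy(child = newChild, extraction = resolvedField)
  }
```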
### Why are the changes needed?
For a query like:
```
SELECT 1 AS col1, col2[col1] FROM VALUES(0, ARRAY(1,2));
```
the output should be (1, 1), but the current Spark implementation outputs (1, 2). This happens because `[col1]` is resolved as an LCA instead of the table column: we never actually resolve the `field` of `UnresolvedExtractValue` in `innerResolve`, so the resolution of `field` falls back to the next item in the precedence chain, which is the LCA.
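For illustration (a hedged usage sketch, not part of this patch; it assumes a `SparkSession` named `spark`), the new internal conf added below can be used to switch back to the previous behavior:
```
// Default (true): col1 inside col2[col1] binds to the table column (0),
// so col2[0] = 1 and the query returns (1, 1).
spark.sql("SELECT 1 AS col1, col2[col1] FROM VALUES(0, ARRAY(1, 2))").show()

// Setting the conf to false restores the old LCA-preferring resolution:
// col1 binds to the alias (1), so col2[1] = 2 and the query returns (1, 2).
spark.conf.set("spark.sql.analyzer.preferColumnOverLcaInArrayIndex", "false")
spark.sql("SELECT 1 AS col1, col2[col1] FROM VALUES(0, ARRAY(1, 2))").show()
```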
### Does this PR introduce _any_ user-facing change?
Yes, users now see the correct result for the affected query shape.
### How was this patch tested?
Added a test case for the affected query.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #52472 from mihailotim-db/mihailo-timotic_data/array_index_lca_correctness.
Authored-by: Mihailo Timotic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/ColumnResolutionHelper.scala | 11 ++++++++---
.../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++
.../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
index 3224ccafafec..0502f7f67078 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala
@@ -167,12 +167,17 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase {
}
}
- case u @ UnresolvedExtractValue(child, fieldName) =>
+ case u @ UnresolvedExtractValue(child, field) =>
val newChild = innerResolve(child, isTopLevel = false)
+ val resolvedField = if (conf.getConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX)) {
+ innerResolve(field, isTopLevel = false)
+ } else {
+ field
+ }
if (newChild.resolved) {
- ExtractValue(newChild, fieldName, resolver)
+ ExtractValue(child = newChild, extraction = resolvedField, resolver = resolver)
} else {
- u.copy(child = newChild)
+ u.copy(child = newChild, extraction = resolvedField)
}
case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 477d09d29a05..eea92dffb048 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -241,6 +241,16 @@ object SQLConf {
}
}
+ val PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX =
+ buildConf("spark.sql.analyzer.preferColumnOverLcaInArrayIndex")
+ .internal()
+ .doc(
+ "When true, prefer the column from the underlying relation over the
lateral column alias " +
+ "reference with the same name (see SPARK-53734)."
+ )
+ .booleanConf
+ .createWithDefault(true)
+
val DONT_DEDUPLICATE_EXPRESSION_IF_EXPR_ID_IN_OUTPUT =
buildConf("spark.sql.analyzer.dontDeduplicateExpressionIfExprIdInOutput")
.internal()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 89a6a12a7e4e..90375d0e0873 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -5079,6 +5079,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
checkAnswer(df, Row(1))
}
+
+ test("SPARK-53734: Prefer table column over LCA when resolving array index")
{
+ val query = "SELECT 1 AS col1, col2[col1] FROM VALUES(0, ARRAY(1, 2));"
+ withSQLConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX.key -> "true") {
+ checkAnswer(sql(query), Row(1, 1))
+ }
+
+ withSQLConf(SQLConf.PREFER_COLUMN_OVER_LCA_IN_ARRAY_INDEX.key -> "false") {
+ checkAnswer(sql(query), Row(1, 2))
+ }
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]