This is an automated email from the ASF dual-hosted git repository.
szehon-ho pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.2 by this push:
new 0d68d8a1bdb8 [SPARK-56462][SQL] Fix MERGE UPDATE */INSERT * schema
evolution failure when source col contains special characters like dot/space
0d68d8a1bdb8 is described below
commit 0d68d8a1bdb8f745b13519c4be4d542e54dd4c0b
Author: Eric Yang <[email protected]>
AuthorDate: Sun May 31 15:26:10 2026 -0700
[SPARK-56462][SQL] Fix MERGE UPDATE */INSERT * schema evolution failure
when source col contains special characters like dot/space
### What changes were proposed in this pull request?
In `Analyzer.scala`, the `UPDATE *` and `INSERT *` schema-evolution paths
inside `ResolveReferences` built assignment keys for new source columns using
`UnresolvedAttribute(sourceAttr.name)`. That constructor passes the name
through `CatalystSqlParser.parseMultipartIdentifier`, which treats it as SQL
text rather than a verbatim identifier. For a dot-containing name like
`job.title`, the parser splits it into `Seq("job","title")`; for other
non-SQL-identifier names (spaces, leading dig [...]
The fix uses `UnresolvedAttribute.quoted`, which bypasses the parser entirely
and wraps the stored name verbatim as `Seq(name)`.
### Why are the changes needed?
With a dot in the source column name, analysis fails with
`UNRESOLVED_COLUMN`; for a column name containing a space, it throws a
`ParseException`.
### Does this PR introduce _any_ user-facing change?
Yes. `MERGE INTO … WHEN MATCHED UPDATE * WHEN NOT MATCHED INSERT *` with
schema evolution now correctly handles source columns whose names contain
special characters such as a dot or a space.
### How was this patch tested?
Added `testEvolution` cases in `MergeIntoSchemaEvolutionBasicTests`, one per
column name (`job.title` and `job title`), each covering both the
schema-evolution-enabled path (the special-character column is added and
populated) and the schema-evolution-disabled path (the extra column is
silently ignored).
### Was this patch authored or co-authored using generative AI tooling?
Yes.
Closes #55329 from jiwen624/fix-merge-into-schema-evolution-dot-column.
Authored-by: Eric Yang <[email protected]>
Signed-off-by: Szehon Ho <[email protected]>
(cherry picked from commit c53fd863ccee00f77f5d3b3b40b80c26664bfec5)
Signed-off-by: Szehon Ho <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 4 +--
.../MergeIntoSchemaEvolutionBasicTests.scala | 38 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index df28d708623e..24d88c674c7c 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1741,7 +1741,7 @@ class Analyzer(
// These columns will be added by ResolveSchemaEvolution
later.
sourceTable.output.map { sourceAttr =>
val key = findAttrInTarget(sourceAttr.name).getOrElse(
- UnresolvedAttribute(sourceAttr.name))
+ UnresolvedAttribute.quoted(sourceAttr.name))
Assignment(key, sourceAttr)
}
} else {
@@ -1777,7 +1777,7 @@ class Analyzer(
// These columns will be added by ResolveSchemaEvolution
later.
sourceTable.output.map { sourceAttr =>
val key = findAttrInTarget(sourceAttr.name).getOrElse(
- UnresolvedAttribute(sourceAttr.name))
+ UnresolvedAttribute.quoted(sourceAttr.name))
Assignment(key, sourceAttr)
}
} else {
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
index 4118e5721614..bb221a200b2d 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
@@ -1265,4 +1265,42 @@ trait MergeIntoSchemaEvolutionBasicTests extends
MergeIntoSchemaEvolutionSuiteBa
expected = Seq((1, "hr")).toDF("pk", "dep"),
expectedWithoutEvolution = Seq((1, "hr")).toDF("pk", "dep")
)
+
+ for (colName <- Seq("job.title", "job title")) {
+ testEvolution(s"SPARK-56462: source has extra column with special-char
name: $colName")(
+ targetData = Seq(
+ (1, 100, "hr"),
+ (2, 200, "software"),
+ (3, 300, "hr")
+ ).toDF("pk", "salary", "dep"),
+ sourceData = Seq(
+ (2, 150, "finance", "engineer"),
+ (4, 400, "finance", "manager")
+ ).toDF("pk", "salary", "dep", colName),
+ clauses = Seq(updateAll(), insertAll()),
+ expected = Seq[(Int, Int, String, String)](
+ (1, 100, "hr", null),
+ (2, 150, "finance", "engineer"),
+ (3, 300, "hr", null),
+ (4, 400, "finance", "manager")
+ ).toDF("pk", "salary", "dep", colName),
+ expectedWithoutEvolution = Seq(
+ (1, 100, "hr"),
+ (2, 150, "finance"),
+ (3, 300, "hr"),
+ (4, 400, "finance")
+ ).toDF("pk", "salary", "dep"),
+ expectedSchema = StructType(Seq(
+ StructField("pk", IntegerType, nullable = false),
+ StructField("salary", IntegerType, nullable = false),
+ StructField("dep", StringType),
+ StructField(colName, StringType)
+ )),
+ expectedSchemaWithoutEvolution = StructType(Seq(
+ StructField("pk", IntegerType, nullable = false),
+ StructField("salary", IntegerType, nullable = false),
+ StructField("dep", StringType)
+ ))
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]