This is an automated email from the ASF dual-hosted git repository.

szehon-ho pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new de20f6624e40 [SPARK-56462][SQL] Fix MERGE UPDATE */INSERT * schema 
evolution failure when source col contains special characters like dot/space
de20f6624e40 is described below

commit de20f6624e40df5c757f9cd93904b6c11aed259f
Author: Eric Yang <[email protected]>
AuthorDate: Sun May 31 15:26:10 2026 -0700

    [SPARK-56462][SQL] Fix MERGE UPDATE */INSERT * schema evolution failure 
when source col contains special characters like dot/space
    
    ### What changes were proposed in this pull request?
    
    In `Analyzer.scala`, the `UPDATE *` and `INSERT *` schema-evolution paths 
inside `ResolveReferences` built assignment keys for new source columns using 
`UnresolvedAttribute(sourceAttr.name)`. That constructor passes the name 
through `CatalystSqlParser.parseMultipartIdentifier`, which treats it as SQL 
text rather than a verbatim identifier. For a dot-containing name like 
`job.title`, the parser splits it into `Seq("job","title")`; for other 
non-SQL-identifier names (spaces, leading dig [...]
    
    The fix uses `UnresolvedAttribute.quoted`, which bypasses the parser 
entirely and wraps the stored name verbatim as `Seq(name)`.
    
    ### Why are the changes needed?
    
    With a dot in the source column name, analysis fails with 
`UNRESOLVED_COLUMN`; for a column name containing a space, it throws 
`ParseException`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. `MERGE INTO … WHEN MATCHED UPDATE * WHEN NOT MATCHED INSERT *` with 
schema evolution now correctly handles source columns whose names contain a 
dot/space, etc.
    
    ### How was this patch tested?
    
    Added `testEvolution` cases in `MergeIntoSchemaEvolutionBasicTests` for 
the source column names `job.title` and `job title`, covering both the 
schema-evolution-enabled path (the new column is added and populated) and the 
schema-evolution-disabled path (the extra column is silently ignored).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes.
    
    Closes #55329 from jiwen624/fix-merge-into-schema-evolution-dot-column.
    
    Authored-by: Eric Yang <[email protected]>
    Signed-off-by: Szehon Ho <[email protected]>
    (cherry picked from commit c53fd863ccee00f77f5d3b3b40b80c26664bfec5)
    Signed-off-by: Szehon Ho <[email protected]>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  4 +--
 .../MergeIntoSchemaEvolutionBasicTests.scala       | 38 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index a446ffae9a00..4c5cafeaeab9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1742,7 +1742,7 @@ class Analyzer(
                   // These columns will be added by ResolveSchemaEvolution 
later.
                   sourceTable.output.map { sourceAttr =>
                     val key = findAttrInTarget(sourceAttr.name).getOrElse(
-                      UnresolvedAttribute(sourceAttr.name))
+                      UnresolvedAttribute.quoted(sourceAttr.name))
                     Assignment(key, sourceAttr)
                   }
                 } else {
@@ -1778,7 +1778,7 @@ class Analyzer(
                   // These columns will be added by ResolveSchemaEvolution 
later.
                   sourceTable.output.map { sourceAttr =>
                     val key = findAttrInTarget(sourceAttr.name).getOrElse(
-                      UnresolvedAttribute(sourceAttr.name))
+                      UnresolvedAttribute.quoted(sourceAttr.name))
                     Assignment(key, sourceAttr)
                   }
                 } else {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
index 4118e5721614..bb221a200b2d 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoSchemaEvolutionBasicTests.scala
@@ -1265,4 +1265,42 @@ trait MergeIntoSchemaEvolutionBasicTests extends 
MergeIntoSchemaEvolutionSuiteBa
     expected = Seq((1, "hr")).toDF("pk", "dep"),
     expectedWithoutEvolution = Seq((1, "hr")).toDF("pk", "dep")
   )
+
+  for (colName <- Seq("job.title", "job title")) {
+    testEvolution(s"SPARK-56462: source has extra column with special-char 
name: $colName")(
+      targetData = Seq(
+        (1, 100, "hr"),
+        (2, 200, "software"),
+        (3, 300, "hr")
+      ).toDF("pk", "salary", "dep"),
+      sourceData = Seq(
+        (2, 150, "finance", "engineer"),
+        (4, 400, "finance", "manager")
+      ).toDF("pk", "salary", "dep", colName),
+      clauses = Seq(updateAll(), insertAll()),
+      expected = Seq[(Int, Int, String, String)](
+        (1, 100, "hr", null),
+        (2, 150, "finance", "engineer"),
+        (3, 300, "hr", null),
+        (4, 400, "finance", "manager")
+      ).toDF("pk", "salary", "dep", colName),
+      expectedWithoutEvolution = Seq(
+        (1, 100, "hr"),
+        (2, 150, "finance"),
+        (3, 300, "hr"),
+        (4, 400, "finance")
+      ).toDF("pk", "salary", "dep"),
+      expectedSchema = StructType(Seq(
+        StructField("pk", IntegerType, nullable = false),
+        StructField("salary", IntegerType, nullable = false),
+        StructField("dep", StringType),
+        StructField(colName, StringType)
+      )),
+      expectedSchemaWithoutEvolution = StructType(Seq(
+        StructField("pk", IntegerType, nullable = false),
+        StructField("salary", IntegerType, nullable = false),
+        StructField("dep", StringType)
+      ))
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to