This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch branch-52
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/branch-52 by this push:
     new 822f3fa5ed [branch-52] fix: Ensure columns are casted to the correct names with Unions (#20146) (#20879)
822f3fa5ed is described below

commit 822f3fa5ed551d746f31b7fec65fde84c207db11
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Mar 12 06:57:02 2026 -0400

    [branch-52] fix: Ensure columns are casted to the correct names with Unions (#20146) (#20879)
    
    - Part of https://github.com/apache/datafusion/issues/20855
    - Closes https://github.com/apache/datafusion/issues/20123 on branch-52
    
    This PR:
    - Backports https://github.com/apache/datafusion/pull/20146 from
    @nuno-faria to the branch-52 line
    
    ---------
    
    Co-authored-by: Nuno Faria <[email protected]>
---
 datafusion/expr/src/expr_rewriter/mod.rs           |  13 +-
 .../optimizer/tests/optimizer_integration.rs       |   6 +-
 datafusion/substrait/tests/cases/logical_plans.rs  |  24 +++
 .../duplicate_name_in_union.substrait.json         | 171 +++++++++++++++++++++
 4 files changed, 208 insertions(+), 6 deletions(-)

diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index a0faca76e9..32a88ab8cf 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -261,9 +261,16 @@ fn coerce_exprs_for_schema(
                     #[expect(deprecated)]
                     Expr::Wildcard { .. } => Ok(expr),
                     _ => {
-                        // maintain the original name when casting
-                        let name = dst_schema.field(idx).name();
-                        Ok(expr.cast_to(new_type, src_schema)?.alias(name))
+                        match expr {
+                            // maintain the original name when casting a column, to avoid the
+                            // tablename being added to it when not explicitly set by the query
+                            // (see: https://github.com/apache/datafusion/issues/18818)
+                            Expr::Column(ref column) => {
+                                let name = column.name().to_owned();
+                                Ok(expr.cast_to(new_type, src_schema)?.alias(name))
+                            }
+                            _ => Ok(expr.cast_to(new_type, src_schema)?),
+                        }
                     }
                 }
             } else {
diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs
index 36a6df54dd..fd4991c244 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -543,7 +543,7 @@ fn recursive_cte_projection_pushdown() -> Result<()> {
       RecursiveQuery: is_distinct=false
         Projection: test.col_int32 AS id
           TableScan: test projection=[col_int32]
-        Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id
+        Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
           Filter: nodes.id < Int32(3)
             TableScan: nodes projection=[id]
     "
@@ -567,7 +567,7 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> {
       RecursiveQuery: is_distinct=false
         Projection: test.col_int32 AS id
           TableScan: test projection=[col_int32]
-        Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id
+        Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32)
           SubqueryAlias: child
             Filter: nodes.id < Int32(3)
               TableScan: nodes projection=[id]
@@ -630,7 +630,7 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> {
         Projection: test.col_int32 AS n
           Filter: test.col_int32 = Int32(5)
             TableScan: test projection=[col_int32]
-        Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n
+        Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32)
           Filter: countdown.n > Int32(1)
             TableScan: countdown projection=[n]
     "
diff --git a/datafusion/substrait/tests/cases/logical_plans.rs b/datafusion/substrait/tests/cases/logical_plans.rs
index 41f08c579f..e3e45193e7 100644
--- a/datafusion/substrait/tests/cases/logical_plans.rs
+++ b/datafusion/substrait/tests/cases/logical_plans.rs
@@ -220,6 +220,30 @@ mod tests {
         // Trigger execution to ensure plan validity
         DataFrame::new(ctx.state(), plan).show().await?;
 
+        Ok(())
+    }
+    #[tokio::test]
+    async fn duplicate_name_in_union() -> Result<()> {
+        let proto_plan =
+            read_json("tests/testdata/test_plans/duplicate_name_in_union.substrait.json");
+        let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
+        let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
+
+        assert_snapshot!(
+        plan,
+        @r"
+        Projection: foo AS col1, bar AS col2
+          Union
+            Projection: foo, bar
+              Values: (Int64(100), Int64(200))
+            Projection: x, foo
+              Values: (Int32(300), Int64(400))
+        "
+                );
+
+        // Trigger execution to ensure plan validity
+        DataFrame::new(ctx.state(), plan).show().await?;
+
         Ok(())
     }
 }
diff --git a/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json b/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json
new file mode 100644
index 0000000000..1da2ff6131
--- /dev/null
+++ b/datafusion/substrait/tests/testdata/test_plans/duplicate_name_in_union.substrait.json
@@ -0,0 +1,171 @@
+{
+  "version": {
+    "minorNumber": 54,
+    "producer": "datafusion-test"
+  },
+  "relations": [
+    {
+      "root": {
+        "input": {
+          "set": {
+            "common": {
+              "direct": {}
+            },
+            "inputs": [
+              {
+                "project": {
+                  "common": {
+                    "emit": {
+                      "outputMapping": [2, 3]
+                    }
+                  },
+                  "input": {
+                    "read": {
+                      "common": {
+                        "direct": {}
+                      },
+                      "baseSchema": {
+                        "names": ["foo", "bar"],
+                        "struct": {
+                          "types": [
+                            {
+                              "i64": {
+                                "nullability": "NULLABILITY_REQUIRED"
+                              }
+                            },
+                            {
+                              "i64": {
+                                "nullability": "NULLABILITY_REQUIRED"
+                              }
+                            }
+                          ],
+                          "nullability": "NULLABILITY_REQUIRED"
+                        }
+                      },
+                      "virtualTable": {
+                        "expressions": [
+                          {
+                            "fields": [
+                              {
+                                "literal": {
+                                  "i64": "100"
+                                }
+                              },
+                              {
+                                "literal": {
+                                  "i64": "200"
+                                }
+                              }
+                            ]
+                          }
+                        ]
+                      }
+                    }
+                  },
+                  "expressions": [
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {
+                            "field": 0
+                          }
+                        },
+                        "rootReference": {}
+                      }
+                    },
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {
+                            "field": 1
+                          }
+                        },
+                        "rootReference": {}
+                      }
+                    }
+                  ]
+                }
+              },
+              {
+                "project": {
+                  "common": {
+                    "emit": {
+                      "outputMapping": [2, 3]
+                    }
+                  },
+                  "input": {
+                    "read": {
+                      "common": {
+                        "direct": {}
+                      },
+                      "baseSchema": {
+                        "names": ["x", "foo"],
+                        "struct": {
+                          "types": [
+                            {
+                              "i32": {
+                                "nullability": "NULLABILITY_REQUIRED"
+                              }
+                            },
+                            {
+                              "i64": {
+                                "nullability": "NULLABILITY_REQUIRED"
+                              }
+                            }
+                          ],
+                          "nullability": "NULLABILITY_REQUIRED"
+                        }
+                      },
+                      "virtualTable": {
+                        "expressions": [
+                          {
+                            "fields": [
+                              {
+                                "literal": {
+                                  "i32": 300
+                                }
+                              },
+                              {
+                                "literal": {
+                                  "i64": "400"
+                                }
+                              }
+                            ]
+                          }
+                        ]
+                      }
+                    }
+                  },
+                  "expressions": [
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {
+                            "field": 0
+                          }
+                        },
+                        "rootReference": {}
+                      }
+                    },
+                    {
+                      "selection": {
+                        "directReference": {
+                          "structField": {
+                            "field": 1
+                          }
+                        },
+                        "rootReference": {}
+                      }
+                    }
+                  ]
+                }
+              }
+            ],
+            "op": "SET_OP_UNION_ALL"
+          }
+        },
+        "names": ["col1", "col2"]
+      }
+    }
+  ]
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to