This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a4603f134fb0 [SPARK-47334][SQL] Make `withColumnRenamed` reuse the
implementation of `withColumnsRenamed`
a4603f134fb0 is described below
commit a4603f134fb0d496109d4c90889191c506e82691
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Mar 11 09:06:20 2024 +0900
[SPARK-47334][SQL] Make `withColumnRenamed` reuse the implementation of
`withColumnsRenamed`
### What changes were proposed in this pull request?
Make `withColumnRenamed` reuse the implementation of `withColumnsRenamed`
### Why are the changes needed?
To avoid any divergence between the two implementations in the future.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
Existing CI tests.
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #45450 from zhengruifeng/with_rename_consistent.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../main/scala/org/apache/spark/sql/Dataset.scala | 27 ++++++++--------------
1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index f3bf6119659d..f0c9f7ae53fc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2876,23 +2876,8 @@ class Dataset[T] private[sql](
* @group untypedrel
* @since 2.0.0
*/
- def withColumnRenamed(existingName: String, newName: String): DataFrame = {
- val resolver = sparkSession.sessionState.analyzer.resolver
- val output = queryExecution.analyzed.output
- val shouldRename = output.exists(f => resolver(f.name, existingName))
- if (shouldRename) {
- val columns = output.map { col =>
- if (resolver(col.name, existingName)) {
- Column(col).as(newName)
- } else {
- Column(col)
- }
- }
- select(columns : _*)
- } else {
- toDF()
- }
- }
+ def withColumnRenamed(existingName: String, newName: String): DataFrame =
+ withColumnsRenamed(Seq(existingName), Seq(newName))
/**
* (Scala-specific)
@@ -2921,18 +2906,24 @@ class Dataset[T] private[sql](
val resolver = sparkSession.sessionState.analyzer.resolver
val output: Seq[NamedExpression] = queryExecution.analyzed.output
+ var shouldRename = false
val projectList = colNames.zip(newColNames).foldLeft(output) {
case (attrs, (existingName, newName)) =>
attrs.map(attr =>
if (resolver(attr.name, existingName)) {
+ shouldRename = true
Alias(attr, newName)()
} else {
attr
}
)
}
- withPlan(Project(projectList, logicalPlan))
+ if (shouldRename) {
+ withPlan(Project(projectList, logicalPlan))
+ } else {
+ toDF()
+ }
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]