This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 20469d4  [SPARK-28189][SQL] Use semanticEquals in Dataset drop method 
for attributes comparison
20469d4 is described below

commit 20469d43eb22c832b4d8f30c9e611ef622c3366d
Author: Tony Zhang <[email protected]>
AuthorDate: Sat Jul 6 21:39:04 2019 -0700

    [SPARK-28189][SQL] Use semanticEquals in Dataset drop method for attributes 
comparison
    
    ## What changes were proposed in this pull request?
    
    In the Dataset `drop(col: Column)` method, attributes were compared with
    `equals` (via `!=`) instead of `semanticEquals`, which caused incorrect
    case-sensitivity behavior when dropping a column by reference. When
    attributes of a LogicalPlan are checked for equality, `semanticEquals`
    should be used instead.
    
    A similar PR I referred to: https://github.com/apache/spark/pull/22713 
created by mgaido91
    
    ## How was this patch tested?
    
    - Added new unit test case in DataFrameSuite
    - ./build/sbt "testOnly org.apache.spark.sql.*"
    - Ran the Python code from the ticket reporter at
      https://issues.apache.org/jira/browse/SPARK-28189
    
    Closes #25055 from Tonix517/SPARK-28189.
    
    Authored-by: Tony Zhang <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../main/scala/org/apache/spark/sql/Dataset.scala  |  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala      | 23 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 147222c..ef03a09 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2322,7 +2322,7 @@ class Dataset[T] private[sql](
     }
     val attrs = this.logicalPlan.output
     val colsAfterDrop = attrs.filter { attr =>
-      attr != expression
+      !attr.semanticEquals(expression)
     }.map(attr => Column(attr))
     select(colsAfterDrop : _*)
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index d15c1f4..9893670 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -572,6 +572,29 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
     assert(df.schema.map(_.name) === Seq("value"))
   }
 
+  test("SPARK-28189 drop column using drop with column reference with 
case-insensitive names") {
+    // With SQL config caseSensitive OFF, case insensitive column name should 
work
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val col1 = testData("KEY")
+      val df1 = testData.drop(col1)
+      checkAnswer(df1, testData.selectExpr("value"))
+      assert(df1.schema.map(_.name) === Seq("value"))
+
+      val col2 = testData("Key")
+      val df2 = testData.drop(col2)
+      checkAnswer(df2, testData.selectExpr("value"))
+      assert(df2.schema.map(_.name) === Seq("value"))
+    }
+
+    // With SQL config caseSensitive ON, AnalysisException should be thrown
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      val e = intercept[AnalysisException] {
+        testData("KEY")
+      }.getMessage
+      assert(e.contains("Cannot resolve column name"))
+    }
+  }
+
   test("drop unknown column (no-op) with column reference") {
     val col = Column("random")
     val df = testData.drop(col)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to