Repository: spark
Updated Branches:
  refs/heads/master 441e072a2 -> 59d24c226


[SPARK-9130][SQL] throw exception when check equality between external and 
internal row

instead of return false, throw exception when check equality between external 
and internal row is better.

Author: Wenchen Fan <[email protected]>

Closes #7460 from cloud-fan/row-compare and squashes the following commits:

8a20911 [Wenchen Fan] improve equals
402daa8 [Wenchen Fan] throw exception when check equality between external and 
internal row


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59d24c22
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59d24c22
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59d24c22

Branch: refs/heads/master
Commit: 59d24c226a441db5f08c58ec407ba5873bd3b954
Parents: 441e072
Author: Wenchen Fan <[email protected]>
Authored: Fri Jul 17 09:31:13 2015 -0700
Committer: Reynold Xin <[email protected]>
Committed: Fri Jul 17 09:31:13 2015 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/sql/Row.scala   | 27 +++++++++++++++-----
 .../apache/spark/sql/catalyst/InternalRow.scala |  7 ++++-
 .../scala/org/apache/spark/sql/RowTest.scala    | 26 +++++++++++++++++++
 3 files changed, 53 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/59d24c22/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index 3623fef..2cb64d0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -364,18 +364,33 @@ trait Row extends Serializable {
     false
   }
 
-  protected def canEqual(other: Any) = {
-    // Note that InternalRow overrides canEqual. These two canEqual's together 
makes sure that
-    // comparing the external Row and InternalRow will always yield false.
+  /**
+   * Returns true if we can check equality for these 2 rows.
+   * Equality check between external row and internal row is not allowed.
+   * Here we do this check to prevent call `equals` on external row with 
internal row.
+   */
+  protected def canEqual(other: Row) = {
+    // Note that `Row` is not only the interface of external row but also the 
parent
+    // of `InternalRow`, so we have to ensure `other` is not a internal row 
here to prevent
+    // call `equals` on external row with internal row.
+    // `InternalRow` overrides canEqual, and these two canEquals together 
makes sure that
+    // equality check between external Row and InternalRow will always fail.
     // In the future, InternalRow should not extend Row. In that case, we can 
remove these
     // canEqual methods.
-    other.isInstanceOf[Row] && !other.isInstanceOf[InternalRow]
+    !other.isInstanceOf[InternalRow]
   }
 
   override def equals(o: Any): Boolean = {
-    if (o == null || !canEqual(o)) return false
-
+    if (!o.isInstanceOf[Row]) return false
     val other = o.asInstanceOf[Row]
+
+    if (!canEqual(other)) {
+      throw new UnsupportedOperationException(
+        "cannot check equality between external and internal rows")
+    }
+
+    if (other eq null) return false
+
     if (length != other.length) {
       return false
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/59d24c22/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
index e2fafb8..024973a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala
@@ -54,7 +54,12 @@ abstract class InternalRow extends Row {
   // A default implementation to change the return type
   override def copy(): InternalRow = this
 
-  protected override def canEqual(other: Any) = other.isInstanceOf[InternalRow]
+  /**
+   * Returns true if we can check equality for these 2 rows.
+   * Equality check between external row and internal row is not allowed.
+   * Here we do this check to prevent call `equals` on internal row with 
external row.
+   */
+  protected override def canEqual(other: Row) = other.isInstanceOf[InternalRow]
 
   // Custom hashCode function that matches the efficient code generated 
version.
   override def hashCode: Int = {

http://git-wip-us.apache.org/repos/asf/spark/blob/59d24c22/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
index bbb9739..878a1bb 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, 
GenericRowWithSchema}
 import org.apache.spark.sql.types._
 import org.scalatest.{Matchers, FunSpec}
@@ -68,4 +69,29 @@ class RowTest extends FunSpec with Matchers {
       sampleRow.getValuesMap(List("col1", "col2")) shouldBe expected
     }
   }
+
+  describe("row equals") {
+    val externalRow = Row(1, 2)
+    val externalRow2 = Row(1, 2)
+    val internalRow = InternalRow(1, 2)
+    val internalRow2 = InternalRow(1, 2)
+
+    it("equality check for external rows") {
+      externalRow shouldEqual externalRow2
+    }
+
+    it("equality check for internal rows") {
+      internalRow shouldEqual internalRow2
+    }
+
+    it("throws an exception when check equality between external and internal 
rows") {
+      def assertError(f: => Unit): Unit = {
+        val e = intercept[UnsupportedOperationException](f)
+        e.getMessage.contains("cannot check equality between external and 
internal rows")
+      }
+
+      assertError(internalRow.equals(externalRow))
+      assertError(externalRow.equals(internalRow))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to