Repository: spark
Updated Branches:
  refs/heads/master cd1d4110c -> 92f8f803a


[SPARK-7452] [MLLIB] fix bug in topBykey and update test

The toArray function of BoundedPriorityQueue does not necessarily preserve
order. Add a counter-example as a test, which would fail with the original
implementation.

Author: Shuo Xiang <[email protected]>

Closes #5990 from coderxiang/topbykey-test and squashes the following commits:

98804c9 [Shuo Xiang] fix bug in topBykey and update test


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/92f8f803
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/92f8f803
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/92f8f803

Branch: refs/heads/master
Commit: 92f8f803a68e0c16771e9793098c6d76dfdf99af
Parents: cd1d411
Author: Shuo Xiang <[email protected]>
Authored: Thu May 7 20:55:08 2015 -0700
Committer: Joseph K. Bradley <[email protected]>
Committed: Thu May 7 20:55:08 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala     | 2 +-
 .../apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala    | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/92f8f803/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 5af55aa..1b93e2d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
       combOp = (queue1, queue2) => {
         queue1 ++= queue2
       }
-    ).mapValues(_.toArray.reverse)  // This is an min-heap, so we reverse the order.
+    ).mapValues(_.toArray.sorted(ord.reverse))  // This is an min-heap, so we reverse the order.
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/92f8f803/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
index cb8fe4d..57216e8 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
@@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
 
 class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext {
   test("topByKey") {
-    val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2)
-      .topByKey(2)
+    val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5,
+      1), (3, 5)), 2)
+      .topByKey(5)
       .collectAsMap()
 
     assert(topMap.size === 3)
-    assert(topMap(1) === Array(2, 1))
-    assert(topMap(3) === Array(7, 5))
+    assert(topMap(1) === Array(7, 6, 3, 2, 1))
+    assert(topMap(3) === Array(7, 5, 2))
     assert(topMap(5) === Array(1))
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to