Repository: spark Updated Branches: refs/heads/master cd1d4110c -> 92f8f803a
[SPARK-7452] [MLLIB] fix bug in topBykey and update test the toArray function of the BoundedPriorityQueue does not necessarily preserve order. Add a counter-example as the test, which would fail the original impl. Author: Shuo Xiang <[email protected]> Closes #5990 from coderxiang/topbykey-test and squashes the following commits: 98804c9 [Shuo Xiang] fix bug in topBykey and update test Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/92f8f803 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/92f8f803 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/92f8f803 Branch: refs/heads/master Commit: 92f8f803a68e0c16771e9793098c6d76dfdf99af Parents: cd1d411 Author: Shuo Xiang <[email protected]> Authored: Thu May 7 20:55:08 2015 -0700 Committer: Joseph K. Bradley <[email protected]> Committed: Thu May 7 20:55:08 2015 -0700 ---------------------------------------------------------------------- .../org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala | 2 +- .../apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/92f8f803/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala index 5af55aa..1b93e2d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se combOp = (queue1, queue2) => { queue1 ++= queue2 } - ).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order. + ).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order. } } http://git-wip-us.apache.org/repos/asf/spark/blob/92f8f803/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala index cb8fe4d..57216e8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala @@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext { test("topByKey") { - val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2) - .topByKey(2) + val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5, + 1), (3, 5)), 2) + .topByKey(5) .collectAsMap() assert(topMap.size === 3) - assert(topMap(1) === Array(2, 1)) - assert(topMap(3) === Array(7, 5)) + assert(topMap(1) === Array(7, 6, 3, 2, 1)) + assert(topMap(3) === Array(7, 5, 2)) assert(topMap(5) === Array(1)) } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
