Repository: spark Updated Branches: refs/heads/master cc86fcd0d -> 96a59109a
[SPARK-18268][ML][MLLIB] ALS fail with better message if ratings is empty rdd ## What changes were proposed in this pull request? ALS.run now fails with a better message if ratings is an empty RDD. ALS.train and ALS.trainImplicit are also affected. ## How was this patch tested? Added new tests. Author: Sandeep Singh <[email protected]> Closes #15809 from techaddict/SPARK-18268. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96a59109 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96a59109 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96a59109 Branch: refs/heads/master Commit: 96a59109a912db9d5f6fc07dedd9d8a3eee97b96 Parents: cc86fcd Author: Sandeep Singh <[email protected]> Authored: Thu Nov 10 10:33:35 2016 +0000 Committer: Sean Owen <[email protected]> Committed: Thu Nov 10 10:33:35 2016 +0000 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/ml/recommendation/ALS.scala | 1 + .../scala/org/apache/spark/mllib/recommendation/ALS.scala | 2 ++ .../scala/org/apache/spark/ml/recommendation/ALSSuite.scala | 8 ++++++++ .../org/apache/spark/mllib/recommendation/ALSSuite.scala | 7 +++++++ 4 files changed, 18 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 02e2384..6d2c59a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -678,6 +678,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { checkpointInterval: Int = 10, seed: Long = 0L)( implicit ord: 
Ordering[ID]): (RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { + require(!ratings.isEmpty(), s"No ratings available from $ratings") require(intermediateRDDStorageLevel != StorageLevel.NONE, "ALS is not designed to run without persisting intermediate RDDs.") val sc = ratings.sparkContext http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index cc9ee15..0039db7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -236,6 +236,8 @@ class ALS private ( */ @Since("0.8.0") def run(ratings: RDD[Rating]): MatrixFactorizationModel = { + require(!ratings.isEmpty(), s"No ratings available from $ratings") + val sc = ratings.context val numUserBlocks = if (this.numUserBlocks == -1) { http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index d0aa2cd..b923bac 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.recommendation.ALS._ +import org.apache.spark.ml.recommendation.ALS.Rating import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import 
org.apache.spark.mllib.util.MLlibTestSparkContext @@ -539,6 +540,13 @@ class ALSSuite }.getMessage.contains("was out of Integer range")) } } + + test("SPARK-18268: ALS with empty RDD should fail with better message") { + val ratings = sc.parallelize(Array.empty[Rating[Int]]) + intercept[IllegalArgumentException] { + ALS.train(ratings) + } + } } class ALSCleanerSuite extends SparkFunSuite { http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index d9dc557..b08ad99 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -188,6 +188,13 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { testALS(100, 200, 2, 15, 0.7, 0.4, false, false, false, -1, -1, false) } + test("SPARK-18268: ALS with empty RDD should fail with better message") { + val ratings = sc.parallelize(Array.empty[Rating]) + intercept[IllegalArgumentException] { + new ALS().run(ratings) + } + } + /** * Test if we can correctly factorize R = U * P where U and P are of known rank. * --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
