Repository: spark
Updated Branches:
  refs/heads/master cc86fcd0d -> 96a59109a


[SPARK-18268][ML][MLLIB] ALS fail with better message if ratings is empty rdd

## What changes were proposed in this pull request?
ALS.run fail with better message if ratings is empty rdd
ALS.train and ALS.trainImplicit are also affected

## How was this patch tested?
added new tests

Author: Sandeep Singh <[email protected]>

Closes #15809 from techaddict/SPARK-18268.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96a59109
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96a59109
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96a59109

Branch: refs/heads/master
Commit: 96a59109a912db9d5f6fc07dedd9d8a3eee97b96
Parents: cc86fcd
Author: Sandeep Singh <[email protected]>
Authored: Thu Nov 10 10:33:35 2016 +0000
Committer: Sean Owen <[email protected]>
Committed: Thu Nov 10 10:33:35 2016 +0000

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/ml/recommendation/ALS.scala  | 1 +
 .../scala/org/apache/spark/mllib/recommendation/ALS.scala    | 2 ++
 .../scala/org/apache/spark/ml/recommendation/ALSSuite.scala  | 8 ++++++++
 .../org/apache/spark/mllib/recommendation/ALSSuite.scala     | 7 +++++++
 4 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala 
b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 02e2384..6d2c59a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -678,6 +678,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging {
       checkpointInterval: Int = 10,
       seed: Long = 0L)(
       implicit ord: Ordering[ID]): (RDD[(ID, Array[Float])], RDD[(ID, 
Array[Float])]) = {
+    require(!ratings.isEmpty(), s"No ratings available from $ratings")
     require(intermediateRDDStorageLevel != StorageLevel.NONE,
       "ALS is not designed to run without persisting intermediate RDDs.")
     val sc = ratings.sparkContext

http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index cc9ee15..0039db7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -236,6 +236,8 @@ class ALS private (
    */
   @Since("0.8.0")
   def run(ratings: RDD[Rating]): MatrixFactorizationModel = {
+    require(!ratings.isEmpty(), s"No ratings available from $ratings")
+
     val sc = ratings.context
 
     val numUserBlocks = if (this.numUserBlocks == -1) {

http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
index d0aa2cd..b923bac 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala
@@ -33,6 +33,7 @@ import org.apache.spark._
 import org.apache.spark.internal.Logging
 import org.apache.spark.ml.linalg.Vectors
 import org.apache.spark.ml.recommendation.ALS._
+import org.apache.spark.ml.recommendation.ALS.Rating
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
 import org.apache.spark.ml.util.TestingUtils._
 import org.apache.spark.mllib.util.MLlibTestSparkContext
@@ -539,6 +540,13 @@ class ALSSuite
       }.getMessage.contains("was out of Integer range"))
     }
   }
+
+  test("SPARK-18268: ALS with empty RDD should fail with better message") {
+    val ratings = sc.parallelize(Array.empty[Rating[Int]])
+    intercept[IllegalArgumentException] {
+      ALS.train(ratings)
+    }
+  }
 }
 
 class ALSCleanerSuite extends SparkFunSuite {

http://git-wip-us.apache.org/repos/asf/spark/blob/96a59109/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index d9dc557..b08ad99 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -188,6 +188,13 @@ class ALSSuite extends SparkFunSuite with 
MLlibTestSparkContext {
     testALS(100, 200, 2, 15, 0.7, 0.4, false, false, false, -1, -1, false)
   }
 
+  test("SPARK-18268: ALS with empty RDD should fail with better message") {
+    val ratings = sc.parallelize(Array.empty[Rating])
+    intercept[IllegalArgumentException] {
+      new ALS().run(ratings)
+    }
+  }
+
   /**
    * Test if we can correctly factorize R = U * P where U and P are of known 
rank.
    *


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to