Repository: spark Updated Branches: refs/heads/master 11e025956 -> 257cde7c3
[SPARK-6421][MLLIB] _regression_train_wrapper does not test initialWeights correctly: Weight parameters must be initialized correctly even when a numpy array is passed as the initial weights. Author: lewuathe <lewua...@me.com> Closes #5101 from Lewuathe/SPARK-6421 and squashes the following commits: 7795201 [lewuathe] Fix lint-python errors 21d4fe3 [lewuathe] Fix init logic of weights Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/257cde7c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/257cde7c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/257cde7c Branch: refs/heads/master Commit: 257cde7c363efb3317bfb5c13975cca9154894e2 Parents: 11e0259 Author: lewuathe <lewua...@me.com> Authored: Fri Mar 20 17:18:18 2015 -0400 Committer: Xiangrui Meng <m...@databricks.com> Committed: Fri Mar 20 17:18:18 2015 -0400 ---------------------------------------------------------------------- python/pyspark/mllib/regression.py | 3 ++- python/pyspark/mllib/tests.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/257cde7c/python/pyspark/mllib/regression.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 015a786..414a0ad 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -163,7 +163,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first) - initial_weights = initial_weights or [0.0] * len(data.first().features) + if initial_weights is None: + initial_weights = [0.0] * len(data.first().features) weights, intercept = train_func(data, 
_convert_to_vector(initial_weights)) return modelClass(weights, intercept) http://git-wip-us.apache.org/repos/asf/spark/blob/257cde7c/python/pyspark/mllib/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 5328d99..1550196 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -323,6 +323,13 @@ class ListTests(PySparkTestCase): self.assertTrue(gbt_model.predict(features[2]) <= 0) self.assertTrue(gbt_model.predict(features[3]) > 0) + try: + LinearRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + LassoWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + RidgeRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0])) + except ValueError: + self.fail() + class StatTests(PySparkTestCase): # SPARK-4023 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org