Repository: spark Updated Branches: refs/heads/master f9b397f54 -> 2519dcc33
[MINOR] [MLLIB] rename some functions of PythonMLLibAPI Keep the same naming conventions for PythonMLLibAPI. Only the following three functions is different from others ```scala trainNaiveBayes trainGaussianMixture trainWord2Vec ``` So change them to ```scala trainNaiveBayesModel trainGaussianMixtureModel trainWord2VecModel ``` It does not affect any users and public APIs, only to make better understand for developer and code hacker. Author: Yanbo Liang <[email protected]> Closes #7011 from yanboliang/py-mllib-api-rename and squashes the following commits: 771ffec [Yanbo Liang] rename some functions of PythonMLLibAPI Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2519dcc3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2519dcc3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2519dcc3 Branch: refs/heads/master Commit: 2519dcc33bde3a6d341790d73b5d292ea7af961a Parents: f9b397f Author: Yanbo Liang <[email protected]> Authored: Thu Jun 25 08:13:17 2015 -0700 Committer: Xiangrui Meng <[email protected]> Committed: Thu Jun 25 08:13:17 2015 -0700 ---------------------------------------------------------------------- .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 6 +++--- python/pyspark/mllib/classification.py | 2 +- python/pyspark/mllib/clustering.py | 6 +++--- python/pyspark/mllib/feature.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index c4bea7c..b16903a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -278,7 +278,7 @@ private[python] class PythonMLLibAPI extends Serializable { /** * Java stub for NaiveBayes.train() */ - def trainNaiveBayes( + def trainNaiveBayesModel( data: JavaRDD[LabeledPoint], lambda: Double): JList[Object] = { val model = NaiveBayes.train(data.rdd, lambda) @@ -346,7 +346,7 @@ private[python] class PythonMLLibAPI extends Serializable { * Java stub for Python mllib GaussianMixture.run() * Returns a list containing weights, mean and covariance of each mixture component. */ - def trainGaussianMixture( + def trainGaussianMixtureModel( data: JavaRDD[Vector], k: Int, convergenceTol: Double, @@ -553,7 +553,7 @@ private[python] class PythonMLLibAPI extends Serializable { * @param seed initial seed for random generator * @return A handle to java Word2VecModelWrapper instance at python side */ - def trainWord2Vec( + def trainWord2VecModel( dataJRDD: JavaRDD[java.util.ArrayList[String]], vectorSize: Int, learningRate: Double, http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/classification.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 2698f10..735d45b 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -581,7 +581,7 @@ class NaiveBayes(object): first = data.first() if not isinstance(first, LabeledPoint): raise ValueError("`data` should be an RDD of LabeledPoint") - labels, pi, theta = callMLlibFunc("trainNaiveBayes", data, lambda_) + labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_) return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta)) http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/clustering.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index e6ef729..8bc0654 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -265,9 +265,9 @@ class GaussianMixture(object): initialModelWeights = initialModel.weights initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)] initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)] - weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k, - convergenceTol, maxIterations, seed, initialModelWeights, - initialModelMu, initialModelSigma) + weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector), + k, convergenceTol, maxIterations, seed, + initialModelWeights, initialModelMu, initialModelSigma) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj) http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/feature.py ---------------------------------------------------------------------- diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 334f5b8..f00bb93 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -549,7 +549,7 @@ class Word2Vec(object): """ if not isinstance(data, RDD): raise TypeError("data should be an RDD of list of string") - jmodel = callMLlibFunc("trainWord2Vec", data, int(self.vectorSize), + jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize), float(self.learningRate), int(self.numPartitions), int(self.numIterations), int(self.seed), int(self.minCount)) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
