Repository: spark Updated Branches: refs/heads/master 1bc435ae3 -> 3aff0866a
[SPARK-9774] [ML] [PYSPARK] Add python api for ml regression isotonicregression Add the Python API for isotonicregression. Author: Holden Karau <[email protected]> Closes #8214 from holdenk/SPARK-9774-add-python-api-for-ml-regression-isotonicregression. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aff0866 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aff0866 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aff0866 Branch: refs/heads/master Commit: 3aff0866a8601b4daf760d6bf175f68d5a0c8912 Parents: 1bc435a Author: Holden Karau <[email protected]> Authored: Wed Oct 7 17:50:35 2015 -0700 Committer: Joseph K. Bradley <[email protected]> Committed: Wed Oct 7 17:50:35 2015 -0700 ---------------------------------------------------------------------- .../pyspark/ml/param/_shared_params_code_gen.py | 5 +- python/pyspark/ml/param/shared.py | 27 +++++ python/pyspark/ml/regression.py | 118 +++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3aff0866/python/pyspark/ml/param/_shared_params_code_gen.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 5b39e5d..45a94e9 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -133,7 +133,10 @@ if __name__ == "__main__": ("thresholds", "Thresholds in multi-class classification to adjust the probability of " + "predicting each class. Array must have length equal to the number of classes, with " + "values >= 0. 
The class with largest value p/t is predicted, where p is the original " + - "probability of that class and t is the class' threshold.", None)] + "probability of that class and t is the class' threshold.", None), + ("weightCol", "weight column name. If this is not set or empty, we treat " + + "all instance weights as 1.0.", None)] + code = [] for name, doc, defaultValueStr in shared: param_code = _gen_param_header(name, doc, defaultValueStr) http://git-wip-us.apache.org/repos/asf/spark/blob/3aff0866/python/pyspark/ml/param/shared.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index af12181..8c438bc 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -570,6 +570,33 @@ class HasThresholds(Params): return self.getOrDefault(self.thresholds) +class HasWeightCol(Params): + """ + Mixin for param weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0.. + """ + + # a placeholder to make it appear in the generated doc + weightCol = Param(Params._dummy(), "weightCol", "weight column name. If this is not set or empty, we treat all instance weights as 1.0.") + + def __init__(self): + super(HasWeightCol, self).__init__() + #: param for weight column name. If this is not set or empty, we treat all instance weights as 1.0. + self.weightCol = Param(self, "weightCol", "weight column name. If this is not set or empty, we treat all instance weights as 1.0.") + + def setWeightCol(self, value): + """ + Sets the value of :py:attr:`weightCol`. + """ + self._paramMap[self.weightCol] = value + return self + + def getWeightCol(self): + """ + Gets the value of weightCol or its default value. + """ + return self.getOrDefault(self.weightCol) + + class DecisionTreeParams(Params): """ Mixin for Decision Tree parameters. 
http://git-wip-us.apache.org/repos/asf/spark/blob/3aff0866/python/pyspark/ml/regression.py ---------------------------------------------------------------------- diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index e12abeb..eb5f4bd 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -25,6 +25,7 @@ from pyspark.mllib.common import inherit_doc __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel', 'DecisionTreeRegressor', 'DecisionTreeRegressionModel', 'GBTRegressor', 'GBTRegressionModel', + 'IsotonicRegression', 'IsotonicRegressionModel', 'LinearRegression', 'LinearRegressionModel', 'RandomForestRegressor', 'RandomForestRegressionModel'] @@ -142,6 +143,123 @@ class LinearRegressionModel(JavaModel): return self._call_java("intercept") +@inherit_doc +class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + HasWeightCol): + """ + .. note:: Experimental + + Currently implemented using parallelized pool adjacent violators algorithm. + Only univariate (single feature) algorithm supported. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> ir = IsotonicRegression() + >>> model = ir.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> model.boundaries + DenseVector([0.0, 1.0]) + """ + + # a placeholder to make it appear in the generated doc + isotonic = \ + Param(Params._dummy(), "isotonic", + "whether the output sequence should be isotonic/increasing (true) or " + + "antitonic/decreasing (false).") + featureIndex = \ + Param(Params._dummy(), "featureIndex", + "The index of the feature if featuresCol is a vector column, no effect otherwise.") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + weightCol=None, isotonic=True, featureIndex=0): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + weightCol=None, isotonic=True, featureIndex=0): + """ + super(IsotonicRegression, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.IsotonicRegression", self.uid) + self.isotonic = \ + Param(self, "isotonic", + "whether the output sequence should be isotonic/increasing (true) or " + + "antitonic/decreasing (false).") + self.featureIndex = \ + Param(self, "featureIndex", + "The index of the feature if featuresCol is a vector column, no effect " + + "otherwise.") + self._setDefault(isotonic=True, featureIndex=0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + weightCol=None, isotonic=True, featureIndex=0): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + weightCol=None, isotonic=True, featureIndex=0): + Set the params for IsotonicRegression. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return IsotonicRegressionModel(java_model) + + def setIsotonic(self, value): + """ + Sets the value of :py:attr:`isotonic`. + """ + self._paramMap[self.isotonic] = value + return self + + def getIsotonic(self): + """ + Gets the value of isotonic or its default value. + """ + return self.getOrDefault(self.isotonic) + + def setFeatureIndex(self, value): + """ + Sets the value of :py:attr:`featureIndex`. + """ + self._paramMap[self.featureIndex] = value + return self + + def getFeatureIndex(self): + """ + Gets the value of featureIndex or its default value. + """ + return self.getOrDefault(self.featureIndex) + + +class IsotonicRegressionModel(JavaModel): + """ + .. note:: Experimental + + Model fitted by IsotonicRegression. + """ + + @property + def boundaries(self): + """ + Model boundaries. + """ + return self._call_java("boundaries") + + @property + def predictions(self): + """ + Predictions associated with the boundaries at the same index, monotone because of isotonic + regression. + """ + return self._call_java("predictions") + + class TreeRegressorParams(object): """ Private class to track supported impurity measures. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
