Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a432e80b8 -> 82f69594f
[SPARK-15195][PYSPARK][DOCS] Update ml.tuning PyDocs

## What changes were proposed in this pull request?

Tag classes in ml.tuning as experimental, add docs for kfolds avg metric, and copy TrainValidationSplit scaladoc for more detailed explanation.

## How was this patch tested?

built docs locally

Author: Holden Karau <[email protected]>

Closes #12967 from holdenk/SPARK-15195-pydoc-ml-tuning.

(cherry picked from commit 93353b0113158c87e09f0bad91a663a92e9cf1bc)
Signed-off-by: Nick Pentreath <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/82f69594
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/82f69594
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/82f69594

Branch: refs/heads/branch-2.0
Commit: 82f69594fa625bc3fb643895543b0448d2c03f3e
Parents: a432e80
Author: Holden Karau <[email protected]>
Authored: Tue May 10 21:20:19 2016 +0200
Committer: Nick Pentreath <[email protected]>
Committed: Tue May 10 21:20:36 2016 +0200

----------------------------------------------------------------------
 python/pyspark/ml/tuning.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/82f69594/python/pyspark/ml/tuning.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index b21cf92..0920ae6 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -33,6 +33,8 @@ __all__ = ['ParamGridBuilder', 'CrossValidator', 'CrossValidatorModel', 'TrainVa
 
 class ParamGridBuilder(object):
     r"""
+    .. note:: Experimental
+
     Builder for a param grid used in grid search-based model selection.
 
     >>> from pyspark.ml.classification import LogisticRegression
@@ -143,6 +145,8 @@ class ValidatorParams(HasSeed):
 
 class CrossValidator(Estimator, ValidatorParams):
     """
+    .. note:: Experimental
+
     K-fold cross validation.
 
     >>> from pyspark.ml.classification import LogisticRegression
@@ -260,6 +264,8 @@ class CrossValidator(Estimator, ValidatorParams):
 
 class CrossValidatorModel(Model, ValidatorParams):
     """
+    .. note:: Experimental
+
     Model from k-fold cross validation.
 
     .. versionadded:: 1.4.0
@@ -269,6 +275,8 @@ class CrossValidatorModel(Model, ValidatorParams):
         super(CrossValidatorModel, self).__init__()
         #: best model from cross validation
         self.bestModel = bestModel
+        #: Average cross-validation metrics for each paramMap in
+        #: CrossValidator.estimatorParamMaps, in the corresponding order.
         self.avgMetrics = avgMetrics
 
     def _transform(self, dataset):
@@ -294,7 +302,11 @@ class CrossValidatorModel(Model, ValidatorParams):
 
 class TrainValidationSplit(Estimator, ValidatorParams):
     """
-    Train-Validation-Split.
+    .. note:: Experimental
+
+    Validation for hyper-parameter tuning. Randomly splits the input dataset into train and
+    validation sets, and uses evaluation metric on the validation set to select the best model.
+    Similar to :class:`CrossValidator`, but only splits the set once.
 
     >>> from pyspark.ml.classification import LogisticRegression
     >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator
@@ -405,6 +417,8 @@ class TrainValidationSplit(Estimator, ValidatorParams):
 
 class TrainValidationSplitModel(Model, ValidatorParams):
     """
+    .. note:: Experimental
+
     Model from train validation split.
 
     .. versionadded:: 2.0.0


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
