Repository: spark
Updated Branches:
refs/heads/master b9b54b1c8 -> c5f9b89dd
[SPARK-18608][ML] Fix double caching
## What changes were proposed in this pull request?
Replace `df.rdd.getStorageLevel` with `df.storageLevel` when deciding whether the
input dataset is already persisted.
The command `find . -name '*.scala' | xargs -i bash -c 'egrep -in
"\.rdd\.getStorageLevel" {} && echo {}'` was used to make sure all algorithms
affected by this issue are fixed.
Previous discussion in other PRs: https://github.com/apache/spark/pull/19107,
https://github.com/apache/spark/pull/17014
## How was this patch tested?
Covered by existing tests.
Author: Zheng RuiFeng <[email protected]>
Closes #19197 from zhengruifeng/double_caching.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5f9b89d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5f9b89d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5f9b89d
Branch: refs/heads/master
Commit: c5f9b89dda40ffaa4622a7ba2b3d0605dbe815c0
Parents: b9b54b1
Author: Zheng RuiFeng <[email protected]>
Authored: Tue Sep 12 11:37:05 2017 -0700
Committer: Joseph K. Bradley <[email protected]>
Committed: Tue Sep 12 11:37:05 2017 -0700
----------------------------------------------------------------------
.../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +-
.../scala/org/apache/spark/ml/classification/OneVsRest.scala | 4 ++--
mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 2 +-
.../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 2 +-
.../org/apache/spark/ml/regression/IsotonicRegression.scala | 2 +-
.../scala/org/apache/spark/ml/regression/LinearRegression.scala | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index f491a67..cbc8f4a 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -484,7 +484,7 @@ class LogisticRegression @Since("1.2.0") (
}
override protected[spark] def train(dataset: Dataset[_]):
LogisticRegressionModel = {
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
train(dataset, handlePersistence)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
----------------------------------------------------------------------
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 99bb234..942e981 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -165,7 +165,7 @@ final class OneVsRestModel private[ml] (
val newDataset = dataset.withColumn(accColName, initUDF())
// persist if underlying dataset is not persistent.
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
if (handlePersistence) {
newDataset.persist(StorageLevel.MEMORY_AND_DISK)
}
@@ -358,7 +358,7 @@ final class OneVsRest @Since("1.4.0") (
}
// persist if underlying dataset is not persistent.
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
if (handlePersistence) {
multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index e02b532..f2af7fe 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -304,7 +304,7 @@ class KMeans @Since("1.5.0") (
override def fit(dataset: Dataset[_]): KMeansModel = {
transformSchema(dataset.schema, logging = true)
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
val instances: RDD[OldVector] =
dataset.select(col($(featuresCol))).rdd.map {
case Row(point: Vector) => OldVectors.fromML(point)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
----------------------------------------------------------------------
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 16821f3..4b46c38 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -213,7 +213,7 @@ class AFTSurvivalRegression @Since("1.6.0")
(@Since("1.6.0") override val uid: S
override def fit(dataset: Dataset[_]): AFTSurvivalRegressionModel = {
transformSchema(dataset.schema, logging = true)
val instances = extractAFTPoints(dataset)
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK)
val featuresSummarizer = {
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
----------------------------------------------------------------------
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 529f66e..8faab52 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -165,7 +165,7 @@ class IsotonicRegression @Since("1.5.0") (@Since("1.5.0")
override val uid: Stri
transformSchema(dataset.schema, logging = true)
// Extract columns from data. If dataset is persisted, do not persist
oldDataset.
val instances = extractWeightedLabeledPoints(dataset)
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK)
val instr = Instrumentation.create(this, dataset)
http://git-wip-us.apache.org/repos/asf/spark/blob/c5f9b89d/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index ed431f5..b2a9681 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -251,7 +251,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0")
override val uid: String
return lrModel
}
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
+ val handlePersistence = dataset.storageLevel == StorageLevel.NONE
if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK)
val (featuresSummarizer, ySummarizer) = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]