This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a3146c83d98 [SPARK-45663][CORE][MLLIB] Replace
`IterableOnceOps#aggregate` with `IterableOnceOps#foldLeft`
a3146c83d98 is described below
commit a3146c83d98fe76aeb6880a40b61fcdd257685ce
Author: yangjie01 <[email protected]>
AuthorDate: Thu Oct 26 13:20:56 2023 +0800
[SPARK-45663][CORE][MLLIB] Replace `IterableOnceOps#aggregate` with
`IterableOnceOps#foldLeft`
### What changes were proposed in this pull request?
This PR replaces `IterableOnceOps#aggregate` with `IterableOnceOps#foldLeft`
because `aggregate` has been marked as deprecated since Scala 2.13.0.
```scala
deprecated("`aggregate` is not relevant for sequential collections. Use
`foldLeft(z)(seqop)` instead.", "2.13.0")
def aggregate[B](z: => B)(seqop: (B, A) => B, combop: (B, B) => B): B =
foldLeft(z)(seqop)
```
### Why are the changes needed?
Clean up deprecated API usage.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43527 from LuciferYang/SPARK-45663.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 ++---
.../scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala | 2 +-
.../scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala | 5 ++---
3 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index c6770c77b92..5dc666c62d1 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1219,8 +1219,7 @@ abstract class RDD[T: ClassTag](
// Clone the zero value since we will also be serializing it as part of
tasks
var jobResult = Utils.clone(zeroValue, sc.env.serializer.newInstance())
val cleanSeqOp = sc.clean(seqOp)
- val cleanCombOp = sc.clean(combOp)
- val aggregatePartition = (it: Iterator[T]) =>
it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp)
+ val aggregatePartition = (it: Iterator[T]) =>
it.foldLeft(zeroValue)(cleanSeqOp)
val mergeResult = (_: Int, taskResult: U) => jobResult = combOp(jobResult,
taskResult)
sc.runJob(this, aggregatePartition, mergeResult)
jobResult
@@ -1258,7 +1257,7 @@ abstract class RDD[T: ClassTag](
val cleanSeqOp = context.clean(seqOp)
val cleanCombOp = context.clean(combOp)
val aggregatePartition =
- (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp)
+ (it: Iterator[T]) => it.foldLeft(zeroValue)(cleanSeqOp)
var partiallyAggregated: RDD[U] = mapPartitions(it =>
Iterator(aggregatePartition(it)))
var numPartitions = partiallyAggregated.partitions.length
val scale = math.max(math.ceil(math.pow(numPartitions, 1.0 /
depth)).toInt, 2)
diff --git
a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala
b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala
index ce46fc8f201..f08cf44e4e1 100644
---
a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala
+++
b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala
@@ -69,7 +69,7 @@ private[spark] object StratifiedSamplingUtils extends Logging
{
val rng = new RandomDataGenerator()
rng.reSeed(seed + partition)
val seqOp = getSeqOp(withReplacement, fractions, rng, counts)
- Iterator(iter.aggregate(zeroU)(seqOp, combOp))
+ Iterator(iter.foldLeft(zeroU)(seqOp))
}
mappedPartitionRDD.reduce(combOp)
}
diff --git
a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala
b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala
index cbe2776f664..2b86c7cd344 100644
---
a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala
+++
b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala
@@ -78,9 +78,8 @@ private[evaluation] object AreaUnderCurve {
* @param curve an iterator over ordered 2D points stored in pairs
representing a curve
*/
def of(curve: Iterable[(Double, Double)]): Double = {
- curve.iterator.sliding(2).withPartial(false).aggregate(0.0)(
- seqop = (auc: Double, points: Seq[(Double, Double)]) => auc +
trapezoid(points),
- combop = _ + _
+ curve.iterator.sliding(2).withPartial(false).foldLeft(0.0)(
+ op = (auc: Double, points: Seq[(Double, Double)]) => auc +
trapezoid(points)
)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]