This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1b7df31fa9e8 [SPARK-52406][CORE][TESTS] Move the test case named
"benchmark" from `PercentileHeapSuite` to `PercentileHeapBenchmark`
1b7df31fa9e8 is described below
commit 1b7df31fa9e81319cde30b78ccdc979c762ad662
Author: yangjie01 <[email protected]>
AuthorDate: Sun Jun 8 15:37:58 2025 +0800
[SPARK-52406][CORE][TESTS] Move the test case named "benchmark" from
`PercentileHeapSuite` to `PercentileHeapBenchmark`
### What changes were proposed in this pull request?
This PR moves the test case named `benchmark` (previously marked `ignore`)
from `PercentileHeapSuite` into a separate file, `PercentileHeapBenchmark`,
and cleans up the original test case code along the way.
Compared to the original test, the new benchmark covers more combinations
along two dimensions: `Percentile` and `Input Size`.
### Why are the changes needed?
The original test case was ignored and therefore never ran. Turning it into a
regular benchmark with checked-in results makes it possible to track the
performance of this scenario continuously.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- Pass GitHub Actions.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51099 from LuciferYang/PercentileHeap-bench.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../PercentileHeapBenchmark-jdk21-results.txt | 41 ++++++++++++
.../benchmarks/PercentileHeapBenchmark-results.txt | 41 ++++++++++++
.../util/collection/PercentileHeapBenchmark.scala | 74 ++++++++++++++++++++++
.../util/collection/PercentileHeapSuite.scala | 24 -------
4 files changed, 156 insertions(+), 24 deletions(-)
diff --git a/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt
b/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt
new file mode 100644
index 000000000000..32dac72c2fca
--- /dev/null
+++ b/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt
@@ -0,0 +1,41 @@
+================================================================================================
+PercentileHeap Operations
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 10000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 59 59
0 0.2 5916.3 1.0X
+Percentile: 0.9 59 59
0 0.2 5871.2 1.0X
+Percentile: 0.95 59 59
0 0.2 5866.8 1.0X
+Percentile: 0.99 59 59
1 0.2 5861.6 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 50000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 1465 1466
1 0.0 29294.4 1.0X
+Percentile: 0.9 1459 1461
3 0.0 29170.8 1.0X
+Percentile: 0.95 1456 1458
2 0.0 29127.4 1.0X
+Percentile: 0.99 1455 1458
2 0.0 29106.2 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 100000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 5834 5836
2 0.0 58336.8 1.0X
+Percentile: 0.9 5830 5838
9 0.0 58295.2 1.0X
+Percentile: 0.95 5830 5832
2 0.0 58302.2 1.0X
+Percentile: 0.99 5819 5822
3 0.0 58190.4 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 200000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 23300 23326
31 0.0 116497.8 1.0X
+Percentile: 0.9 23303 23336
31 0.0 116515.2 1.0X
+Percentile: 0.95 23297 23328
35 0.0 116483.0 1.0X
+Percentile: 0.99 23276 23292
15 0.0 116381.6 1.0X
+
+
diff --git a/core/benchmarks/PercentileHeapBenchmark-results.txt
b/core/benchmarks/PercentileHeapBenchmark-results.txt
new file mode 100644
index 000000000000..1c0f4761ee14
--- /dev/null
+++ b/core/benchmarks/PercentileHeapBenchmark-results.txt
@@ -0,0 +1,41 @@
+================================================================================================
+PercentileHeap Operations
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 10000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 1 1
0 10.0 100.4 1.0X
+Percentile: 0.9 1 1
0 18.0 55.6 1.8X
+Percentile: 0.95 0 0
0 23.2 43.2 2.3X
+Percentile: 0.99 0 0
0 31.1 32.1 3.1X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 50000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 5 6
0 9.2 108.2 1.0X
+Percentile: 0.9 3 3
0 15.7 63.8 1.7X
+Percentile: 0.95 2 2
0 20.4 49.0 2.2X
+Percentile: 0.99 2 2
0 29.4 34.0 3.2X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 100000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 12 13
0 8.2 122.5 1.0X
+Percentile: 0.9 6 7
0 15.6 64.3 1.9X
+Percentile: 0.95 5 5
0 20.9 47.7 2.6X
+Percentile: 0.99 4 4
0 27.9 35.8 3.4X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 200000: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5 26 26
1 7.7 129.7 1.0X
+Percentile: 0.9 13 14
1 14.9 67.3 1.9X
+Percentile: 0.95 10 10
0 19.6 51.0 2.5X
+Percentile: 0.99 7 7
0 28.4 35.2 3.7X
+
+
diff --git
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
new file mode 100644
index 000000000000..4d88d8336331
--- /dev/null
+++
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util.collection
+
+import scala.util.Random
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+
+/**
+ * Benchmark for PercentileHeap performance.
+ * Measures heap insertion and percentile calculation performance
+ * under various heap sizes and percentile values.
+ * {{{
+ *   To run this benchmark:
+ *   1. without sbt: bin/spark-submit --class <this class> <spark core test jar>
+ *   2. build/sbt "core/Test/runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/Test/runMain <this class>"
+ *      Results will be written to "benchmarks/PercentileHeapBenchmark-results.txt".
+ * }}}
+ */
+object PercentileHeapBenchmark extends BenchmarkBase {
+
+  /**
+   * Entry point of the suite: runs the single "PercentileHeap Operations" group.
+   *
+   * @param mainArgs command-line arguments; not read by this benchmark.
+   */
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+
+    runBenchmark("PercentileHeap Operations") {
+      percentileHeapBenchmark()
+    }
+  }
+
+  /**
+   * Builds and runs one `Benchmark` per input size (10000, 50000, 100000, 200000), each
+   * containing one timer case per percentile (0.5, 0.9, 0.95, 0.99).
+   */
+  private def percentileHeapBenchmark(): Unit = {
+
+    for (inputSize <- Seq(10000, 50000, 100000, 200000)) {
+      // `inputSize` is also passed as the second constructor argument; presumably this is the
+      // per-iteration value count used for the Rate / Per Row columns -- confirm against Benchmark.
+      val benchmark = new Benchmark(s"PercentileHeap Operations - Input Size: $inputSize",
+        inputSize, output = output)
+      for (percentile <- Seq(0.5, 0.9, 0.95, 0.99)) {
+        // The literal 3 is presumably the iteration count for the case -- confirm against
+        // Benchmark.addTimerCase.
+        benchmark.addTimerCase(s"Percentile: $percentile", 3) { timer =>
+          performPercentileHeapOperations(inputSize, percentile, timer)
+        }
+      }
+      benchmark.run()
+    }
+  }
+
+  /**
+   * Runs one measured pass: inserts a shuffled `0 until inputSize` into a fresh
+   * `PercentileHeap(percentile)` and, after every insert, calls `percentile()` once per
+   * element currently in the heap.
+   *
+   * @param inputSize  number of distinct integers to insert.
+   * @param percentile percentile handed to the `PercentileHeap` constructor.
+   * @param timer      timer of the enclosing case; only the insert/query loop is timed.
+   */
+  private def performPercentileHeapOperations(
+      inputSize: Int, percentile: Double, timer: Benchmark.Timer): Unit = {
+    // Input generation, shuffling and heap construction happen before startTiming(), so they
+    // are excluded from the measurement. Random.shuffle is not seeded here, so the insertion
+    // order differs from run to run.
+    val input: Seq[Int] = 0 until inputSize
+    val shuffled = Random.shuffle(input).toArray
+    val h = new PercentileHeap(percentile)
+
+    timer.startTiming()
+    shuffled.foreach { x =>
+      h.insert(x)
+      // NOTE(review): the value returned by percentile() is discarded. The checked-in JDK 17
+      // numbers grow roughly linearly with input size while the JDK 21 numbers grow roughly
+      // quadratically, which suggests the JIT may be eliminating this inner loop on JDK 17.
+      // Confirm, and consider consuming the result if so.
+      for (_ <- 0 until h.size()) {
+        h.percentile()
+      }
+    }
+    timer.stopTiming()
+  }
+}
diff --git
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
index 20def45a17c1..41e40f77e6ec 100644
---
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
+++
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
@@ -57,28 +57,4 @@ class PercentileHeapSuite extends SparkFunSuite {
}
}
}
-
- ignore("benchmark") {
- val input: Seq[Int] = 0 until 1000
- val numRuns = 1000
-
- def kernel(): Long = {
- val shuffled = Random.shuffle(input).toArray
- val start = System.nanoTime()
- val h = new PercentileHeap(0.95)
- shuffled.foreach { x =>
- h.insert(x)
- for (_ <- 0 until h.size()) h.percentile()
- }
- System.nanoTime() - start
- }
- for (_ <- 0 until numRuns) kernel() // warmup
-
- var elapsed: Long = 0
- for (_ <- 0 until numRuns) elapsed += kernel()
- val perOp = elapsed / (numRuns * input.length)
- // scalastyle:off println
- println(s"$perOp ns per op on heaps of size ${input.length}")
- // scalastyle:on println
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]