This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b7df31fa9e8 [SPARK-52406][CORE][TESTS] Move the test case named 
"benchmark" from `PercentileHeapSuite` to `PercentileHeapBenchmark`
1b7df31fa9e8 is described below

commit 1b7df31fa9e81319cde30b78ccdc979c762ad662
Author: yangjie01 <[email protected]>
AuthorDate: Sun Jun 8 15:37:58 2025 +0800

    [SPARK-52406][CORE][TESTS] Move the test case named "benchmark" from 
`PercentileHeapSuite` to `PercentileHeapBenchmark`
    
    ### What changes were proposed in this pull request?
    
    This PR moves the previously ignored test case named `benchmark` out of 
`PercentileHeapSuite` into a separate file, `PercentileHeapBenchmark`, and 
cleans up the original test case code along the way.
    
    Compared to the original test, the new benchmark covers more scenarios by 
combining two dimensions: `Percentile` and `Input Size`.
    
    ### Why are the changes needed?
    A dedicated benchmark with checked-in result files makes it possible to 
keep tracking the performance of this `benchmark` scenario over time.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    - Pass GitHub Actions.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #51099 from LuciferYang/PercentileHeap-bench.
    
    Authored-by: yangjie01 <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 .../PercentileHeapBenchmark-jdk21-results.txt      | 41 ++++++++++++
 .../benchmarks/PercentileHeapBenchmark-results.txt | 41 ++++++++++++
 .../util/collection/PercentileHeapBenchmark.scala  | 74 ++++++++++++++++++++++
 .../util/collection/PercentileHeapSuite.scala      | 24 -------
 4 files changed, 156 insertions(+), 24 deletions(-)

diff --git a/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt 
b/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt
new file mode 100644
index 000000000000..32dac72c2fca
--- /dev/null
+++ b/core/benchmarks/PercentileHeapBenchmark-jdk21-results.txt
@@ -0,0 +1,41 @@
+================================================================================================
+PercentileHeap Operations
+================================================================================================
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 10000:  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                           59             59    
       0          0.2        5916.3       1.0X
+Percentile: 0.9                                           59             59    
       0          0.2        5871.2       1.0X
+Percentile: 0.95                                          59             59    
       0          0.2        5866.8       1.0X
+Percentile: 0.99                                          59             59    
       1          0.2        5861.6       1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 50000:  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                         1465           1466    
       1          0.0       29294.4       1.0X
+Percentile: 0.9                                         1459           1461    
       3          0.0       29170.8       1.0X
+Percentile: 0.95                                        1456           1458    
       2          0.0       29127.4       1.0X
+Percentile: 0.99                                        1455           1458    
       2          0.0       29106.2       1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 100000:  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                          5834           5836   
        2          0.0       58336.8       1.0X
+Percentile: 0.9                                          5830           5838   
        9          0.0       58295.2       1.0X
+Percentile: 0.95                                         5830           5832   
        2          0.0       58302.2       1.0X
+Percentile: 0.99                                         5819           5822   
        3          0.0       58190.4       1.0X
+
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 200000:  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                         23300          23326   
       31          0.0      116497.8       1.0X
+Percentile: 0.9                                         23303          23336   
       31          0.0      116515.2       1.0X
+Percentile: 0.95                                        23297          23328   
       35          0.0      116483.0       1.0X
+Percentile: 0.99                                        23276          23292   
       15          0.0      116381.6       1.0X
+
+
diff --git a/core/benchmarks/PercentileHeapBenchmark-results.txt 
b/core/benchmarks/PercentileHeapBenchmark-results.txt
new file mode 100644
index 000000000000..1c0f4761ee14
--- /dev/null
+++ b/core/benchmarks/PercentileHeapBenchmark-results.txt
@@ -0,0 +1,41 @@
+================================================================================================
+PercentileHeap Operations
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 10000:  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                            1              1    
       0         10.0         100.4       1.0X
+Percentile: 0.9                                            1              1    
       0         18.0          55.6       1.8X
+Percentile: 0.95                                           0              0    
       0         23.2          43.2       2.3X
+Percentile: 0.99                                           0              0    
       0         31.1          32.1       3.1X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 50000:  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                            5              6    
       0          9.2         108.2       1.0X
+Percentile: 0.9                                            3              3    
       0         15.7          63.8       1.7X
+Percentile: 0.95                                           2              2    
       0         20.4          49.0       2.2X
+Percentile: 0.99                                           2              2    
       0         29.4          34.0       3.2X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 100000:  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                            12             13   
        0          8.2         122.5       1.0X
+Percentile: 0.9                                             6              7   
        0         15.6          64.3       1.9X
+Percentile: 0.95                                            5              5   
        0         20.9          47.7       2.6X
+Percentile: 0.99                                            4              4   
        0         27.9          35.8       3.4X
+
+OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.11.0-1015-azure
+AMD EPYC 7763 64-Core Processor
+PercentileHeap Operations - Input Size: 200000:  Best Time(ms)   Avg Time(ms)  
 Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------------
+Percentile: 0.5                                            26             26   
        1          7.7         129.7       1.0X
+Percentile: 0.9                                            13             14   
        1         14.9          67.3       1.9X
+Percentile: 0.95                                           10             10   
        0         19.6          51.0       2.5X
+Percentile: 0.99                                            7              7   
        0         28.4          35.2       3.7X
+
+
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
new file mode 100644
index 000000000000..4d88d8336331
--- /dev/null
+++ 
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapBenchmark.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util.collection
+
+import scala.util.Random
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+
+/**
+ * Benchmark for PercentileHeap performance.
+ * Measures heap insertion and percentile calculation performance
+ * under various heap sizes and percentile values.
+ * {{{
+ *   To run this benchmark:
+ *   1. without sbt: bin/spark-submit --class <this class> <spark core test 
jar>
+ *   2. build/sbt "core/Test/runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt 
"core/Test/runMain <this class>"
+ *      Results will be written to 
"benchmarks/PercentileHeapBenchmark-results.txt".
+ * }}}
+ */
+object PercentileHeapBenchmark extends BenchmarkBase {
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+
+    runBenchmark("PercentileHeap Operations") {
+      percentileHeapBenchmark()
+    }
+  }
+
+  private def percentileHeapBenchmark(): Unit = {
+
+    for (inputSize <- Seq(10000, 50000, 100000, 200000)) {
+      val benchmark = new Benchmark(s"PercentileHeap Operations - Input Size: 
$inputSize",
+        inputSize, output = output)
+      for (percentile <- Seq(0.5, 0.9, 0.95, 0.99)) {
+        benchmark.addTimerCase(s"Percentile: $percentile", 3) { timer =>
+          performPercentileHeapOperations(inputSize, percentile, timer)
+        }
+      }
+      benchmark.run()
+    }
+  }
+
+  private def performPercentileHeapOperations(
+       inputSize: Int, percentile: Double, timer: Benchmark.Timer): Unit = {
+    val input: Seq[Int] = 0 until inputSize
+    val shuffled = Random.shuffle(input).toArray
+    val h = new PercentileHeap(percentile)
+
+    timer.startTiming()
+    shuffled.foreach { x =>
+      h.insert(x)
+      for (_ <- 0 until h.size()) {
+        h.percentile()
+      }
+    }
+    timer.stopTiming()
+  }
+}
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
index 20def45a17c1..41e40f77e6ec 100644
--- 
a/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
+++ 
b/core/src/test/scala/org/apache/spark/util/collection/PercentileHeapSuite.scala
@@ -57,28 +57,4 @@ class PercentileHeapSuite extends SparkFunSuite {
       }
     }
   }
-
-  ignore("benchmark") {
-    val input: Seq[Int] = 0 until 1000
-    val numRuns = 1000
-
-    def kernel(): Long = {
-      val shuffled = Random.shuffle(input).toArray
-      val start = System.nanoTime()
-      val h = new PercentileHeap(0.95)
-      shuffled.foreach { x =>
-        h.insert(x)
-        for (_ <- 0 until h.size()) h.percentile()
-      }
-      System.nanoTime() - start
-    }
-    for (_ <- 0 until numRuns) kernel()  // warmup
-
-    var elapsed: Long = 0
-    for (_ <- 0 until numRuns) elapsed += kernel()
-    val perOp = elapsed / (numRuns * input.length)
-    // scalastyle:off println
-    println(s"$perOp ns per op on heaps of size ${input.length}")
-    // scalastyle:on println
-  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to