This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git

commit 0f68f194cc92558e15ca46c933f38760bf565825
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Fri Mar 14 13:23:09 2025 +0000

    STATISTICS-89: Update descriptive statistics array creation benchmark
    
    Support all the current double statistic implementations. Support
    creation of statistics using part of the array range.
---
 .../descriptive/StatisticCreationPerformance.java  | 369 +++++++++++++++++++--
 1 file changed, 344 insertions(+), 25 deletions(-)

diff --git a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java
index fbe88a6..f293d38 100644
--- a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java
+++ b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java
@@ -19,9 +19,24 @@ package org.apache.commons.statistics.examples.jmh.descriptive;
 
 import java.util.Arrays;
 import java.util.concurrent.TimeUnit;
-import java.util.function.DoubleConsumer;
 import java.util.function.DoubleSupplier;
+import java.util.function.Function;
+import java.util.function.Supplier;
 import org.apache.commons.rng.simple.RandomSource;
+import org.apache.commons.statistics.descriptive.DoubleStatistic;
+import org.apache.commons.statistics.descriptive.GeometricMean;
+import org.apache.commons.statistics.descriptive.Kurtosis;
+import org.apache.commons.statistics.descriptive.Max;
+import org.apache.commons.statistics.descriptive.Mean;
+import org.apache.commons.statistics.descriptive.Min;
+import org.apache.commons.statistics.descriptive.Product;
+import org.apache.commons.statistics.descriptive.Skewness;
+import org.apache.commons.statistics.descriptive.StandardDeviation;
+import org.apache.commons.statistics.descriptive.Statistic;
+import org.apache.commons.statistics.descriptive.Sum;
+import org.apache.commons.statistics.descriptive.SumOfLogs;
+import org.apache.commons.statistics.descriptive.SumOfSquares;
+import org.apache.commons.statistics.descriptive.Variance;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
@@ -64,44 +79,261 @@ public class StatisticCreationPerformance {
             return data;
         }
 
+        /**
+         * @return the start inclusive of the sub-range.
+         */
+        public int from() {
+            // Approximately 1/4
+            return data.length >> 2;
+        }
+
+        /**
+         * @return the end exclusive of the sub-range.
+         */
+        public int to() {
+            // Approximately 3/4
+            return (data.length >> 1) + (data.length >> 2);
+        }
+
         /**
          * Create the data.
          */
         @Setup(Level.Iteration)
         public void setup() {
-            // Data will be randomized per iteration
-            data = RandomSource.XO_RO_SHI_RO_128_PP.create().doubles(length).toArray();
+            // Data will be randomized per iteration.
+            // Ideally the product should not underflow/overflow.
+            // A product of 1 would have a sum of logs of 0.
+            // Create a uniform sum of logs around zero and transform:
+            // log x in [-0.5, 0.5) => x in [0.607, 1.649)
+            data = RandomSource.XO_RO_SHI_RO_128_PP.create().doubles(length)
+                .map(x -> Math.exp(x - 0.5)).toArray();
         }
     }
 
     /**
-     * A sum of {@code double} data.
+     * Source of a {@code Statistic}.
      */
-    static class SimpleSum implements DoubleConsumer, DoubleSupplier {
-        /** The sum. */
-        private double sum;
+    @State(Scope.Benchmark)
+    public static class StatisticSource {
+        /** The statistic to create. */
+        @Param()
+        private Statistic statistic;
+
+        /** Statistic factory. */
+        private Supplier<DoubleStatistic> supplier;
+
+        /** Statistic factory using input data. */
+        private Function<double[], DoubleStatistic> factory;
+
+        /**
+         * @return a statistic instance
+         */
+        public DoubleStatistic create() {
+            return supplier.get();
+        }
 
-        @Override
-        public void accept(double value) {
-            sum += value;
+        /**
+         * @param x Values.
+         * @return a statistic instance
+         */
+        public DoubleStatistic create(double[] x) {
+            return factory.apply(x);
         }
 
-        @Override
-        public double getAsDouble() {
-            return sum;
+        /**
+         * Create the factory functions.
+         */
+        @Setup(Level.Trial)
+        public void setup() {
+            switch (statistic) {
+            case GEOMETRIC_MEAN:
+                supplier = GeometricMean::create;
+                factory = GeometricMean::of;
+                break;
+            case KURTOSIS:
+                supplier = Kurtosis::create;
+                factory = Kurtosis::of;
+                break;
+            case MAX:
+                supplier = Max::create;
+                factory = Max::of;
+                break;
+            case MEAN:
+                supplier = Mean::create;
+                factory = Mean::of;
+                break;
+            case MIN:
+                supplier = Min::create;
+                factory = Min::of;
+                break;
+            case PRODUCT:
+                supplier = Product::create;
+                factory = Product::of;
+                break;
+            case SKEWNESS:
+                supplier = Skewness::create;
+                factory = Skewness::of;
+                break;
+            case STANDARD_DEVIATION:
+                supplier = StandardDeviation::create;
+                factory = StandardDeviation::of;
+                break;
+            case SUM:
+                supplier = Sum::create;
+                factory = Sum::of;
+                break;
+            case SUM_OF_LOGS:
+                supplier = SumOfLogs::create;
+                factory = SumOfLogs::of;
+                break;
+            case SUM_OF_SQUARES:
+                supplier = SumOfSquares::create;
+                factory = SumOfSquares::of;
+                break;
+            case VARIANCE:
+                supplier = Variance::create;
+                factory = Variance::of;
+                break;
+            default:
+                throw new IllegalStateException("Unsupported statistic: " + statistic);
+            }
         }
     }
 
+    /**
+     * Source of a {@code Statistic} created using a custom implementation.
+     * This contains alternative versions of creating statistics from an array
+     * for benchmarking performance.
+     */
+    @State(Scope.Benchmark)
+    public static class CustomStatisticSource {
+        /** The statistic to create. */
+        @Param({"min", "product"})
+        private String statistic;
+
+        /** Statistic factory using input data. */
+        private Function<double[], DoubleSupplier> factory;
+
+        /**
+         * @param x Values.
+         * @return a statistic instance
+         */
+        public DoubleSupplier create(double[] x) {
+            return factory.apply(x);
+        }
+
+        /**
+         * Create the factory functions.
+         */
+        @Setup(Level.Trial)
+        public void setup() {
+            if ("min".equals(statistic)) {
+                factory = CMin::of;
+            } else if ("product".equals(statistic)) {
+                factory = CProduct::of;
+            } else {
+                throw new IllegalStateException("Unsupported custom statistic: " + statistic);
+            }
+        }
+
+        /** Compute the minimum. */
+        static final class CMin implements DoubleSupplier {
+            /** Current statistic. */
+            private double s;
+
+            /**
+             * Create an instance.
+             * @param s Statistic value.
+             */
+            private CMin(double s) {
+                this.s = s;
+            }
+
+            /**
+             * @param values Values.
+             * @return instance.
+             */
+            static CMin of(double... values) {
+                double s = Double.POSITIVE_INFINITY;
+                for (final double x : values) {
+                    s = Math.min(s, x);
+                }
+                return new CMin(s);
+            }
+
+            @Override
+            public double getAsDouble() {
+                return s;
+            }
+        }
+
+        /** Compute the product. */
+        static final class CProduct implements DoubleSupplier {
+            /** Current statistic. */
+            private double s;
+
+            /**
+             * Create an instance.
+             * @param s Statistic value.
+             */
+            private CProduct(double s) {
+                this.s = s;
+            }
+
+            /**
+             * @param values Values.
+             * @return instance.
+             */
+            static CProduct of(double... values) {
+                double s = 1;
+                for (final double x : values) {
+                    s *= x;
+                }
+                return new CProduct(s);
+            }
+
+            @Override
+            public double getAsDouble() {
+                return s;
+            }
+        }
+    }
+
+    /**
+     * Create the statistic using an array.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double array(DataSource dataSource, StatisticSource statisticSource) {
+        return statisticSource.create(dataSource.getData()).getAsDouble();
+    }
+
+    /**
+     * Create the statistic using an array and a custom implementation.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double customArray(DataSource dataSource, CustomStatisticSource statisticSource) {
+        return statisticSource.create(dataSource.getData()).getAsDouble();
+    }
+
     /**
      * Create the statistic using a for loop.
      *
-     * @param source Source of the data.
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
      * @return the statistic
      */
     @Benchmark
-    public double forLoop(DataSource source) {
-        final double[] data = source.getData();
-        final SimpleSum s = new SimpleSum();
+    public double forLoop(DataSource dataSource, StatisticSource statisticSource) {
+        final double[] data = dataSource.getData();
+        final DoubleStatistic s = statisticSource.create();
         for (int i = 0; i < data.length; i++) {
             s.accept(data[i]);
         }
@@ -111,13 +343,14 @@ public class StatisticCreationPerformance {
     /**
      * Create the statistic using a for-each loop.
      *
-     * @param source Source of the data.
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
      * @return the statistic
      */
     @Benchmark
-    public double forEachLoop(DataSource source) {
-        final double[] data = source.getData();
-        final SimpleSum s = new SimpleSum();
+    public double forEachLoop(DataSource dataSource, StatisticSource statisticSource) {
+        final double[] data = dataSource.getData();
+        final DoubleStatistic s = statisticSource.create();
         for (final double x : data) {
             s.accept(x);
         }
@@ -127,14 +360,100 @@ public class StatisticCreationPerformance {
     /**
      * Create the statistic using a stream.
      *
-     * @param source Source of the data.
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
      * @return the statistic
      */
     @Benchmark
-    public double streamForEach(DataSource source) {
-        final double[] data = source.getData();
-        final SimpleSum s = new SimpleSum();
+    public double streamForEach(DataSource dataSource, StatisticSource statisticSource) {
+        final double[] data = dataSource.getData();
+        final DoubleStatistic s = statisticSource.create();
         Arrays.stream(data).forEach(s::accept);
         return s.getAsDouble();
     }
+
+    /**
+     * Create the statistic using a copy of a range of the array.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double arrayRange(DataSource dataSource, StatisticSource statisticSource) {
+        final int from = dataSource.from();
+        final int to = dataSource.to();
+        final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to);
+        return statisticSource.create(data).getAsDouble();
+    }
+
+    /**
+     * Create the statistic using a copy of a range of the array and a custom implementation.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double customArrayRange(DataSource dataSource, CustomStatisticSource statisticSource) {
+        final int from = dataSource.from();
+        final int to = dataSource.to();
+        final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to);
+        return statisticSource.create(data).getAsDouble();
+    }
+
+    /**
+     * Create the statistic using a for loop on a range of the data.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double forLoopRange(DataSource dataSource, StatisticSource statisticSource) {
+        final int from = dataSource.from();
+        final int to = dataSource.to();
+        final double[] data = dataSource.getData();
+        final DoubleStatistic s = statisticSource.create();
+        for (int i = from; i < to; i++) {
+            s.accept(data[i]);
+        }
+        return s.getAsDouble();
+    }
+
+    /**
+     * Create the statistic using a for-each loop on a range of the data.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double forEachLoopRange(DataSource dataSource, StatisticSource statisticSource) {
+        final int from = dataSource.from();
+        final int to = dataSource.to();
+        final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to);
+        final DoubleStatistic s = statisticSource.create();
+        for (final double x : data) {
+            s.accept(x);
+        }
+        return s.getAsDouble();
+    }
+
+    /**
+     * Create the statistic using a stream on a range of the data.
+     *
+     * @param dataSource Source of the data.
+     * @param statisticSource Source of the statistic.
+     * @return the statistic
+     */
+    @Benchmark
+    public double streamForEachRange(DataSource dataSource, StatisticSource statisticSource) {
+        final int from = dataSource.from();
+        final int to = dataSource.to();
+        final double[] data = dataSource.getData();
+        final DoubleStatistic s = statisticSource.create();
+        Arrays.stream(data, from, to).forEach(s::accept);
+        return s.getAsDouble();
+    }
 }

Reply via email to