This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 0f68f194cc92558e15ca46c933f38760bf565825 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Fri Mar 14 13:23:09 2025 +0000 STATISTICS-89: Update descriptive statistics array creation benchmark Support all the current double statistic implementations. Support creation of statistics using part of the array range. --- .../descriptive/StatisticCreationPerformance.java | 369 +++++++++++++++++++-- 1 file changed, 344 insertions(+), 25 deletions(-) diff --git a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java index fbe88a6..f293d38 100644 --- a/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java +++ b/commons-statistics-examples/examples-jmh/src/main/java/org/apache/commons/statistics/examples/jmh/descriptive/StatisticCreationPerformance.java @@ -19,9 +19,24 @@ package org.apache.commons.statistics.examples.jmh.descriptive; import java.util.Arrays; import java.util.concurrent.TimeUnit; -import java.util.function.DoubleConsumer; import java.util.function.DoubleSupplier; +import java.util.function.Function; +import java.util.function.Supplier; import org.apache.commons.rng.simple.RandomSource; +import org.apache.commons.statistics.descriptive.DoubleStatistic; +import org.apache.commons.statistics.descriptive.GeometricMean; +import org.apache.commons.statistics.descriptive.Kurtosis; +import org.apache.commons.statistics.descriptive.Max; +import org.apache.commons.statistics.descriptive.Mean; +import org.apache.commons.statistics.descriptive.Min; +import org.apache.commons.statistics.descriptive.Product; +import org.apache.commons.statistics.descriptive.Skewness; +import org.apache.commons.statistics.descriptive.StandardDeviation; +import 
org.apache.commons.statistics.descriptive.Statistic; +import org.apache.commons.statistics.descriptive.Sum; +import org.apache.commons.statistics.descriptive.SumOfLogs; +import org.apache.commons.statistics.descriptive.SumOfSquares; +import org.apache.commons.statistics.descriptive.Variance; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -64,44 +79,261 @@ public class StatisticCreationPerformance { return data; } + /** + * @return the start inclusive of the sub-range. + */ + public int from() { + // Approximately 1/4 + return data.length >> 2; + } + + /** + * @return the end exclusive of the sub-range. + */ + public int to() { + // Approximately 3/4 + return (data.length >> 1) + (data.length >> 2); + } + /** * Create the data. */ @Setup(Level.Iteration) public void setup() { - // Data will be randomized per iteration - data = RandomSource.XO_RO_SHI_RO_128_PP.create().doubles(length).toArray(); + // Data will be randomized per iteration. + // Ideally the product should not underflow/overflow. + // A product of 1 would have a sum of logs of 0. + // Create a uniform sum of logs around zero and transform: + // log x in [-0.5, 0.5) => x in [0.607, 1.649) + data = RandomSource.XO_RO_SHI_RO_128_PP.create().doubles(length) + .map(x -> Math.exp(x - 0.5)).toArray(); } } /** - * A sum of {@code double} data. + * Source of a {@code Statistic}. */ - static class SimpleSum implements DoubleConsumer, DoubleSupplier { - /** The sum. */ - private double sum; + @State(Scope.Benchmark) + public static class StatisticSource { + /** The statistic to create. */ + @Param() + private Statistic statistic; + + /** Statistic factory. */ + private Supplier<DoubleStatistic> supplier; + + /** Statistic factory using input data. 
*/ + private Function<double[], DoubleStatistic> factory; + + /** + * @return a statistic instance + */ + public DoubleStatistic create() { + return supplier.get(); + } - @Override - public void accept(double value) { - sum += value; + /** + * @param x Values. + * @return a statistic instance + */ + public DoubleStatistic create(double[] x) { + return factory.apply(x); } - @Override - public double getAsDouble() { - return sum; + /** + * Create the factory functions. + */ + @Setup(Level.Trial) + public void setup() { + switch (statistic) { + case GEOMETRIC_MEAN: + supplier = GeometricMean::create; + factory = GeometricMean::of; + break; + case KURTOSIS: + supplier = Kurtosis::create; + factory = Kurtosis::of; + break; + case MAX: + supplier = Max::create; + factory = Max::of; + break; + case MEAN: + supplier = Mean::create; + factory = Mean::of; + break; + case MIN: + supplier = Min::create; + factory = Min::of; + break; + case PRODUCT: + supplier = Product::create; + factory = Product::of; + break; + case SKEWNESS: + supplier = Skewness::create; + factory = Skewness::of; + break; + case STANDARD_DEVIATION: + supplier = StandardDeviation::create; + factory = StandardDeviation::of; + break; + case SUM: + supplier = Sum::create; + factory = Sum::of; + break; + case SUM_OF_LOGS: + supplier = SumOfLogs::create; + factory = SumOfLogs::of; + break; + case SUM_OF_SQUARES: + supplier = SumOfSquares::create; + factory = SumOfSquares::of; + break; + case VARIANCE: + supplier = Variance::create; + factory = Variance::of; + break; + default: + throw new IllegalStateException("Unsupported statistic: " + statistic); + } } } + /** + * Source of a {@code Statistic} created using a custom implementation. + * This contains alternative versions of creating statistics from an array + * for benchmarking performance. + */ + @State(Scope.Benchmark) + public static class CustomStatisticSource { + /** The statistic to create. 
*/ + @Param({"min", "product"}) + private String statistic; + + /** Statistic factory using input data. */ + private Function<double[], DoubleSupplier> factory; + + /** + * @param x Values. + * @return a statistic instance + */ + public DoubleSupplier create(double[] x) { + return factory.apply(x); + } + + /** + * Create the factory functions. + */ + @Setup(Level.Trial) + public void setup() { + if ("min".equals(statistic)) { + factory = CMin::of; + } else if ("product".equals(statistic)) { + factory = CProduct::of; + } else { + throw new IllegalStateException("Unsupported custom statistic: " + statistic); + } + } + + /** Compute the minimum. */ + static final class CMin implements DoubleSupplier { + /** Current statistic. */ + private double s; + + /** + * Create an instance. + * @param s Statistic value. + */ + private CMin(double s) { + this.s = s; + } + + /** + * @param values Values. + * @return instance. + */ + static CMin of(double... values) { + double s = Double.POSITIVE_INFINITY; + for (final double x : values) { + s = Math.min(s, x); + } + return new CMin(s); + } + + @Override + public double getAsDouble() { + return s; + } + } + + /** Compute the product. */ + static final class CProduct implements DoubleSupplier { + /** Current statistic. */ + private double s; + + /** + * Create an instance. + * @param s Statistic value. + */ + private CProduct(double s) { + this.s = s; + } + + /** + * @param values Values. + * @return instance. + */ + static CProduct of(double... values) { + double s = 1; + for (final double x : values) { + s *= x; + } + return new CProduct(s); + } + + @Override + public double getAsDouble() { + return s; + } + } + } + + /** + * Create the statistic using an array. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. 
+ * @return the statistic + */ + @Benchmark + public double array(DataSource dataSource, StatisticSource statisticSource) { + return statisticSource.create(dataSource.getData()).getAsDouble(); + } + + /** + * Create the statistic using an array. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. + * @return the statistic + */ + @Benchmark + public double customArray(DataSource dataSource, CustomStatisticSource statisticSource) { + return statisticSource.create(dataSource.getData()).getAsDouble(); + } + /** * Create the statistic using a for loop. * - * @param source Source of the data. + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. * @return the statistic */ @Benchmark - public double forLoop(DataSource source) { - final double[] data = source.getData(); - final SimpleSum s = new SimpleSum(); + public double forLoop(DataSource dataSource, StatisticSource statisticSource) { + final double[] data = dataSource.getData(); + final DoubleStatistic s = statisticSource.create(); for (int i = 0; i < data.length; i++) { s.accept(data[i]); } @@ -111,13 +343,14 @@ public class StatisticCreationPerformance { /** * Create the statistic using a for-each loop. * - * @param source Source of the data. + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. * @return the statistic */ @Benchmark - public double forEachLoop(DataSource source) { - final double[] data = source.getData(); - final SimpleSum s = new SimpleSum(); + public double forEachLoop(DataSource dataSource, StatisticSource statisticSource) { + final double[] data = dataSource.getData(); + final DoubleStatistic s = statisticSource.create(); for (final double x : data) { s.accept(x); } @@ -127,14 +360,100 @@ public class StatisticCreationPerformance { /** * Create the statistic using a stream. * - * @param source Source of the data. + * @param dataSource Source of the data. 
+ * @param statisticSource Source of the statistic. * @return the statistic */ @Benchmark - public double streamForEach(DataSource source) { - final double[] data = source.getData(); - final SimpleSum s = new SimpleSum(); + public double streamForEach(DataSource dataSource, StatisticSource statisticSource) { + final double[] data = dataSource.getData(); + final DoubleStatistic s = statisticSource.create(); Arrays.stream(data).forEach(s::accept); return s.getAsDouble(); } + + /** + * Create the statistic using an array copied from a range of the data. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. + * @return the statistic + */ + @Benchmark + public double arrayRange(DataSource dataSource, StatisticSource statisticSource) { + final int from = dataSource.from(); + final int to = dataSource.to(); + final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to); + return statisticSource.create(data).getAsDouble(); + } + + /** + * Create the statistic using a custom implementation and an array copied from a range of the data. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. + * @return the statistic + */ + @Benchmark + public double customArrayRange(DataSource dataSource, CustomStatisticSource statisticSource) { + final int from = dataSource.from(); + final int to = dataSource.to(); + final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to); + return statisticSource.create(data).getAsDouble(); + } + + /** + * Create the statistic using a for loop on a range of the data. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. 
+ * @return the statistic + */ + @Benchmark + public double forLoopRange(DataSource dataSource, StatisticSource statisticSource) { + final int from = dataSource.from(); + final int to = dataSource.to(); + final double[] data = dataSource.getData(); + final DoubleStatistic s = statisticSource.create(); + for (int i = from; i < to; i++) { + s.accept(data[i]); + } + return s.getAsDouble(); + } + + /** + * Create the statistic using a for-each loop on a range of the data. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. + * @return the statistic + */ + @Benchmark + public double forEachLoopRange(DataSource dataSource, StatisticSource statisticSource) { + final int from = dataSource.from(); + final int to = dataSource.to(); + final double[] data = Arrays.copyOfRange(dataSource.getData(), from, to); + final DoubleStatistic s = statisticSource.create(); + for (final double x : data) { + s.accept(x); + } + return s.getAsDouble(); + } + + /** + * Create the statistic using a stream on a range of the data. + * + * @param dataSource Source of the data. + * @param statisticSource Source of the statistic. + * @return the statistic + */ + @Benchmark + public double streamForEachRange(DataSource dataSource, StatisticSource statisticSource) { + final int from = dataSource.from(); + final int to = dataSource.to(); + final double[] data = dataSource.getData(); + final DoubleStatistic s = statisticSource.create(); + Arrays.stream(data, from, to).forEach(s::accept); + return s.getAsDouble(); + } }