This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 79cf97bc8526206e7ed5186855113c239303472b Author: Alex Herbert <[email protected]> AuthorDate: Sun Oct 15 09:43:55 2023 +0100 STATISTICS-71: Add SumOfSquares statistic --- .../statistics/descriptive/SumOfSquares.java | 116 +++++++++++++++++++++ .../statistics/descriptive/SumOfSquaresTest.java | 81 ++++++++++++++ 2 files changed, 197 insertions(+) diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquares.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquares.java new file mode 100644 index 0000000..8364a94 --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquares.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +/** + * Returns the sum of the squares of the available values. Uses the following definition: + * + * <p>\[ \sum_{i=1}^n x_i^2 \] + * + * <p>where \( n \) is the number of samples. + * + * <ul> + * <li>The result is zero if no values are observed. + * <li>The result is {@code NaN} if any of the values is {@code NaN}. 
+ * <li>The result is {@code +infinity} if any of the values is {@code infinity}, + * or the sum overflows. + * </ul> + * + * <p>This class is designed to work with (though does not require) + * {@linkplain java.util.stream streams}. + * + * <p><strong>This instance is not thread safe.</strong> + * If multiple threads access an instance of this class concurrently, + * and at least one of the threads invokes the {@link java.util.function.DoubleConsumer#accept(double) accept} or + * {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} method, it must be synchronized externally. + * + * <p>However, it is safe to use {@link java.util.function.DoubleConsumer#accept(double) accept} + * and {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} + * as {@code accumulator} and {@code combiner} functions of + * {@link java.util.stream.Collector Collector} on a parallel stream, + * because the parallel instance of {@link java.util.stream.Stream#collect Stream.collect()} + * provides the necessary partitioning, isolation, and merging of results for + * safe and efficient parallel execution. + * + * @since 1.1 + */ +public final class SumOfSquares implements DoubleStatistic, DoubleStatisticAccumulator<SumOfSquares> { + + /** Sum of squares of all values. */ + private double ss; + + /** + * Create an instance. + */ + private SumOfSquares() { + // No-op + } + + /** + * Creates an instance. + * + * <p>The initial result is zero. + * + * @return {@code SumOfSquares} instance. + */ + public static SumOfSquares create() { + return new SumOfSquares(); + } + + /** + * Returns an instance populated using the input {@code values}. + * + * <p>The result is {@code NaN} if any of the values is {@code NaN} + * or the sum at any point is a {@code NaN}. + * + * <p>When the input is an empty array, the result is zero. + * + * @param values Values. + * @return {@code SumOfSquares} instance. + */ + public static SumOfSquares of(double...
values) { + return Statistics.add(new SumOfSquares(), values); + } + + /** + * Updates the state of the statistic to reflect the addition of {@code value}. + * + * @param value Value. + */ + @Override + public void accept(double value) { + ss += value * value; + } + + /** + * Gets the sum of squares of all input values. + * + * <p>When no values have been added, the result is zero. + * + * @return sum of squares of all values. + */ + @Override + public double getAsDouble() { + return ss; + } + + @Override + public SumOfSquares combine(SumOfSquares other) { + ss += other.ss; + return this; + } +} diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/SumOfSquaresTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/SumOfSquaresTest.java new file mode 100644 index 0000000..69eb99c --- /dev/null +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/SumOfSquaresTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.statistics.descriptive; + +import java.util.Arrays; +import java.util.stream.Stream; +import org.apache.commons.statistics.distribution.DoubleTolerance; +import org.apache.commons.statistics.distribution.DoubleTolerances; + +/** + * Test for {@link SumOfSquares}. + */ +final class SumOfSquaresTest extends BaseDoubleStatisticTest<SumOfSquares> { + + @Override + protected SumOfSquares create() { + return SumOfSquares.create(); + } + + @Override + protected SumOfSquares create(double... values) { + return SumOfSquares.of(values); + } + + @Override + protected double getEmptyValue() { + return 0; + } + + @Override + protected double getExpectedValue(double[] values) { + // The sum is not high precision. + // Use the extended precision result from the stream sum as the reference. + return Arrays.stream(values) + .map(x -> x * x) + .sum(); + } + + @Override + protected double getExpectedNonFiniteValue(double[] values) { + return getExpectedValue(values); + } + + @Override + protected DoubleTolerance getTolerance() { + return DoubleTolerances.ulps(8); + } + + @Override + protected Stream<StatisticTestData> streamTestData() { + final Stream.Builder<StatisticTestData> builder = Stream.builder(); + // Python Numpy v1.25.1: numpy.dot(x, x) + builder.accept(addReference(30.0, DoubleTolerances.ulps(1), 1, 2, 3, 4)); + builder.accept(addReference(1422.0, DoubleTolerances.ulps(1), 5, 9, 13, 14, 10, 12, 11, 15, 19)); + // numpy.set_printoptions(precision=17) + // x = numpy.random.rand(10) + builder.accept(addReference(4.725085909331556, DoubleTolerances.ulps(5), + 0.8824732476946039, 0.5101077911923941, 0.002185806195693085, + 0.755680536591656, 0.9065277163160296, 0.006879254422025083, + 0.3942119161179829, 0.7421088683881211, 0.92742739469409, + 0.8526248074611704)); + // Matlab v2023a: sumsqr(x) + builder.accept(addReference(2885.0625, DoubleTolerances.ulps(3), 0.25, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50)); + 
builder.accept(addReference(2.9999999999999997e-05, DoubleTolerances.ulps(3), 0.001, 0.002, 0.003, 0.004)); + return builder.build(); + } +}
