This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 868dd707708750211034b2acab80c7159ed01e90 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Wed Dec 27 08:55:27 2023 +0000 STATISTICS-81: Add integer standard deviation implementation --- ...{IntVariance.java => IntStandardDeviation.java} | 109 ++++++---------- .../statistics/descriptive/IntVariance.java | 18 ++- ...ongVariance.java => LongStandardDeviation.java} | 99 +++++---------- .../statistics/descriptive/LongVariance.java | 20 ++- ...anceTest.java => IntStandardDeviationTest.java} | 131 ++++++++++---------- .../statistics/descriptive/IntVarianceTest.java | 53 ++++---- ...nceTest.java => LongStandardDeviationTest.java} | 137 +++++++++++---------- .../statistics/descriptive/LongVarianceTest.java | 53 ++++---- .../descriptive/StandardDeviationTest.java | 20 ++- 9 files changed, 300 insertions(+), 340 deletions(-) diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntStandardDeviation.java similarity index 61% copy from commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java copy to commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntStandardDeviation.java index 70d108b..9460df5 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntStandardDeviation.java @@ -16,13 +16,11 @@ */ package org.apache.commons.statistics.descriptive; -import java.math.BigInteger; - /** - * Computes the variance of the available values. The default implementation uses the - * following definition of the <em>sample variance</em>: + * Computes the standard deviation of the available values. 
The default implementation uses the + * following definition of the <em>sample standard deviation</em>: * - * <p>\[ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 \] + * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \] * * <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number of samples. * @@ -31,10 +29,13 @@ import java.math.BigInteger; * <li>The result is zero if there is one value in the data set. * </ul> * - * <p>The use of the term \( n − 1 \) is called Bessel's correction. This is an unbiased - * estimator of the variance of a hypothetical infinite population. If the + * <p>The use of the term \( n − 1 \) is called Bessel's correction. Omitting the square root, + * this provides an unbiased estimator of the variance of a hypothetical infinite population. If the * {@link #setBiased(boolean) biased} option is enabled the normalisation factor is * changed to \( \frac{1}{n} \) for a biased estimator of the <em>sample variance</em>. + * Note however that square root is a concave function and thus introduces negative bias + * (by Jensen's inequality), which depends on the distribution, and thus the corrected sample + * standard deviation (using Bessel's correction) is less biased, but still biased. * * <p>The implementation uses an exact integer sum to compute the scaled (by \( n \)) * sum of squared deviations from the mean; this is normalised by the scaled correction factor. @@ -60,16 +61,13 @@ import java.math.BigInteger; * provides the necessary partitioning, isolation, and merging of results for * safe and efficient parallel execution. 
* - * @see <a href="https://en.wikipedia.org/wiki/variance">variance (Wikipedia)</a> - * @see <a href="https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance"> - * Algorithms for computing the variance (Wikipedia)</a> + * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard deviation (Wikipedia)</a> * @see <a href="https://en.wikipedia.org/wiki/Bessel%27s_correction">Bessel's correction</a> + * @see <a href="https://en.wikipedia.org/wiki/Jensen%27s_inequality">Jensen's inequality</a> + * @see IntVariance * @since 1.1 */ -public final class IntVariance implements IntStatistic, StatisticAccumulator<IntVariance> { - /** Small array sample size. - * Used to avoid computing with UInt96 then converting to UInt128. */ - private static final int SMALL_SAMPLE = 10; +public final class IntStandardDeviation implements IntStatistic, StatisticAccumulator<IntStandardDeviation> { /** Sum of the squared values. */ private final UInt128 sumSq; @@ -84,7 +82,7 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int /** * Create an instance. */ - private IntVariance() { + private IntStandardDeviation() { this(UInt128.create(), Int128.create(), 0); } @@ -95,7 +93,7 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int * @param sum Sum of the values. * @param n Count of values that have been added. */ - private IntVariance(UInt128 sumSq, Int128 sum, int n) { + private IntStandardDeviation(UInt128 sumSq, Int128 sum, int n) { this.sumSq = sumSq; this.sum = sum; this.n = n; @@ -106,22 +104,22 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int * * <p>The initial result is {@code NaN}. * - * @return {@code IntVariance} instance. + * @return {@code IntStandardDeviation} instance. 
*/ - public static IntVariance create() { - return new IntVariance(); + public static IntStandardDeviation create() { + return new IntStandardDeviation(); } /** * Returns an instance populated using the input {@code values}. * * @param values Values. - * @return {@code IntVariance} instance. + * @return {@code IntStandardDeviation} instance. */ - public static IntVariance of(int... values) { + public static IntStandardDeviation of(int... values) { // Small arrays can be processed using the object - if (values.length < SMALL_SAMPLE) { - final IntVariance stat = new IntVariance(); + if (values.length < IntVariance.SMALL_SAMPLE) { + final IntStandardDeviation stat = new IntStandardDeviation(); for (final int x : values) { stat.accept(x); } @@ -148,7 +146,7 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int } // Convert - return new IntVariance(UInt128.of(ss), Int128.of(s), values.length); + return new IntStandardDeviation(UInt128.of(ss), Int128.of(s), values.length); } /** @@ -164,11 +162,11 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int } /** - * Gets the variance of all input values. + * Gets the standard deviation of all input values. * * <p>When no values have been added, the result is {@code NaN}. * - * @return variance of all values. + * @return standard deviation of all values. */ @Override public double getAsDouble() { @@ -179,42 +177,11 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int if (n == 1) { return 0; } - final long n0 = biased ? n : n - 1; - - // Sum-of-squared deviations: sum(x^2) - sum(x)^2 / n - // Sum-of-squared deviations precursor: n * sum(x^2) - sum(x)^2 - // The precursor is computed in integer precision. - // The divide uses double precision. - // This ensures we avoid cancellation in the difference and use a fast divide. - // The result is limited to by the rounding in the double computation. 
- - // Compute the term if possible using fast integer arithmetic. - // 128-bit sum(x^2) * n will be OK when the upper 32-bits are zero. - // 128-bit sum(x)^2 will be OK when the upper 64-bits are zero. - // Both are safe when n < 2^32. - double diff; - if ((n >>> Integer.SIZE) == 0) { - diff = sumSq.unsignedMultiply((int) n).subtract(sum.squareLow()).toDouble(); - } else { - diff = sumSq.toBigInteger().multiply(BigInteger.valueOf(n)) - .subtract(square(sum.toBigInteger())).doubleValue(); - } - // Compute the divide in double precision - return diff / IntMath.unsignedMultiplyToDouble(n, n0); - } - - /** - * Convenience method to square a BigInteger. - * - * @param x Value - * @return x^2 - */ - private static BigInteger square(BigInteger x) { - return x.multiply(x); + return Math.sqrt(IntVariance.computeVariance(sumSq, sum, n, biased)); } @Override - public IntVariance combine(IntVariance other) { + public IntStandardDeviation combine(IntStandardDeviation other) { sumSq.add(other.sumSq); sum.add(other.sum); n += other.n; @@ -222,26 +189,20 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int } /** - * Sets the value of the biased flag. The default value is {@code false}. - * - * <p>If {@code false} the sum of squared deviations from the sample mean is normalised by - * {@code n - 1} where {@code n} is the number of samples. This is Bessel's correction - * for an unbiased estimator of the variance of a hypothetical infinite population. - * - * <p>If {@code true} the sum of squared deviations is normalised by the number of samples - * {@code n}. - * - * <p>Note: This option only applies when {@code n > 1}. The variance of {@code n = 1} is - * always 0. + * Sets the value of the biased flag. The default value is {@code false}. The bias + * term refers to the computation of the variance; the standard deviation is returned + * as the square root of the biased or unbiased <em>sample variance</em>. 
For further + * details see {@link IntVariance#setBiased(boolean) IntVariance.setBiased}. - * - * <p>This flag only controls the final computation of the statistic. The value of this flag - * will not affect compatibility between instances during a {@link #combine(IntVariance) combine} - * operation. + * <p>This flag only controls the final computation of the statistic. The value of + * this flag will not affect compatibility between instances during a + * {@link #combine(IntStandardDeviation) combine} operation. * * @param v Value. * @return {@code this} instance + * @see IntVariance#setBiased(boolean) */ - public IntVariance setBiased(boolean v) { + public IntStandardDeviation setBiased(boolean v) { biased = v; return this; } diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java index 70d108b..05875f5 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/IntVariance.java @@ -69,7 +69,7 @@ import java.math.BigInteger; public final class IntVariance implements IntStatistic, StatisticAccumulator<IntVariance> { /** Small array sample size. * Used to avoid computing with UInt96 then converting to UInt128. */ - private static final int SMALL_SAMPLE = 10; + static final int SMALL_SAMPLE = 10; /** Sum of the squared values. */ private final UInt128 sumSq; @@ -179,8 +179,21 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int if (n == 1) { return 0; } - final long n0 = biased ? n : n - 1; + return computeVariance(sumSq, sum, n, biased); + } + /** + * Compute the variance. + * + * <p>It is assumed the count {@code n} is greater than 1. + * + * @param sumSq Sum of the squared values. + * @param sum Sum of the values. 
+ * @param n Count of values that have been added. + * @param biased Flag to control if the statistic is biased, or should use a bias correction. + * @return the variance + */ + static double computeVariance(UInt128 sumSq, Int128 sum, long n, boolean biased) { // Sum-of-squared deviations: sum(x^2) - sum(x)^2 / n // Sum-of-squared deviations precursor: n * sum(x^2) - sum(x)^2 // The precursor is computed in integer precision. @@ -199,6 +212,7 @@ public final class IntVariance implements IntStatistic, StatisticAccumulator<Int diff = sumSq.toBigInteger().multiply(BigInteger.valueOf(n)) .subtract(square(sum.toBigInteger())).doubleValue(); } + final long n0 = biased ? n : n - 1; // Compute the divide in double precision return diff / IntMath.unsignedMultiplyToDouble(n, n0); } diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongStandardDeviation.java similarity index 60% copy from commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java copy to commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongStandardDeviation.java index a6cbe72..9231b1b 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongStandardDeviation.java @@ -16,13 +16,11 @@ */ package org.apache.commons.statistics.descriptive; -import java.math.BigInteger; - /** - * Computes the variance of the available values. The default implementation uses the - * following definition of the <em>sample variance</em>: + * Computes the standard deviation of the available values. 
The default implementation uses the + * following definition of the <em>sample standard deviation</em>: * - * <p>\[ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 \] + * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \] * * <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number of samples. * @@ -31,10 +29,13 @@ import java.math.BigInteger; * <li>The result is zero if there is one value in the data set. * </ul> * - * <p>The use of the term \( n − 1 \) is called Bessel's correction. This is an unbiased - * estimator of the variance of a hypothetical infinite population. If the + * <p>The use of the term \( n − 1 \) is called Bessel's correction. Omitting the square root, + * this provides an unbiased estimator of the variance of a hypothetical infinite population. If the * {@link #setBiased(boolean) biased} option is enabled the normalisation factor is * changed to \( \frac{1}{n} \) for a biased estimator of the <em>sample variance</em>. + * Note however that square root is a concave function and thus introduces negative bias + * (by Jensen's inequality), which depends on the distribution, and thus the corrected sample + * standard deviation (using Bessel's correction) is less biased, but still biased. * * <p>The implementation uses an exact integer sum to compute the scaled (by \( n \)) * sum of squared deviations from the mean; this is normalised by the scaled correction factor. @@ -60,13 +61,13 @@ import java.math.BigInteger; * provides the necessary partitioning, isolation, and merging of results for * safe and efficient parallel execution. 
* - * @see <a href="https://en.wikipedia.org/wiki/variance">variance (Wikipedia)</a> - * @see <a href="https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance"> - * Algorithms for computing the variance (Wikipedia)</a> + * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard deviation (Wikipedia)</a> * @see <a href="https://en.wikipedia.org/wiki/Bessel%27s_correction">Bessel's correction</a> + * @see <a href="https://en.wikipedia.org/wiki/Jensen%27s_inequality">Jensen's inequality</a> + * @see LongVariance * @since 1.1 */ -public final class LongVariance implements LongStatistic, StatisticAccumulator<LongVariance> { +public final class LongStandardDeviation implements LongStatistic, StatisticAccumulator<LongStandardDeviation> { /** Sum of the squared values. */ private final UInt192 sumSq; @@ -81,7 +82,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L /** * Create an instance. */ - private LongVariance() { + private LongStandardDeviation() { this(UInt192.create(), Int128.create(), 0); } @@ -92,7 +93,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L * @param sum Sum of the values. * @param n Count of values that have been added. */ - private LongVariance(UInt192 sumSq, Int128 sum, int n) { + private LongStandardDeviation(UInt192 sumSq, Int128 sum, int n) { this.sumSq = sumSq; this.sum = sum; this.n = n; @@ -103,19 +104,19 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L * * <p>The initial result is {@code NaN}. * - * @return {@code IntVariance} instance. + * @return {@code LongStandardDeviation} instance. */ - public static LongVariance create() { - return new LongVariance(); + public static LongStandardDeviation create() { + return new LongStandardDeviation(); } /** * Returns an instance populated using the input {@code values}. * * @param values Values. - * @return {@code IntVariance} instance. 
+ * @return {@code LongStandardDeviation} instance. */ - public static LongVariance of(long... values) { + public static LongStandardDeviation of(long... values) { // Note: Arrays could be processed using specialised counts knowing the maximum limit // for an array is 2^31 values. Requires a UInt160. @@ -125,7 +126,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L s.add(x); ss.addSquare(x); } - return new LongVariance(ss, s, values.length); + return new LongStandardDeviation(ss, s, values.length); } /** @@ -156,43 +157,12 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L if (n == 1) { return 0; } - final long n0 = biased ? n : n - 1; - - // Sum-of-squared deviations: sum(x^2) - sum(x)^2 / n - // Sum-of-squared deviations precursor: n * sum(x^2) - sum(x)^2 - // The precursor is computed in integer precision. - // The divide uses double precision. - // This ensures we avoid cancellation in the difference and use a fast divide. - // The result is limited to max 4 ulp by the rounding in the double computation - // When n0*n is < 2^53 the max error is reduced to two roundings. - - // Compute the term if possible using fast integer arithmetic. - // 192-bit sum(x^2) * n will be OK when the upper 32-bits are zero. - // 128-bit sum(x)^2 will be OK when the upper 64-bits are zero. - // The first is safe when n < 2^32 but we must check the sum high bits. - double diff; - if (((n >>> Integer.SIZE) | sum.hi64()) == 0) { - diff = sumSq.unsignedMultiply((int) n).subtract(sum.squareLow()).toDouble(); - } else { - diff = sumSq.toBigInteger().multiply(BigInteger.valueOf(n)) - .subtract(square(sum.toBigInteger())).doubleValue(); - } - // Compute the divide in double precision - return diff / IntMath.unsignedMultiplyToDouble(n, n0); - } + return Math.sqrt(LongVariance.computeVariance(sumSq, sum, n, biased)); - /** - * Convenience method to square a BigInteger. 
- * - * @param x Value - * @return x^2 - */ - private static BigInteger square(BigInteger x) { - return x.multiply(x); } @Override - public LongVariance combine(LongVariance other) { + public LongStandardDeviation combine(LongStandardDeviation other) { sumSq.add(other.sumSq); sum.add(other.sum); n += other.n; @@ -200,26 +170,21 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L } /** - * Sets the value of the biased flag. The default value is {@code false}. - * - * <p>If {@code false} the sum of squared deviations from the sample mean is normalised by - * {@code n - 1} where {@code n} is the number of samples. This is Bessel's correction - * for an unbiased estimator of the variance of a hypothetical infinite population. + * Sets the value of the biased flag. The default value is {@code false}. The bias + * term refers to the computation of the variance; the standard deviation is returned + * as the square root of the biased or unbiased <em>sample variance</em>. For further + * details see {@link LongVariance#setBiased(boolean) LongVariance.setBiased}. - * - * <p>If {@code true} the sum of squared deviations is normalised by the number of samples - * {@code n}. - * - * <p>Note: This option only applies when {@code n > 1}. The variance of {@code n = 1} is - * always 0. - * - * <p>This flag only controls the final computation of the statistic. The value of this flag - * will not affect compatibility between instances during a {@link #combine(LongVariance) combine} - * operation. + * <p>This flag only controls the final computation of the statistic. The value of + * this flag will not affect compatibility between instances during a + * {@link #combine(LongStandardDeviation) combine} operation. * * @param v Value. 
+ * @return {@code this} instance + * @see LongVariance#setBiased(boolean) */ - public LongVariance setBiased(boolean v) { + + public LongStandardDeviation setBiased(boolean v) { biased = v; return this; } diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java index a6cbe72..a078da4 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/LongVariance.java @@ -103,7 +103,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L * * <p>The initial result is {@code NaN}. * - * @return {@code IntVariance} instance. + * @return {@code LongVariance} instance. */ public static LongVariance create() { return new LongVariance(); @@ -113,7 +113,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L * Returns an instance populated using the input {@code values}. * * @param values Values. - * @return {@code IntVariance} instance. + * @return {@code LongVariance} instance. */ public static LongVariance of(long... values) { // Note: Arrays could be processed using specialised counts knowing the maximum limit @@ -156,8 +156,21 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L if (n == 1) { return 0; } - final long n0 = biased ? n : n - 1; + return computeVariance(sumSq, sum, n, biased); + } + /** + * Compute the variance. + * + * <p>It is assumed the count {@code n} is greater than 1. + * + * @param sumSq Sum of the squared values. + * @param sum Sum of the values. + * @param n Count of values that have been added. + * @param biased Flag to control if the statistic is biased, or should use a bias correction. 
+ * @return the variance + */ + static double computeVariance(UInt192 sumSq, Int128 sum, long n, boolean biased) { // Sum-of-squared deviations: sum(x^2) - sum(x)^2 / n // Sum-of-squared deviations precursor: n * sum(x^2) - sum(x)^2 // The precursor is computed in integer precision. @@ -177,6 +190,7 @@ public final class LongVariance implements LongStatistic, StatisticAccumulator<L diff = sumSq.toBigInteger().multiply(BigInteger.valueOf(n)) .subtract(square(sum.toBigInteger())).doubleValue(); } + final long n0 = biased ? n : n - 1; // Compute the divide in double precision return diff / IntMath.unsignedMultiplyToDouble(n, n0); } diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntStandardDeviationTest.java similarity index 58% copy from commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java copy to commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntStandardDeviationTest.java index 5eea9e8..52ed47c 100644 --- a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntStandardDeviationTest.java @@ -27,27 +27,26 @@ import org.apache.commons.statistics.distribution.TestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; /** - * Test for {@link IntVariance}. + * Test for {@link IntStandardDeviation}. 
*/ -final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { +final class IntStandardDeviationTest extends BaseIntStatisticTest<IntStandardDeviation> { @Override - protected IntVariance create() { - return IntVariance.create(); + protected IntStandardDeviation create() { + return IntStandardDeviation.create(); } @Override - protected IntVariance create(int... values) { - return IntVariance.of(values); + protected IntStandardDeviation create(int... values) { + return IntStandardDeviation.of(values); } @Override protected DoubleStatistic createAsDoubleStatistic(int... values) { - return Variance.of(Arrays.stream(values).asDoubleStream().toArray()); + return StandardDeviation.of(Arrays.stream(values).asDoubleStream().toArray()); } @Override @@ -62,24 +61,7 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { @Override protected StatisticResult getExpectedValue(int[] values) { - if (values.length == 1) { - return createStatisticResult(0.0); - } - final long s = Arrays.stream(values).asLongStream().sum(); - final BigInteger ss = Arrays.stream(values) - .mapToObj(i -> BigInteger.valueOf((long) i * i)) - .reduce(BigInteger.ZERO, BigInteger::add); - final MathContext mc = MathContext.DECIMAL128; - final int n = values.length; - // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) - // Exact numerator - final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract( - BigInteger.valueOf(s).pow(2)); - // Exact divide - final double x = new BigDecimal(num) - .divide(BigDecimal.valueOf(n * (n - 1L)), mc) - .doubleValue(); - return createStatisticResult(x); + return createStatisticResult(Math.sqrt(IntVarianceTest.computeExpectedVariance(values))); } @Override @@ -93,23 +75,60 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { builder.accept(addCase(Integer.MAX_VALUE - 1, Integer.MAX_VALUE)); builder.accept(addCase(Integer.MIN_VALUE + 1, Integer.MIN_VALUE)); - // Same cases as for the DoubleStatistic Variance but the 
tolerance is exact + // Same cases as for the DoubleStatistic StandardDeviation but the tolerance is exact final DoubleTolerance tol = DoubleTolerances.equals(); - // Python Numpy v1.25.1: numpy.var(x, ddof=1) - builder.accept(addReference(1.6666666666666667, tol, 1, 2, 3, 4)); - builder.accept(addReference(7.454545454545454, tol, + // Python Numpy v1.25.1: numpy.std(x, ddof=1) + builder.accept(addReference(1.2909944487358056, tol, 1, 2, 3, 4)); + builder.accept(addReference(2.73030134866931, tol, 14, 8, 11, 10, 7, 9, 10, 11, 10, 15, 5, 10)); - // R v4.3.1: var(x) - builder.accept(addReference(9.166666666666666, tol, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); - builder.accept(addReference(178.75, tol, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50)); + // R v4.3.1: sd(x) + builder.accept(addReference(3.0276503540974917, tol, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + builder.accept(addReference(13.369741957120938, tol, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50)); return builder.build(); } + @ParameterizedTest + @MethodSource("testAccept") + void testConsistentWithVarianceAccept(int[] values) { + assertConsistentWithVariance(Statistics.add(IntVariance.create(), values), + Statistics.add(IntStandardDeviation.create(), values)); + } + + @ParameterizedTest + @MethodSource("testArray") + void testConsistentWithVarianceArray(int[] values) { + assertConsistentWithVariance(IntVariance.of(values), + IntStandardDeviation.of(values)); + } + + @ParameterizedTest + @MethodSource("testAcceptAndCombine") + void testConsistentWithVarianceCombine(int[][] values) { + // Assume the sequential stream will combine in the same order. + // Do not use a parallel stream which may be stochastic. 
+ final IntVariance variance = Arrays.stream(values) + .map(IntVariance::of) + .reduce(IntVariance::combine) + .orElseGet(IntVariance::create); + final IntStandardDeviation std = Arrays.stream(values) + .map(IntStandardDeviation::of) + .reduce(IntStandardDeviation::combine) + .orElseGet(IntStandardDeviation::create); + assertConsistentWithVariance(variance, std); + } + + private static void assertConsistentWithVariance(IntVariance variance, IntStandardDeviation std) { + Assertions.assertEquals(Math.sqrt(variance.getAsDouble()), std.getAsDouble(), "Unbiased"); + variance.setBiased(true); + std.setBiased(true); + Assertions.assertEquals(Math.sqrt(variance.getAsDouble()), std.getAsDouble(), "Biased"); + } + @ParameterizedTest @MethodSource void testBiased(int[] values, double biased, double unbiased, DoubleTolerance tol) { - final IntVariance stat = IntVariance.of(values); + final IntStandardDeviation stat = IntStandardDeviation.of(values); // Default is unbiased final double actualUnbiased = stat.getAsDouble(); TestUtils.assertEquals(unbiased, actualUnbiased, tol, () -> "Unbiased: " + format(values)); @@ -125,28 +144,15 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { static Stream<Arguments> testBiased() { final Stream.Builder<Arguments> builder = Stream.builder(); - // Same cases as for the DoubleStatistic Variance but the tolerance is exact - final DoubleTolerance tol = DoubleTolerances.equals(); - - // Note: Biased variance is ((10-5.5)**2 + (1-5.5)**2)/2 = 20.25 - // Scale by (2 * 512 * 512) / (2 * 512 * 512 - 1) - // The variance is invariant to shift - final int shift = 253674678; - final int[] a = new int[2 * 512 * 512]; - Arrays.fill(a, 0, a.length / 2, 10 + shift); - Arrays.fill(a, a.length / 2, a.length, 1 + shift); - builder.accept(Arguments.of(a, 20.25, 20.250038623883484, tol)); - - // Python Numpy v1.25.1: numpy.var(x, ddof=0/1) - // Note: Numpy allows other degrees of freedom adjustment than 0 or 1. 
- builder.accept(Arguments.of(new int[] {1, 2, 3}, 0.6666666666666666, 1, tol)); - builder.accept(Arguments.of(new int[] {1, 2}, 0.25, 0.5, tol)); - // Matlab R2023s: var(x, 1/0) - // Matlab only allows turning the biased option on (1) or off (0). - // Note: Numpy will return NaN for ddof=1 when the array length is 1 (since 0 / 0 = NaN). - // This implementation matches the behaviour of Matlab which returns zero. - builder.accept(Arguments.of(new int[] {1}, 0, 0, tol)); - builder.accept(Arguments.of(new int[] {1, 2, 4, 8}, 7.1875, 9.583333333333334, tol)); + // Repack the same cases from variance + IntVarianceTest.testBiased().forEach(arg -> { + final Object[] args = arg.get(); + final Object a = args[0]; + final double biased = ((Number) args[1]).doubleValue(); + final double unbiased = ((Number) args[2]).doubleValue(); + final Object d = args[3]; + builder.accept(Arguments.of(a, Math.sqrt(biased), Math.sqrt(unbiased), d)); + }); return builder.build(); } @@ -159,14 +165,9 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { * will be incorrect so the test is limited to {@code n < 2^63}. 
*/ @ParameterizedTest - @CsvSource({ - "-1628367811, -516725738, 60", - "627834682, 456456670, 61", - "2147483647, 2147483646, 61", - "-2147483648, -2147483647, 61", - }) + @MethodSource(value = "org.apache.commons.statistics.descriptive.IntSumTest#testLongOverflow") void testLongOverflow(int x, int y, int exp) { - final IntVariance s = IntVariance.of(x, y); + final IntStandardDeviation s = IntStandardDeviation.of(x, y); // var = sum((x - mean)^2) / (n-1) // = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) long n = 2; @@ -179,12 +180,12 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { n <<= 1; term1 = term1.add(term1); term2 = term2.add(term2); - final double expected = new BigDecimal( + final double expected = Math.sqrt(new BigDecimal( term1.multiply(BigInteger.valueOf(n)).subtract(term2.pow(2))) .divide( new BigDecimal(BigInteger.valueOf(n).multiply(BigInteger.valueOf(n - 1))), MathContext.DECIMAL128) - .doubleValue(); + .doubleValue()); TestUtils.assertEquals(expected, s.getAsDouble(), tol); } } diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java index 5eea9e8..5ac63e7 100644 --- a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java @@ -27,7 +27,6 @@ import org.apache.commons.statistics.distribution.TestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; /** @@ -62,24 +61,7 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { @Override protected StatisticResult 
getExpectedValue(int[] values) { - if (values.length == 1) { - return createStatisticResult(0.0); - } - final long s = Arrays.stream(values).asLongStream().sum(); - final BigInteger ss = Arrays.stream(values) - .mapToObj(i -> BigInteger.valueOf((long) i * i)) - .reduce(BigInteger.ZERO, BigInteger::add); - final MathContext mc = MathContext.DECIMAL128; - final int n = values.length; - // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) - // Exact numerator - final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract( - BigInteger.valueOf(s).pow(2)); - // Exact divide - final double x = new BigDecimal(num) - .divide(BigDecimal.valueOf(n * (n - 1L)), mc) - .doubleValue(); - return createStatisticResult(x); + return createStatisticResult(computeExpectedVariance(values)); } @Override @@ -106,6 +88,32 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { return builder.build(); } + /** + * Helper function to compute the expected variance using BigDecimal. + * + * @param values Values. 
+ * @return Variance of values + */ + static double computeExpectedVariance(int[] values) { + if (values.length == 1) { + return 0; + } + final long s = Arrays.stream(values).asLongStream().sum(); + final BigInteger ss = Arrays.stream(values) + .mapToObj(i -> BigInteger.valueOf((long) i * i)) + .reduce(BigInteger.ZERO, BigInteger::add); + final MathContext mc = MathContext.DECIMAL128; + final int n = values.length; + // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) + // Exact numerator + final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract( + BigInteger.valueOf(s).pow(2)); + // Exact divide + return new BigDecimal(num) + .divide(BigDecimal.valueOf(n * (n - 1L)), mc) + .doubleValue(); + } + @ParameterizedTest @MethodSource void testBiased(int[] values, double biased, double unbiased, DoubleTolerance tol) { @@ -159,12 +167,7 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { * will be incorrect so the test is limited to {@code n < 2^63}. */ @ParameterizedTest - @CsvSource({ - "-1628367811, -516725738, 60", - "627834682, 456456670, 61", - "2147483647, 2147483646, 61", - "-2147483648, -2147483647, 61", - }) + @MethodSource(value = "org.apache.commons.statistics.descriptive.IntSumTest#testLongOverflow") void testLongOverflow(int x, int y, int exp) { final IntVariance s = IntVariance.of(x, y); // var = sum((x - mean)^2) / (n-1) diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongStandardDeviationTest.java similarity index 55% copy from commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java copy to commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongStandardDeviationTest.java index 5eea9e8..7478a34 100644 --- 
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/IntVarianceTest.java +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongStandardDeviationTest.java @@ -27,27 +27,26 @@ import org.apache.commons.statistics.distribution.TestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; /** - * Test for {@link IntVariance}. + * Test for {@link LongStandardDeviation}. */ -final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { +final class LongStandardDeviationTest extends BaseLongStatisticTest<LongStandardDeviation> { @Override - protected IntVariance create() { - return IntVariance.create(); + protected LongStandardDeviation create() { + return LongStandardDeviation.create(); } @Override - protected IntVariance create(int... values) { - return IntVariance.of(values); + protected LongStandardDeviation create(long... values) { + return LongStandardDeviation.of(values); } @Override - protected DoubleStatistic createAsDoubleStatistic(int... values) { - return Variance.of(Arrays.stream(values).asDoubleStream().toArray()); + protected DoubleStatistic createAsDoubleStatistic(long... 
values) { + return StandardDeviation.of(Arrays.stream(values).asDoubleStream().toArray()); } @Override @@ -61,25 +60,8 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { } @Override - protected StatisticResult getExpectedValue(int[] values) { - if (values.length == 1) { - return createStatisticResult(0.0); - } - final long s = Arrays.stream(values).asLongStream().sum(); - final BigInteger ss = Arrays.stream(values) - .mapToObj(i -> BigInteger.valueOf((long) i * i)) - .reduce(BigInteger.ZERO, BigInteger::add); - final MathContext mc = MathContext.DECIMAL128; - final int n = values.length; - // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) - // Exact numerator - final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract( - BigInteger.valueOf(s).pow(2)); - // Exact divide - final double x = new BigDecimal(num) - .divide(BigDecimal.valueOf(n * (n - 1L)), mc) - .doubleValue(); - return createStatisticResult(x); + protected StatisticResult getExpectedValue(long[] values) { + return createStatisticResult(Math.sqrt(LongVarianceTest.computeExpectedVariance(values))); } @Override @@ -93,23 +75,60 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { builder.accept(addCase(Integer.MAX_VALUE - 1, Integer.MAX_VALUE)); builder.accept(addCase(Integer.MIN_VALUE + 1, Integer.MIN_VALUE)); - // Same cases as for the DoubleStatistic Variance but the tolerance is exact + // Same cases as for the DoubleStatistic StandardDeviation but the tolerance is exact final DoubleTolerance tol = DoubleTolerances.equals(); - // Python Numpy v1.25.1: numpy.var(x, ddof=1) - builder.accept(addReference(1.6666666666666667, tol, 1, 2, 3, 4)); - builder.accept(addReference(7.454545454545454, tol, + // Python Numpy v1.25.1: numpy.std(x, ddof=1) + builder.accept(addReference(1.2909944487358056, tol, 1, 2, 3, 4)); + builder.accept(addReference(2.73030134866931, tol, 14, 8, 11, 10, 7, 9, 10, 11, 10, 15, 5, 10)); - // R v4.3.1: var(x) - 
builder.accept(addReference(9.166666666666666, tol, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); - builder.accept(addReference(178.75, tol, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50)); + // R v4.3.1: sd(x) + builder.accept(addReference(3.0276503540974917, tol, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + builder.accept(addReference(13.369741957120938, tol, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50)); return builder.build(); } + @ParameterizedTest + @MethodSource("testAccept") + void testConsistentWithVarianceAccept(long[] values) { + assertConsistentWithVariance(Statistics.add(LongVariance.create(), values), + Statistics.add(LongStandardDeviation.create(), values)); + } + + @ParameterizedTest + @MethodSource("testArray") + void testConsistentWithVarianceArray(long[] values) { + assertConsistentWithVariance(LongVariance.of(values), + LongStandardDeviation.of(values)); + } + + @ParameterizedTest + @MethodSource("testAcceptAndCombine") + void testConsistentWithVarianceCombine(long[][] values) { + // Assume the sequential stream will combine in the same order. + // Do not use a parallel stream which may be stochastic. 
+ final LongVariance variance = Arrays.stream(values) + .map(LongVariance::of) + .reduce(LongVariance::combine) + .orElseGet(LongVariance::create); + final LongStandardDeviation std = Arrays.stream(values) + .map(LongStandardDeviation::of) + .reduce(LongStandardDeviation::combine) + .orElseGet(LongStandardDeviation::create); + assertConsistentWithVariance(variance, std); + } + + private static void assertConsistentWithVariance(LongVariance variance, LongStandardDeviation std) { + Assertions.assertEquals(Math.sqrt(variance.getAsDouble()), std.getAsDouble(), "Unbiased"); + variance.setBiased(true); + std.setBiased(true); + Assertions.assertEquals(Math.sqrt(variance.getAsDouble()), std.getAsDouble(), "Biased"); + } + @ParameterizedTest @MethodSource - void testBiased(int[] values, double biased, double unbiased, DoubleTolerance tol) { - final IntVariance stat = IntVariance.of(values); + void testBiased(long[] values, double biased, double unbiased, DoubleTolerance tol) { + final LongStandardDeviation stat = LongStandardDeviation.of(values); // Default is unbiased final double actualUnbiased = stat.getAsDouble(); TestUtils.assertEquals(unbiased, actualUnbiased, tol, () -> "Unbiased: " + format(values)); @@ -125,28 +144,15 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { static Stream<Arguments> testBiased() { final Stream.Builder<Arguments> builder = Stream.builder(); - // Same cases as for the DoubleStatistic Variance but the tolerance is exact - final DoubleTolerance tol = DoubleTolerances.equals(); - - // Note: Biased variance is ((10-5.5)**2 + (1-5.5)**2)/2 = 20.25 - // Scale by (2 * 512 * 512) / (2 * 512 * 512 - 1) - // The variance is invariant to shift - final int shift = 253674678; - final int[] a = new int[2 * 512 * 512]; - Arrays.fill(a, 0, a.length / 2, 10 + shift); - Arrays.fill(a, a.length / 2, a.length, 1 + shift); - builder.accept(Arguments.of(a, 20.25, 20.250038623883484, tol)); - - // Python Numpy v1.25.1: numpy.var(x, 
ddof=0/1) - // Note: Numpy allows other degrees of freedom adjustment than 0 or 1. - builder.accept(Arguments.of(new int[] {1, 2, 3}, 0.6666666666666666, 1, tol)); - builder.accept(Arguments.of(new int[] {1, 2}, 0.25, 0.5, tol)); - // Matlab R2023s: var(x, 1/0) - // Matlab only allows turning the biased option on (1) or off (0). - // Note: Numpy will return NaN for ddof=1 when the array length is 1 (since 0 / 0 = NaN). - // This implementation matches the behaviour of Matlab which returns zero. - builder.accept(Arguments.of(new int[] {1}, 0, 0, tol)); - builder.accept(Arguments.of(new int[] {1, 2, 4, 8}, 7.1875, 9.583333333333334, tol)); + // Repack the same cases from variance + LongVarianceTest.testBiased().forEach(arg -> { + final Object[] args = arg.get(); + final Object a = args[0]; + final double biased = ((Number) args[1]).doubleValue(); + final double unbiased = ((Number) args[2]).doubleValue(); + final Object d = args[3]; + builder.accept(Arguments.of(a, Math.sqrt(biased), Math.sqrt(unbiased), d)); + }); return builder.build(); } @@ -159,14 +165,9 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { * will be incorrect so the test is limited to {@code n < 2^63}. 
*/ @ParameterizedTest - @CsvSource({ - "-1628367811, -516725738, 60", - "627834682, 456456670, 61", - "2147483647, 2147483646, 61", - "-2147483648, -2147483647, 61", - }) + @MethodSource(value = "org.apache.commons.statistics.descriptive.IntSumTest#testLongOverflow") void testLongOverflow(int x, int y, int exp) { - final IntVariance s = IntVariance.of(x, y); + final LongStandardDeviation s = LongStandardDeviation.of(x, y); // var = sum((x - mean)^2) / (n-1) // = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) long n = 2; @@ -179,12 +180,12 @@ final class IntVarianceTest extends BaseIntStatisticTest<IntVariance> { n <<= 1; term1 = term1.add(term1); term2 = term2.add(term2); - final double expected = new BigDecimal( + final double expected = Math.sqrt(new BigDecimal( term1.multiply(BigInteger.valueOf(n)).subtract(term2.pow(2))) .divide( new BigDecimal(BigInteger.valueOf(n).multiply(BigInteger.valueOf(n - 1))), MathContext.DECIMAL128) - .doubleValue(); + .doubleValue()); TestUtils.assertEquals(expected, s.getAsDouble(), tol); } } diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongVarianceTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongVarianceTest.java index b17c2f4..67f6b5b 100644 --- a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongVarianceTest.java +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/LongVarianceTest.java @@ -27,7 +27,6 @@ import org.apache.commons.statistics.distribution.TestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; /** @@ -74,24 +73,7 @@ final class LongVarianceTest extends BaseLongStatisticTest<LongVariance> { @Override protected StatisticResult 
getExpectedValue(long[] values) { - if (values.length == 1) { - return createStatisticResult(0.0); - } - final BigInteger s = Arrays.stream(values).mapToObj(BigInteger::valueOf) - .reduce(BigInteger.ZERO, BigInteger::add); - final BigInteger ss = Arrays.stream(values) - .mapToObj(i -> BigInteger.valueOf(i).pow(2)) - .reduce(BigInteger.ZERO, BigInteger::add); - final MathContext mc = MathContext.DECIMAL128; - final int n = values.length; - // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) - // Exact numerator - final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract(s.pow(2)); - // Exact divide - final double x = new BigDecimal(num) - .divide(BigDecimal.valueOf(n * (n - 1L)), mc) - .doubleValue(); - return createStatisticResult(x); + return createStatisticResult(computeExpectedVariance(values)); } @Override @@ -118,6 +100,32 @@ final class LongVarianceTest extends BaseLongStatisticTest<LongVariance> { return builder.build(); } + /** + * Helper function to compute the expected variance using BigDecimal. + * + * @param values Values. 
+ * @return Variance of values + */ + static double computeExpectedVariance(long[] values) { + if (values.length == 1) { + return 0; + } + final BigInteger s = Arrays.stream(values).mapToObj(BigInteger::valueOf) + .reduce(BigInteger.ZERO, BigInteger::add); + final BigInteger ss = Arrays.stream(values) + .mapToObj(i -> BigInteger.valueOf(i).pow(2)) + .reduce(BigInteger.ZERO, BigInteger::add); + final MathContext mc = MathContext.DECIMAL128; + final int n = values.length; + // var = (n * sum(x^2) - sum(x)^2) / (n * (n-1)) + // Exact numerator + final BigInteger num = ss.multiply(BigInteger.valueOf(n)).subtract(s.pow(2)); + // Exact divide + return new BigDecimal(num) + .divide(BigDecimal.valueOf(n * (n - 1L)), mc) + .doubleValue(); + } + @ParameterizedTest @MethodSource void testBiased(long[] values, double biased, double unbiased, DoubleTolerance tol) { @@ -171,12 +179,7 @@ final class LongVarianceTest extends BaseLongStatisticTest<LongVariance> { * will be incorrect so the test is limited to {@code n < 2^63}. 
*/ @ParameterizedTest - @CsvSource({ - "-1628367672438123811, -97927322516725738, 60", - "3279208082627834682, 4234564566706285432, 61", - "9223372036854775807, 9223372036854775806, 61", - "-9223372036854775808, -9223372036854775807, 61", - }) + @MethodSource(value = "org.apache.commons.statistics.descriptive.LongSumTest#testLongOverflow") void testLongOverflow(long x, long y, int exp) { final LongVariance s = LongVariance.of(x, y); // var = sum((x - mean)^2) / (n-1) diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java index 47158d5..dd35ecd 100644 --- a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java @@ -175,17 +175,15 @@ final class StandardDeviationTest extends BaseDoubleStatisticTest<StandardDeviat static Stream<Arguments> testBiased() { final Stream.Builder<Arguments> builder = Stream.builder(); - final DoubleTolerance tol = DoubleTolerances.ulps(1); - // Python Numpy v1.25.1: numpy.std(x, ddof=0/1) - // Note: Numpy allows other degrees of freedom adjustment than 0 or 1. - builder.accept(Arguments.of(new double[] {1, 2, 3}, 0.816496580927726, 1, tol)); - builder.accept(Arguments.of(new double[] {1, 2}, 0.5, 0.7071067811865476, tol)); - // Matlab R2023s: std(x, 1/0) - // Matlab only allows turning the biased option on (1) or off (0). - // Note: Numpy will return NaN for ddof=1 when the array length is 1 (since 0 / 0 = NaN). - // This implementation matches the behaviour of Matlab which returns zero.
- builder.accept(Arguments.of(new double[] {1}, 0, 0, tol)); - builder.accept(Arguments.of(new double[] {1, 2, 4, 8}, 2.680951323690902, 3.095695936834452, tol)); + // Repack the same cases from variance + VarianceTest.testBiased().forEach(arg -> { + final Object[] args = arg.get(); + final Object a = args[0]; + final double biased = ((Number) args[1]).doubleValue(); + final double unbiased = ((Number) args[2]).doubleValue(); + final Object d = args[3]; + builder.accept(Arguments.of(a, Math.sqrt(biased), Math.sqrt(unbiased), d)); + }); return builder.build(); } }