This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 4b18caee0b82cf6b326b4dcf66a5a161598db384 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Mon Dec 18 11:19:20 2023 +0000 STATISTICS-81: Add StatisticResult interface The result of a statistic is supported as int/long/double/BigInteger. --- .../statistics/descriptive/DoubleStatistic.java | 7 +- .../statistics/descriptive/StatisticResult.java | 101 +++++++++++++ .../commons/statistics/descriptive/Statistics.java | 30 +++- .../descriptive/StatisticResultTest.java | 158 +++++++++++++++++++++ src/conf/pmd/pmd-ruleset.xml | 7 + 5 files changed, 297 insertions(+), 6 deletions(-) diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java index 4baa9c9..6be034c 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java @@ -17,15 +17,12 @@ package org.apache.commons.statistics.descriptive; import java.util.function.DoubleConsumer; -import java.util.function.DoubleSupplier; /** - * Represents a state object for computing a single {@code Statistic} over {@code double} valued input(s). - * - * <p>Base interface implemented by all statistics. + * Represents a state object for computing a statistic over {@code double} valued input(s). 
* * @since 1.1 */ -public interface DoubleStatistic extends DoubleConsumer, DoubleSupplier { +public interface DoubleStatistic extends DoubleConsumer, StatisticResult { // Composite interface } diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StatisticResult.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StatisticResult.java new file mode 100644 index 0000000..56108e7 --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StatisticResult.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.function.DoubleSupplier; +import java.util.function.IntSupplier; +import java.util.function.LongSupplier; + +/** + * Represents the result of a statistic computed over a set of values. + * + * <p>Base interface implemented by all statistics. 
+ * + * @since 1.1 + */ +@FunctionalInterface +public interface StatisticResult extends DoubleSupplier, IntSupplier, LongSupplier { + /** + * {@inheritDoc} + * + * <p>The default implementation uses the closest representable {@code int} value of + * the {@link #getAsDouble()} result. In the event of ties the result is rounded towards + * positive infinity. This will raise an {@link ArithmeticException} if the closest + * integer result is not within the range {@code [-2^31, 2^31)}. + * + * @throws ArithmeticException if the {@code result} overflows an int, or is not + * finite + */ + @Override + default int getAsInt() { + // Note: Do not use (int) getAsLong() to avoid a narrowing primitive conversion. + final double x = getAsDouble(); + final double r = Statistics.roundToInteger(x); + if (r >= -0x1.0p31 && r < 0x1.0p31) { + return (int) r; + } + throw new ArithmeticException("integer overflow: " + x); + } + + /** + * {@inheritDoc} + * + * <p>The default implementation uses the closest representable {@code long} value of + * the {@link #getAsDouble()} result. In the event of ties the result is rounded + * towards positive infinity. This will raise an {@link ArithmeticException} if the closest + * long integer result is not within the range {@code [-2^63, 2^63)}. + * + * @throws ArithmeticException if the {@code result} overflows a long, or is not + * finite + */ + @Override + default long getAsLong() { + final double x = getAsDouble(); + final double r = Statistics.roundToInteger(x); + if (r >= -0x1.0p63 && r < 0x1.0p63) { + return (long) r; + } + throw new ArithmeticException("long integer overflow: " + x); + } + + /** + * Gets a result as a {@link BigInteger}. + * + * <p>The default implementation uses the closest representable {@code BigInteger} + * value of the {@link #getAsDouble()} result. In the event of ties the result is + * rounded towards positive infinity. This will raise an {@link ArithmeticException} if the + * result is not finite. 
+ * + * @return a result + * @throws ArithmeticException if the {@code result} is not finite + */ + default BigInteger getAsBigInteger() { + final double x = getAsDouble(); + if (!Double.isFinite(x)) { + throw new ArithmeticException("BigInteger overflow: " + x); + } + final double r = Statistics.roundToInteger(x); + if (r >= -0x1.0p63 && r < 0x1.0p63) { + // Representable as a long + return BigInteger.valueOf((long) r); + } + // Large result + return new BigDecimal(r).toBigInteger(); + } +} diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistics.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistics.java index 63c7ab9..34d5df8 100644 --- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistics.java +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistics.java @@ -22,6 +22,8 @@ import java.util.function.DoubleConsumer; * Utility methods for statistics. */ final class Statistics { + /** 0.5. */ + private static final double HALF = 0.5; /** No instances. */ private Statistics() {} @@ -45,7 +47,7 @@ final class Statistics { * Returns {@code true} if the second central moment {@code m2} is effectively * zero given the magnitude of the first raw moment {@code m1}. * - * <p>This method shares the logic for detecting a zero variance among implmentations + * <p>This method shares the logic for detecting a zero variance among implementations * that divide by the variance (e.g. skewness, kurtosis). * * @param m1 First raw moment (mean). @@ -61,4 +63,30 @@ final class Statistics { // (1e-15 ~ 4.5 eps where eps = 2^-52). return m2 <= Math.pow(1e-15 * m1, 2); } + + /** + * Get the whole number that is the nearest to x, with ties rounding towards positive infinity. 
+ * + * <p>This method is intended to perform the equivalent of + * {@link Math#round(double)} without converting to a {@code long} primitive type. + * This allows the domain of the result to be checked against the range {@code [-2^63, 2^63)}. + * + * <p>Note: Adapted from {@code o.a.c.math4.AccurateMath.rint} and + * modified to round ties towards positive infinity. + * + * @param x Number from which nearest whole number is requested. + * @return a double number r such that r is an integer and {@code r - 0.5 <= x < r + 0.5} + */ + static double roundToInteger(double x) { + final double y = Math.floor(x); + final double d = x - y; + if (d >= HALF) { + // Here we do not preserve the sign of the operand in the case + // of -0.5 <= x < -0.0 since the rounded result is required as an integer. + // if y == -1.0: + // return -0.0 + return y + 1.0; + } + return y; + } } diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StatisticResultTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StatisticResultTest.java new file mode 100644 index 0000000..a6a13af --- /dev/null +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StatisticResultTest.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Test for {@link StatisticResult}. + * + * <p>Note: Rounding + * + * <p>Use of {@link Math#rint(double)} will round ties to the nearest even number. + * This may not be desirable. {@link Math#round(double)} will round ties to positive infinity + * which leads to different rounding for positive and negatives; it also returns a {@code long}. + * Casting will discard the fractional part and may not return the closest integer. + * Using a rounding mode of half-up is consistent across signs and ties. However it does not + * create a result invariant to integer shift. + * <pre> + * value rint round cast RoundingMode.HALF_UP + * -3.5 -4 -3 -3 -4 + * -2.5 -2 -2 -2 -3 + * -1.5 -2 -1 -1 -2 + * -0.5 0 0 0 -1 + * 0.0 0 0 0 0 + * 0.5 0 1 0 1 + * 1.5 2 2 1 2 + * 2.5 2 3 2 3 + * 3.5 4 4 3 4 + * </pre> + * + * <p>Rounding uses the equivalent of {@link Math#round(double)}. A test asserts that the result + * is invariant to shift. 
+ */ +final class StatisticResultTest { + private static final int[] SIGNS = {-1, 1}; + + @ParameterizedTest + @ValueSource(doubles = {Double.NaN, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY}) + void testNonFinite(double x) { + final StatisticResult r = () -> x; + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsInt(), "int result: " + x); + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsLong(), "long result: " + x); + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsBigInteger(), "BigInteger result: " + x); + } + + @ParameterizedTest + @ValueSource(doubles = {0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75, 3, + Integer.MAX_VALUE - 0.75, Integer.MAX_VALUE - 0.5, Integer.MAX_VALUE - 0.25, + Integer.MAX_VALUE, Integer.MAX_VALUE + 0.25, + Integer.MIN_VALUE - 0.5, Integer.MIN_VALUE - 0.25, Integer.MIN_VALUE, + Integer.MIN_VALUE + 0.25, Integer.MIN_VALUE + 0.5, Integer.MIN_VALUE + 0.75 }) + void testRepresentableInt(double x) { + // Do not test both signs for the large negative integers + final int[] signs = x < 0 ? new int[] {1} : SIGNS; + for (final int sign : signs) { + final double y = x * sign; + final StatisticResult r = () -> y; + final BigDecimal expected = round(y); + Assertions.assertEquals(expected.intValue(), r.getAsInt(), () -> "int result: " + y); + Assertions.assertEquals(expected.longValue(), r.getAsLong(), () -> "long result: " + y); + Assertions.assertEquals(expected.toBigInteger(), r.getAsBigInteger(), () -> "BigInteger result: " + y); + } + } + + @ParameterizedTest + @ValueSource(doubles = { + Integer.MAX_VALUE + 0.5, Integer.MAX_VALUE + 1.25, + Integer.MIN_VALUE - 0.75, Integer.MIN_VALUE - 1.75, + Long.MIN_VALUE, + // Cannot represent Long.MAX_VALUE. 
Use the next value down: + // Math.nextDown(0x1.0p63) + 9.223372036854775E18}) + void testNonRepresentableInt(double x) { + final StatisticResult r = () -> x; + final BigDecimal expected = round(x); + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsInt(), () -> "int result: " + x); + Assertions.assertEquals(expected.longValue(), r.getAsLong(), () -> "long result: " + x); + Assertions.assertEquals(expected.toBigInteger(), r.getAsBigInteger(), () -> "BigInteger result: " + x); + } + + @ParameterizedTest + @ValueSource(doubles = { + Long.MAX_VALUE, + // Math.nextDown((double) Long.MIN_VALUE) + -9.223372036854778E18, + Double.MAX_VALUE, + -Double.MAX_VALUE, + }) + void testNonRepresentableLong(double x) { + final StatisticResult r = () -> x; + final BigDecimal expected = round(x); + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsInt(), () -> "int result: " + x); + Assertions.assertThrows(ArithmeticException.class, () -> r.getAsLong(), () -> "long result: " + x); + Assertions.assertEquals(expected.toBigInteger(), r.getAsBigInteger(), () -> "BigInteger result: " + x); + } + + /** + * Round the value to the nearest integer, with ties rounding towards positive infinity. + * This matches the rounding of {@link Math#round(double)} but returns a floating-point result. + * + * @param x Value. + * @return the rounded result + */ + private static BigDecimal round(double x) { + return x < 0 ? + new BigDecimal(x).setScale(0, RoundingMode.HALF_DOWN) : + new BigDecimal(x).setScale(0, RoundingMode.HALF_UP); + } + + /** + * Test the rounding of the double is invariant to shift. This is true if the rounding of + * ties is in a consistent direction. 
+ */ + @ParameterizedTest + @ValueSource(doubles = {-1.75, -1.5, -1.25, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75}) + void testShift(double x) { + final StatisticResult expected = () -> x; + for (final int shift : new int[] {-566, 42}) { + final StatisticResult r = () -> x + shift; + Assertions.assertEquals(expected.getAsInt() + shift, r.getAsInt(), () -> "int result: " + x + " + " + shift); + Assertions.assertEquals(expected.getAsLong() + shift, r.getAsLong(), () -> "long result: " + x + " + " + shift); + Assertions.assertEquals(expected.getAsBigInteger().add(BigInteger.valueOf(shift)), + r.getAsBigInteger(), () -> "BigInteger result: " + x + " + " + shift); + } + } + + /** + * Test the rounding of a double is equivalent to {@link Math#round(double)}. + */ + @ParameterizedTest + @ValueSource(doubles = {-1.75, -1.5, -1.25, -1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75}) + void testMatchesMathRound(double x) { + final StatisticResult r = () -> x; + final long expected = Math.round(x); + Assertions.assertEquals((int) expected, r.getAsInt(), () -> "int result: " + x); + Assertions.assertEquals(expected, r.getAsLong(), () -> "long result: " + x); + Assertions.assertEquals(BigInteger.valueOf(expected), r.getAsBigInteger(), () -> "BigInteger result: " + x); + } +} diff --git a/src/conf/pmd/pmd-ruleset.xml b/src/conf/pmd/pmd-ruleset.xml index e008d05..ff41429 100644 --- a/src/conf/pmd/pmd-ruleset.xml +++ b/src/conf/pmd/pmd-ruleset.xml @@ -208,6 +208,13 @@ value=".*'BinomialTest'.*|.*'ChiSquareTest'.*|.*'FisherExactTest'.*|.*'GTest'.*|.*'KolmogorovSmirnovTest'.*|.*'MannWhitneyUTest'.*|.*'TTest'.*|.*'WilcoxonSignedRankTest'.*|.*'UnconditionedExactTest'.*"/> </properties> </rule> + <rule ref="category/java/errorprone.xml/AvoidDecimalLiteralsInBigDecimalConstructor"> + <properties> + <!-- Exact conversion from double to BigInteger is required. 
--> + <property name="violationSuppressXPath" + value="./ancestor-or-self::MethodDeclaration[@Name='getAsBigInteger']"/> + </properties> + </rule> <rule ref="category/java/performance.xml/AvoidArrayLoops"> <properties>