This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit c9ed0a61e16e8083dc0e4b1f8afa33086d195f40 Author: AJoshi <[email protected]> AuthorDate: Sun Jul 16 08:59:34 2023 +0100 STATISTICS-71: Add base interface for statistic implementations STATISTICS-72: Add Min implementation Closes #46 --- commons-statistics-descriptive/pom.xml | 13 ++ .../statistics/descriptive/DoubleStatistic.java | 31 ++++ .../descriptive/DoubleStatisticAccumulator.java | 35 ++++ .../apache/commons/statistics/descriptive/Min.java | 134 +++++++++++++++ .../statistics/descriptive/package-info.java | 23 +++ .../commons/statistics/descriptive/MinTest.java | 187 +++++++++++++++++++++ .../commons/statistics/descriptive/TestHelper.java | 71 ++++++++ src/conf/pmd/pmd-ruleset.xml | 2 +- 8 files changed, 495 insertions(+), 1 deletion(-) diff --git a/commons-statistics-descriptive/pom.xml b/commons-statistics-descriptive/pom.xml index c83b295..f584a0e 100644 --- a/commons-statistics-descriptive/pom.xml +++ b/commons-statistics-descriptive/pom.xml @@ -43,6 +43,19 @@ </properties> <dependencies> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-rng-simple</artifactId> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-rng-sampling</artifactId> + <scope>test</scope> + </dependency> + </dependencies> </project> diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java new file mode 100644 index 0000000..4baa9c9 --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistic.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import java.util.function.DoubleConsumer; +import java.util.function.DoubleSupplier; + +/** + * Represents a state object for computing a single {@code Statistic} over {@code double} valued input(s). + * + * <p>Base interface implemented by all statistics. + * + * @since 1.1 + */ +public interface DoubleStatistic extends DoubleConsumer, DoubleSupplier { + // Composite interface +} diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatisticAccumulator.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatisticAccumulator.java new file mode 100644 index 0000000..6823628 --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatisticAccumulator.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +/** + * A mutable result container that accumulates a {@code DoubleStatistic}. + * + * @param <T> {@code DoubleStatistic} being accumulated. + * + * @since 1.1 + */ +public interface DoubleStatisticAccumulator<T extends DoubleStatistic> { + + /** + * Combines the state of another {@code DoubleStatistic} into this one. + * + * @param other Another {@code DoubleStatistic} to be combined. + * @return {@code this} instance after combining {@code other}. + */ + T combine(T other); +} diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java new file mode 100644 index 0000000..7db81fb --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Min.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import java.util.Arrays; + +/** + * Returns the minimum of the available values. + * + * <p>The result is <code>NaN</code> if any of the values is <code>NaN</code>. + * + * <p>The result is <code>POSITIVE_INFINITY</code> if no values are added. + * + * <p>This class is designed to work with (though does not require) + * {@linkplain java.util.stream streams}. + * + * <p><strong>This implementation is not thread safe.</strong> + * If multiple threads access an instance of this class concurrently, + * and at least one of the threads invokes the <code>accept()</code> or + * <code>combine()</code> method, it must be synchronized externally. + * + * <p>However, it is safe to use <code>accept()</code> and <code>combine()</code> + * as <code>accumulator</code> and <code>combiner</code> functions of + * {@link java.util.stream.Collector Collector} on a parallel stream, + * because the parallel implementation of {@link java.util.stream.Stream#collect Stream.collect()} + * provides the necessary partitioning, isolation, and merging of results for + * safe and efficient parallel execution. + * + * @since 1.1 + */ +public abstract class Min implements DoubleStatistic, DoubleStatisticAccumulator<Min> { + + /** + * Create a Min instance. + */ + Min() { + // No-op + } + + /** + * Creates a {@code Min} implementation which does not store the input value(s) it consumes. + * + * <p>The result is <code>NaN</code> if any of the values is <code>NaN</code>. 
+ * + * <p>The result is {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY} + * if no values have been added. + * + * @return {@code Min} implementation. + */ + public static Min create() { + return new StorelessMin(); + } + + /** + * Returns a {@code Min} instance that has the minimum of all input value(s). + * + * <p>The result is <code>NaN</code> if any of the values is <code>NaN</code>. + * + * <p>When the input is an empty array, the result is + * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY}. + * + * @param values Values. + * @return {@code Min} instance. + */ + public static Min of(double... values) { + final StorelessMin min = new StorelessMin(); + Arrays.stream(values).forEach(min); + return min; + } + + /** + * Updates the state of the statistic to reflect the addition of {@code value}. + * @param value Value. + */ + @Override + public abstract void accept(double value); + + /** + * Gets the minimum of all input values. + * + * <p>When no values have been added, the result is + * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY}. + * + * @return Minimum of all values seen so far. + */ + @Override + public abstract double getAsDouble(); + + /** {@inheritDoc} */ + @Override + public abstract Min combine(Min other); + + /** + * {@code Min} implementation that does not store the input value(s) processed so far. + * + * <p>Uses JDK's {@link Double#min Double.min} as an underlying function + * to compute the minimum. + */ + private static class StorelessMin extends Min { + + /** Current min. 
*/ + private double min = Double.POSITIVE_INFINITY; + + @Override + public void accept(double value) { + min = Double.min(min, value); + } + + @Override + public double getAsDouble() { + return min; + } + + @Override + public Min combine(Min other) { + accept(other.getAsDouble()); + return this; + } + } +} diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/package-info.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/package-info.java new file mode 100644 index 0000000..ef5c4cc --- /dev/null +++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Implementations of univariate statistics. 
+ * + * @since 1.1 + */ +package org.apache.commons.statistics.descriptive; diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MinTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MinTest.java new file mode 100644 index 0000000..680a6f6 --- /dev/null +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MinTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import java.util.Arrays; +import java.util.function.DoubleSupplier; +import java.util.stream.Stream; +import org.apache.commons.rng.UniformRandomProvider; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Test for {@link Min}. 
+ */ +final class MinTest { + + @Test + void testEmpty() { + Min min = Min.create(); + Assertions.assertEquals(Double.POSITIVE_INFINITY, min.getAsDouble()); + } + + @Test + void testIncrement() { + // Test the min after each incremental update + // First parameter of testArray is the value that would be added + // Second parameter of testArray is the min we expect after adding the value + double[][] testArray = { + {1729.22, 1729.22}, + {153.75, 153.75}, + {370.371, 153.75}, + {0.0, 0.0}, + {+0.0, 0.0}, + {-0.0, -0.0}, + {Double.POSITIVE_INFINITY, -0.0}, + {Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY}, + {Double.MIN_VALUE, Double.NEGATIVE_INFINITY} + }; + + Min stat = Min.create(); + for (final double[] valueAndExpected: testArray) { + final double value = valueAndExpected[0]; + final double expected = valueAndExpected[1]; + stat.accept(value); + Assertions.assertEquals(expected, stat.getAsDouble()); + } + } + + @Test + void testNaN() { + // Test non-nan values cannot revert a NaN + double[] testArray = {Double.NaN, +0.0d, -0.0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY}; + Min stat = Min.create(); + for (final double x : testArray) { + stat.accept(x); + Assertions.assertEquals(Double.NaN, stat.getAsDouble()); + } + } + + @ParameterizedTest + @MethodSource + void testMin(double[] values, double expected) { + Min stat = Min.create(); + Arrays.stream(values).forEach(stat); + double actual = stat.getAsDouble(); + Assertions.assertEquals(expected, actual, "min"); + Assertions.assertEquals(expected, Min.of(values).getAsDouble(), "min"); + } + + static Stream<Arguments> testMin() { + return Stream.of( + Arguments.of(new double[] {}, Double.POSITIVE_INFINITY), + Arguments.of(new double[] {3.14}, 3.14), + Arguments.of(new double[] {12.34, 56.78, -2.0}, -2.0), + Arguments.of(new double[] {Double.NaN, 3.14, Double.NaN, Double.NaN}, Double.NaN), + Arguments.of(new double[] {-1d, 1d, Double.NaN}, Double.NaN), + Arguments.of(new double[] {Double.NaN, 
Double.NaN, Double.NaN}, Double.NaN), + Arguments.of(new double[] {0.0d, Double.NaN, +0.0d, -0.0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY}, Double.NaN), + Arguments.of(new double[] {+0.0d, -0.0d, 1.0, 3.14}, -0.0d), + Arguments.of(new double[] {-0.0, +0.0}, -0.0), + Arguments.of(new double[] {0.0, -0.0}, -0.0), + Arguments.of(new double[] {0.0, +0.0}, +0.0), + Arguments.of(new double[] {1.2, -34.56, 456.789, -5678.9012}, -5678.9012), + Arguments.of(new double[] {-23467824, 23648, 2368, 23749, -23424, -23492, -92397747}, -92397747), + Arguments.of(new double[] {0.0d, +0.0d, -0.0d, Double.POSITIVE_INFINITY, Double.MIN_VALUE}, -0.0), + Arguments.of(new double[] {0.0d, +0.0d, -0.0d, Double.POSITIVE_INFINITY, -Double.MIN_VALUE}, -Double.MIN_VALUE), + Arguments.of(new double[] {0.0d, +0.0d, -0.0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.MIN_VALUE}, Double.NEGATIVE_INFINITY) + ); + } + + @ParameterizedTest + @MethodSource(value = "testMin") + void testParallelStream(double[] values, double expected) { + double actual = Arrays.stream(values).parallel().collect(Min::create, Min::accept, Min::combine).getAsDouble(); + Assertions.assertEquals(expected, actual); + } + + @ParameterizedTest + @MethodSource(value = "testMin") + void testMinRandomOrder(double[] values, double expected) { + UniformRandomProvider rng = TestHelper.createRNG(); + for (int i = 0; i < 10; i++) { + testMin(TestHelper.shuffle(rng, values), expected); + } + } + + @ParameterizedTest + @MethodSource + void testCombine(double[] first, double[] second, double expected) { + Min firstMin = Min.create(); + Min secondMin = Min.create(); + + Arrays.stream(first).forEach(firstMin); + Arrays.stream(second).forEach(secondMin); + + double secondMinBeforeCombine = secondMin.getAsDouble(); + firstMin.combine(secondMin); + Assertions.assertEquals(expected, firstMin.getAsDouble()); + Assertions.assertEquals(secondMinBeforeCombine, secondMin.getAsDouble()); + } + + static Stream<Arguments> 
testCombine() { + return Stream.of( + Arguments.of(new double[] {}, new double[] {}, Double.POSITIVE_INFINITY), + Arguments.of(new double[] {3.14}, new double[] {}, 3.14), + Arguments.of(new double[] {}, new double[] {2.718}, 2.718), + Arguments.of(new double[] {}, new double[] {Double.NaN}, Double.NaN), + Arguments.of(new double[] {Double.NaN, Double.NaN}, new double[] {}, Double.NaN), + Arguments.of(new double[] {3.14}, new double[] {2.718}, 2.718), + Arguments.of(new double[] {-1, 0, 1}, new double[] {1.1, 2.2, 3.3}, -1), + Arguments.of(new double[] {3.14, 1.1, 22.22}, new double[] {2.718, 1.1, 333.333}, 1.1), + Arguments.of(new double[] {12.34, 56.78, -2.0}, new double[] {0.0, 23.45}, -2.0), + Arguments.of(new double[] {-2023.79, 11.11, 333.333}, new double[] {1.1}, -2023.79), + Arguments.of(new double[] {1.1, +0.0, 3.14}, new double[] {22.22, 2.718, -0.0}, -0.0), + Arguments.of(new double[] {0.0, -Double.MIN_VALUE, Double.POSITIVE_INFINITY}, + new double[] {Double.NEGATIVE_INFINITY, -0.0, Double.NEGATIVE_INFINITY, Double.MIN_VALUE}, + Double.NEGATIVE_INFINITY), + Arguments.of(new double[] {0.0, Double.NaN, -Double.MIN_VALUE, Double.POSITIVE_INFINITY}, + new double[] {Double.NaN, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.MIN_VALUE}, + Double.NaN) + ); + } + + @ParameterizedTest + @MethodSource + void testArrayOfArrays(double[][] input, double expected) { + double actual = Arrays.stream(input) + .map(Min::of) + .reduce(Min::combine) + .map(DoubleSupplier::getAsDouble) + .orElseThrow(RuntimeException::new); + + Assertions.assertEquals(expected, actual); + } + + static Stream<Arguments> testArrayOfArrays() { + return Stream.of( + Arguments.of(new double[][] {{}, {}, {}}, Double.POSITIVE_INFINITY), + Arguments.of(new double[][] {{}, {Double.NaN}, {-1.7}}, Double.NaN), + Arguments.of(new double[][] {{}, {Double.NaN}, {}}, Double.NaN), + Arguments.of(new double[][] {{}, {1.1, 2}, {-1.7}}, -1.7), + Arguments.of(new double[][] {{1, 2}, {3, 4}}, 1), + 
Arguments.of(new double[][] {{+0.0, 2.0}, {1.0, -0.0, 3.14}}, -0.0), + Arguments.of(new double[][] {{+0.0, Double.NEGATIVE_INFINITY}, {-0.0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY}}, Double.NEGATIVE_INFINITY), + Arguments.of(new double[][] {{1.1, 22.22}, {34.56, -5678.9, 2.718}, {Double.NaN, 0}}, + Double.NaN), + Arguments.of(new double[][] {{Double.NaN, Double.NaN}, {Double.NaN}, {Double.NaN, Double.NaN, Double.NaN}}, Double.NaN) + ); + } +} diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java new file mode 100644 index 0000000..496b058 --- /dev/null +++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.descriptive; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.simple.RandomSource; + +/** + * Helper class for tests in {@code o.a.c.s.descriptive} module. + */ +final class TestHelper { + + /** Class contains only static methods. 
*/ + private TestHelper() {} + + /** + * Creates a RNG instance. + * + * @return A new RNG instance. + */ + static UniformRandomProvider createRNG() { + return RandomSource.SPLIT_MIX_64.create(); + } + + /** + * Shuffles the entries of the given array. + * + * <p>Uses Fisher-Yates shuffle copied from + * <a href="https://github.com/apache/commons-rng/blob/master/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java"> + * RNG ArraySampler.</a> + * + * <p>This can be removed when {@code commons-rng-sampling 1.6} is released. + * + * @param rng Source of randomness. + * @param array Array whose entries will be shuffled (in-place). + * @return Shuffled input array. + */ + static double[] shuffle(UniformRandomProvider rng, double[] array) { + for (int i = array.length; i > 1; i--) { + swap(array, i - 1, rng.nextInt(i)); + } + return array; + } + + /** + * Swaps the two specified elements in the array. + * + * @param array Array. + * @param i First index. + * @param j Second index. + */ + private static void swap(double[] array, int i, int j) { + final double tmp = array[i]; + array[i] = array[j]; + array[j] = tmp; + } +} diff --git a/src/conf/pmd/pmd-ruleset.xml b/src/conf/pmd/pmd-ruleset.xml index 7d1db2f..8bfff93 100644 --- a/src/conf/pmd/pmd-ruleset.xml +++ b/src/conf/pmd/pmd-ruleset.xml @@ -90,7 +90,7 @@ <properties> <property name="violationSuppressXPath" value="./ancestor-or-self::ClassOrInterfaceDeclaration[@SimpleName='DD' - or @SimpleName='Two' or @SimpleName='One']"/> + or @SimpleName='Two' or @SimpleName='One' or @SimpleName='Min']"/> </properties> </rule> <rule ref="category/java/codestyle.xml/PrematureDeclaration">
