This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git


The following commit(s) were added to refs/heads/master by this push:
     new 490c8bb  STATISTICS-91: Confidence intervals for a normal population
490c8bb is described below

commit 490c8bb91e4b4bae39bb34977561812ccd2661c2
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Fri Jun 13 14:44:24 2025 +0100

    STATISTICS-91: Confidence intervals for a normal population
---
 commons-statistics-interval/pom.xml                |   7 ++
 .../commons/statistics/interval/ArgumentUtils.java |  41 ++++++++
 .../interval/BinomialConfidenceInterval.java       |   5 +-
 .../interval/NormalConfidenceInterval.java         |  97 ++++++++++++++++++
 .../interval/NormalConfidenceIntervalTest.java     | 110 +++++++++++++++++++++
 .../commons/statistics/interval/UserGuideTest.java |  29 ++++++
 src/changes/changes.xml                            |   4 +
 src/conf/checkstyle/checkstyle-suppressions.xml    |   1 +
 src/site/xdoc/userguide/index.xml                  |  29 ++++++
 9 files changed, 319 insertions(+), 4 deletions(-)

diff --git a/commons-statistics-interval/pom.xml 
b/commons-statistics-interval/pom.xml
index fc04b91..92b7a4d 100644
--- a/commons-statistics-interval/pom.xml
+++ b/commons-statistics-interval/pom.xml
@@ -50,6 +50,13 @@
       <version>1.2-SNAPSHOT</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-statistics-descriptive</artifactId>
+      <version>1.2-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+
   </dependencies>
 
 </project>
diff --git 
a/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/ArgumentUtils.java
 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/ArgumentUtils.java
new file mode 100644
index 0000000..668bfbf
--- /dev/null
+++ 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/ArgumentUtils.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.interval;
+
+/**
+ * Utilities for argument validation.
+ *
+ * @since 1.2
+ */
+final class ArgumentUtils {
+    /** No instances. */
+    private ArgumentUtils() {}
+
+    /**
+     * Check the error rate {@code alpha} is in the open interval {@code (0, 
1)}.
+     *
+     * @param alpha Error rate.
+     * @throws IllegalArgumentException if {@code alpha} is not in the open 
interval {@code (0, 1)}.
+     */
+    static void checkErrorRate(double alpha) {
+        if (alpha > 0 && alpha < 1) {
+            return;
+        }
+        // Out-of-range or NaN
+        throw new IllegalArgumentException("Error rate is not in (0, 1): " + 
alpha);
+    }
+}
diff --git 
a/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/BinomialConfidenceInterval.java
 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/BinomialConfidenceInterval.java
index 078cbc8..2b0eb88 100644
--- 
a/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/BinomialConfidenceInterval.java
+++ 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/BinomialConfidenceInterval.java
@@ -177,10 +177,7 @@ public enum BinomialConfidenceInterval {
                 String.format("Number of successes (%d) must be less than or 
equal to number of trials (%d)",
                     numberOfSuccesses, numberOfTrials));
         }
-        // Negation of alpha inside the interval (0, 1) detects NaN
-        if (!(alpha > 0 && alpha < 1)) {
-            throw new IllegalArgumentException("Error rate is not in (0, 1): " 
+ alpha);
-        }
+        ArgumentUtils.checkErrorRate(alpha);
         return create(numberOfTrials, numberOfSuccesses, alpha);
     }
 
diff --git 
a/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/NormalConfidenceInterval.java
 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/NormalConfidenceInterval.java
new file mode 100644
index 0000000..2906688
--- /dev/null
+++ 
b/commons-statistics-interval/src/main/java/org/apache/commons/statistics/interval/NormalConfidenceInterval.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.interval;
+
+import org.apache.commons.statistics.distribution.ChiSquaredDistribution;
+import org.apache.commons.statistics.distribution.TDistribution;
+
+/**
+ * Generate confidence intervals for a normally distributed population.
+ *
+ * @see <a
+ * 
href="https://en.wikipedia.org/wiki/Normal_distribution#Confidence_intervals">Normal
+ * distribution confidence interval (Wikipedia)</a>
+ *
+ * @since 1.2
+ */
+public enum NormalConfidenceInterval {
+    /**
+     * Create a confidence interval for the true mean of an unknown normally 
distributed population.
+     */
+    MEAN {
+        @Override
+        Interval create(double mean, double variance, long n, double alpha) {
+            final double c = TDistribution.of(n - 
1).inverseSurvivalProbability(alpha * 0.5);
+            final double distance = c * Math.sqrt(variance / n);
+            return new BaseInterval(mean - distance, mean + distance);
+        }
+    },
+    /**
+     * Create a confidence interval for the true variance of an unknown 
normally distributed population.
+     */
+    VARIANCE {
+        @Override
+        Interval create(double mean, double variance, long n, double alpha) {
+            final ChiSquaredDistribution d = ChiSquaredDistribution.of(n - 1);
+            final double f = variance * (n - 1.0);
+            final double lower = f / d.inverseSurvivalProbability(alpha * 0.5);
+            final double upper = f / d.inverseCumulativeProbability(alpha * 
0.5);
+            return new BaseInterval(lower, upper);
+        }
+    };
+
+    /**
+     * Create a confidence interval from an independent sample from an unknown 
normally
+     * distributed population with the given error rate.
+     *
+     * <p>The error rate {@code alpha} is related to the confidence level that 
the
+     * interval contains the true population parameter (mean or variance) as
+     * {@code alpha = 1 - confidence}, where {@code confidence} is the 
confidence level
+     * in {@code [0, 1]}. For example a 95% confidence level is an {@code 
alpha} of 0.05.
+     *
+     * <p>The unbiased variance is the sum of the squared deviations from the 
mean divided
+     * by {@code n - 1}.
+     *
+     * @param mean Sample mean.
+     * @param variance Unbiased sample variance.
+     * @param n Sample size.
+     * @param alpha Desired error rate that the true population parameter 
falls
+     * <em>outside</em> the returned interval.
+     * @return Confidence interval containing the target with error rate 
{@code alpha}
+     * @throws IllegalArgumentException if {@code n <= 1}, or if {@code alpha} 
is not in
+     * the open interval {@code (0, 1)}.
+     */
+    public Interval fromErrorRate(double mean, double variance, long n, double 
alpha) {
+        if (n <= 1) {
+            throw new IllegalArgumentException("Sample size is not above one: 
" + n);
+        }
+        ArgumentUtils.checkErrorRate(alpha);
+        return create(mean, variance, n, alpha);
+    }
+
+    /**
+     * Create a confidence interval from an independent sample from an unknown 
normally
+     * distributed population with the given error rate.
+     *
+     * @param mean Sample mean.
+     * @param variance Unbiased sample variance.
+     * @param n Sample size.
+     * @param alpha Desired error rate.
+     * @return Confidence interval
+     */
+    abstract Interval create(double mean, double variance, long n, double 
alpha);
+}
diff --git 
a/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/NormalConfidenceIntervalTest.java
 
b/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/NormalConfidenceIntervalTest.java
new file mode 100644
index 0000000..7efa61b
--- /dev/null
+++ 
b/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/NormalConfidenceIntervalTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.interval;
+
+import java.util.stream.Stream;
+import java.util.stream.Stream.Builder;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.EnumSource;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test cases for {@link NormalConfidenceInterval}.
+ */
+class NormalConfidenceIntervalTest {
+    @ParameterizedTest
+    @EnumSource
+    void testInvalidArgumentsThrow(NormalConfidenceInterval method) {
+        double mean = 0.1;
+        double variance = 1.23;
+        int n = 42;
+        double alpha = 0.05;
+        Assertions.assertDoesNotThrow(() -> method.fromErrorRate(mean, 
variance, n, alpha));
+        // n < 2
+        Assertions.assertDoesNotThrow(() -> method.fromErrorRate(mean, 
variance, 2, alpha));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, 1, alpha));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, 0, alpha));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, -1, alpha));
+        // alpha not in (0, 1)
+        Assertions.assertDoesNotThrow(() -> method.fromErrorRate(mean, 
variance, n, Math.nextUp(0.0)));
+        Assertions.assertDoesNotThrow(() -> method.fromErrorRate(mean, 
variance, n, Math.nextDown(1.0)));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, n, 0.0));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, n, 1.0));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, n, -0.01));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, n, 1.01));
+        Assertions.assertThrows(IllegalArgumentException.class, () -> 
method.fromErrorRate(mean, variance, n, Double.NaN));
+    }
+
+    @ParameterizedTest
+    @MethodSource()
+    void testInterval(NormalConfidenceInterval method, double mean, double 
variance, int n, double alpha,
+            double lower, double upper, double relativeError) {
+        final Interval i = method.fromErrorRate(mean, variance, n, alpha);
+        Assertions.assertEquals(lower, i.getLowerBound(), lower * 
relativeError, "lower");
+        Assertions.assertEquals(upper, i.getUpperBound(), upper * 
relativeError, "upper");
+    }
+
+    static Stream<Arguments> testInterval() {
+        final Builder<Arguments> builder = Stream.builder();
+
+        // mean cases generated using R 4.4.3, e.g.
+        // options(digits=17)
+        // x <- c(1, 2, 3, 4)
+        // mean(x); var(x); length(x)
+        // t.test(x, conf.level=0.95)$conf.int
+        // Data generated in R using random numbers, e.g.
+        // x = runif(100); x = rnorm(50, mean=3, sd=2)
+        NormalConfidenceInterval method;
+        method = NormalConfidenceInterval.MEAN;
+        add(builder, method, 2.5, 1.6666666666666667, 4, 0.05, 
0.44573974323947924, 4.55426025676052060, 1e-14);
+        add(builder, method, 0.5263914421340451, 0.079384303412544904, 100, 
0.05, 0.47048569257011025,
+            0.58229719169798000, 1e-14);
+        add(builder, method, 2.9535381946732131, 5.2628380291790835, 50, 0.1, 
2.4096097871064539, 3.4974666022399732,
+            1e-14);
+
+        // variance cases manually computed in R 4.4.3 (data x as above) e.g.
+        // alpha=0.05; n=length(x); v=var(x); (n-1)*v/qchisq(alpha/2, n-1, 
lower.tail=F); (n-1)*v/qchisq(alpha/2, n-1)
+        method = NormalConfidenceInterval.VARIANCE;
+        add(builder, method, 2.5, 1.6666666666666667, 4, 0.05, 
0.53485067734936409, 23.170107980137484, 1e-14);
+        add(builder, method, 0.5263914421340451, 0.079384303412544904, 100, 
0.05, 0.061197043596933121,
+            0.10712827588348012, 1e-14);
+        add(builder, method, 2.9535381946732131, 5.2628380291790835, 50, 0.1, 
3.887312567406342, 7.6002576083181186,
+            1e-14);
+
+        // Approximate formula for asymptotic distributions at large n uses z 
critical value
+        // from a normal distribution, here z_{0.025} = 1.96
+        final double z = 1.96;
+        final double mean = 1.23;
+        final double variance = 3.45;
+        final int n = 100_000;
+        double dist = z * Math.sqrt(variance / n);
+        add(builder, NormalConfidenceInterval.MEAN, mean, variance, n, 0.05, 
mean - dist, mean + dist, 1e-6);
+        dist = z * Math.sqrt(2.0 / n) * variance;
+        add(builder, NormalConfidenceInterval.VARIANCE, mean, variance, n, 
0.05, variance - dist, variance + dist,
+            1e-4);
+
+        return builder.build();
+    }
+
+    private static void add(Builder<Arguments> builder, 
NormalConfidenceInterval method,
+            double mean, double variance, int n, double alpha,
+            double lower, double upper, double relativeError) {
+        builder.accept(Arguments.of(method, mean, variance, n, alpha, lower, 
upper, relativeError));
+    }
+}
diff --git 
a/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/UserGuideTest.java
 
b/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/UserGuideTest.java
index d7b4d01..a0aa990 100644
--- 
a/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/UserGuideTest.java
+++ 
b/commons-statistics-interval/src/test/java/org/apache/commons/statistics/interval/UserGuideTest.java
@@ -17,6 +17,9 @@
 
 package org.apache.commons.statistics.interval;
 
+import java.util.EnumSet;
+import org.apache.commons.statistics.descriptive.DoubleStatistics;
+import org.apache.commons.statistics.descriptive.Statistic;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
@@ -41,6 +44,32 @@ class UserGuideTest {
         assertInterval(method.fromErrorRate(10000, 5000, alpha), 0.49020, 
0.50980, 1e-5);
     }
 
+    @Test
+    void testInterval2() {
+        // Results generated using R 4.4.3, e.g.
+        // options(digits=3)
+        // rnorm(15, 1.45, 0.1)
+        // Create data using the rounded sample:
+        // x = c(1.47, 1.40, 1.55, 1.44, 1.41, 1.38, 1.53, 1.42, 1.55, 1.55, 
1.31, 1.37, 1.53, 1.47, 1.51)
+        // options(digits=17)
+        // mean(x); var(x); length(x)
+        // t.test(x, conf.level=0.95)$conf.int
+        double[] x = {1.47, 1.40, 1.55, 1.44, 1.41, 1.38, 1.53, 1.42, 1.55, 
1.55, 1.31, 1.37, 1.53, 1.47, 1.51};
+        DoubleStatistics stats = 
DoubleStatistics.of(EnumSet.of(Statistic.MEAN, Statistic.VARIANCE), x);
+
+        double mean = stats.getAsDouble(Statistic.MEAN);
+        double variance = stats.getAsDouble(Statistic.VARIANCE);
+        long n = stats.getCount();
+        double alpha = 0.05;
+
+        Assertions.assertEquals(1.46, mean, 1e-2);
+        Assertions.assertEquals(0.0058, variance, 1e-4);
+
+        Interval interval = NormalConfidenceInterval.MEAN.fromErrorRate(mean, 
variance, n, alpha);
+        Assertions.assertEquals(1.4170, interval.getLowerBound(), 1e-4);
+        Assertions.assertEquals(1.5017, interval.getUpperBound(), 1e-4);
+    }
+
     private static void assertInterval(Interval interval, double lower, double 
upper, double relError) {
         Assertions.assertEquals(lower, interval.getLowerBound(), lower * 
relError, "lower");
         Assertions.assertEquals(upper, interval.getUpperBound(), upper * 
relError, "upper");
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index d837c90..b35754c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -56,6 +56,10 @@ If the output is not quite correct, check for invisible 
trailing spaces!
     <release version="1.2" date="TBD" description="
 New features, updates and bug fixes (requires Java 8).
 ">
+      <action dev="aherbert" type="add" issue="STATISTICS-91">
+        "NormalConfidenceInterval": Support confidence intervals for a normally
+        distributed population.
+      </action>
       <action dev="aherbert" type="add" issue="STATISTICS-6">
         Add a commons-statistics-interval module for statistical intervals. 
This ports and
         updates functionality in org.apache.commons.math4.stat.interval.
diff --git a/src/conf/checkstyle/checkstyle-suppressions.xml 
b/src/conf/checkstyle/checkstyle-suppressions.xml
index 7e47f14..1da396b 100644
--- a/src/conf/checkstyle/checkstyle-suppressions.xml
+++ b/src/conf/checkstyle/checkstyle-suppressions.xml
@@ -41,6 +41,7 @@
   <suppress checks="ParameterNumber" 
files=".*[/\\]WilcoxonSignedRankTestTest.java" />
   <suppress checks="ParameterNumber" 
files=".*[/\\]UnconditionedExactTestTest.java" />
   <suppress checks="ParameterNumber" 
files=".*[/\\]BinomialConfidenceIntervalTest.java" />
+  <suppress checks="ParameterNumber" 
files=".*[/\\]NormalConfidenceIntervalTest.java" />
   <suppress checks="MethodLength" 
files=".*[/\\]WilcoxonSignedRankTestTest.java" />
   <suppress checks="IllegalCatch" files=".*[/\\]TestHelper.java" 
lines="295-410" />
   <suppress checks="IllegalCatch" files=".*[/\\]BaseStatisticTest.java" 
lines="280-400" />
diff --git a/src/site/xdoc/userguide/index.xml 
b/src/site/xdoc/userguide/index.xml
index cff4d81..cc6a08c 100644
--- a/src/site/xdoc/userguide/index.xml
+++ b/src/site/xdoc/userguide/index.xml
@@ -763,6 +763,35 @@ interval.getUpperBound();   // 0.76341
 method.fromErrorRate(100, 50, alpha);       // 0.40383, 0.59617
 method.fromErrorRate(1000, 500, alpha);     // 0.46907, 0.53093
 method.fromErrorRate(10000, 5000, alpha);   // 0.49020, 0.50980
+</source>
+      <p>
+        The <code>NormalConfidenceInterval</code> enumeration provides methods
+        to create a confidence interval for a normally distributed population.
+        Intervals can be created for the mean or the variance from a sample of
+        the population.
+      </p>
+      <p>
+        The following example demonstrates how to generate a 95% confidence 
interval
+        for the mean given a sample. The mean and variance of the sample are
+        required for the interval; here they are generated using the 
descriptive
+        statistics API.
+      </p>
+<source class="prettyprint">
+double[] x = {
+    1.47, 1.40, 1.55, 1.44, 1.41,
+    1.38, 1.53, 1.42, 1.55, 1.55,
+    1.31, 1.37, 1.53, 1.47, 1.51
+};
+DoubleStatistics stats = DoubleStatistics.of(EnumSet.of(Statistic.MEAN, 
Statistic.VARIANCE), x);
+
+double mean = stats.getAsDouble(Statistic.MEAN);          // 1.46
+double variance = stats.getAsDouble(Statistic.VARIANCE);  // 0.0058
+long n = stats.getCount();                                // 15
+double alpha = 0.05;
+
+Interval interval = NormalConfidenceInterval.MEAN.fromErrorRate(mean, 
variance, n, alpha);
+interval.getLowerBound();   // 1.4170
+interval.getUpperBound();   // 1.5017
 </source>
     </section>
     <section name="Ranking" id="ranking">

Reply via email to