This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit cd24fa9ec02e712794360f81b6753cc9638a9c10 Author: Marko Malenic <mmalen...@gmail.com> AuthorDate: Mon Feb 22 09:42:12 2021 +1100 STATISTICS-27: Add truncated normal distribution. --- .../distribution/DistributionException.java | 4 + .../distribution/TruncatedNormalDistribution.java | 182 ++++++++++++++++ .../TruncatedNormalDistributionTest.java | 232 +++++++++++++++++++++ 3 files changed, 418 insertions(+) diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DistributionException.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DistributionException.java index 447fee2..8aa7d82 100644 --- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DistributionException.java +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/DistributionException.java @@ -28,10 +28,14 @@ class DistributionException extends IllegalArgumentException { static final String TOO_SMALL = "%s < %s"; /** Error message for "out of range" condition. */ static final String OUT_OF_RANGE = "Number %s is out of range [%s, %s]"; + /** Error message for "invalid range" condition. */ + static final String INVALID_RANGE = "Lower bound %s is not below the upper bound %s"; /** Error message for "invalid probability" condition. */ static final String INVALID_PROBABILITY = "Not a probability: %s is out of range [0, 1]"; /** Error message for "out of range" condition. */ static final String NEGATIVE = "Number %s is negative"; + /** Error message for "not strictly positive" condition. */ + static final String NOT_STRICTLY_POSITIVE = "Number %s is not greater than 0"; /** Error message for "mismatch" condition. */ static final String MISMATCH = "Expected %s but was %s"; diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TruncatedNormalDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TruncatedNormalDistribution.java new file mode 100644 index 0000000..935b63a --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TruncatedNormalDistribution.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +/** + * Implementation of the truncated normal distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Truncated_normal_distribution"> + * Truncated normal distribution (Wikipedia)</a> + */ +public class TruncatedNormalDistribution extends AbstractContinuousDistribution { + /** Mean of parent normal distribution. */ + private final double parentMean; + /** Standard deviation of parent normal distribution. */ + private final double parentSd; + /** Mean of this distribution. */ + private final double mean; + /** Variance of this distribution. */ + private final double variance; + /** Lower bound of this distribution. */ + private final double lower; + /** Upper bound of this distribution. */ + private final double upper; + + /** A standard normal distribution used for calculations. */ + private final NormalDistribution standardNormal; + /** Stored value of @{code standardNormal.cumulativeProbability((lower - mean) / sd)} for faster computations. */ + private final double cdfAlpha; + /** + * Stored value of @{code standardNormal.cumulativeProbability((upper - mean) / sd) - cdfAlpha} + * for faster computations. + */ + private final double cdfDelta; + + /** + * Creates a truncated normal distribution. + * Note that the {@code mean} and {@code sd} is of the parent normal distribution, + * and not the true mean and standard deviation of the truncated normal distribution. + * + * @param mean mean for this distribution. + * @param sd standard deviation for this distribution. + * @param lower lower bound (inclusive) of the distribution, can be {@link Double#NEGATIVE_INFINITY}. + * @param upper upper bound (inclusive) of the distribution, can be {@link Double#POSITIVE_INFINITY}. + * @throws IllegalArgumentException if {@code sd <= 0} or if {@code upper <= lower}. + */ + public TruncatedNormalDistribution(double mean, double sd, double lower, double upper) { + if (sd <= 0) { + throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, sd); + } + if (upper <= lower) { + throw new DistributionException(DistributionException.INVALID_RANGE, lower, upper); + } + + this.lower = lower; + this.upper = upper; + + parentMean = mean; + parentSd = sd; + standardNormal = new NormalDistribution(0, 1); + + final double alpha = (lower - mean) / sd; + final double beta = (upper - mean) / sd; + + final double cdfBeta = standardNormal.cumulativeProbability(beta); + cdfAlpha = standardNormal.cumulativeProbability(alpha); + cdfDelta = cdfBeta - cdfAlpha; + + // Calculation of variance and mean. + final double pdfAlpha = standardNormal.density(alpha); + final double pdfBeta = standardNormal.density(beta); + final double pdfCdfDelta = (pdfAlpha - pdfBeta) / cdfDelta; + final double alphaBetaDelta = (alpha * pdfAlpha - beta * pdfBeta) / cdfDelta; + + if (lower == Double.NEGATIVE_INFINITY) { + if (upper == Double.POSITIVE_INFINITY) { + // No truncation + this.mean = mean; + variance = sd * sd; + } else { + // One-sided lower tail truncation + final double betaRatio = pdfBeta / cdfBeta; + this.mean = mean - sd * betaRatio; + variance = sd * sd * (1 - beta * betaRatio - betaRatio * betaRatio); + } + } else { + if (upper == Double.POSITIVE_INFINITY) { + // One-sided upper tail truncation + final double alphaRatio = pdfAlpha / cdfDelta; + this.mean = mean + sd * alphaRatio; + variance = sd * sd * (1 + alpha * alphaRatio - alphaRatio * alphaRatio); + } else { + // Two-sided truncation + this.mean = mean + pdfCdfDelta * parentSd; + variance = sd * sd * (1 + alphaBetaDelta - pdfCdfDelta * pdfCdfDelta); + } + } + } + + /** {@inheritDoc} */ + @Override + public double density(double x) { + if (x < lower || x > upper) { + return 0; + } + return standardNormal.density((x - parentMean) / parentSd) / (parentSd * cdfDelta); + } + + /** {@inheritDoc} */ + @Override + public double cumulativeProbability(double x) { + if (x <= lower) { + return 0; + } else if (x >= upper) { + return 1; + } + return (standardNormal.cumulativeProbability((x - parentMean) / parentSd) - cdfAlpha) / cdfDelta; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) { + if (p < 0 || p > 1) { + throw new DistributionException(DistributionException.INVALID_PROBABILITY, p); + } + return standardNormal.inverseCumulativeProbability(cdfAlpha + p * cdfDelta) * parentSd + parentMean; + } + + /** + * {@inheritDoc} + * + * Represents the true mean of the truncated normal distribution rather + * than the parent normal distribution mean. + */ + @Override + public double getMean() { + return mean; + } + + /** + * {@inheritDoc} + * + * Represents the true variance of the truncated normal distribution rather + * than the parent normal distribution variance. + */ + @Override + public double getVariance() { + return variance; + } + + /** {@inheritDoc} */ + @Override + public double getSupportLowerBound() { + return lower; + } + + /** {@inheritDoc} */ + @Override + public double getSupportUpperBound() { + return upper; + } + + /** {@inheritDoc} */ + @Override + public boolean isSupportConnected() { + return true; + } +} diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TruncatedNormalDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TruncatedNormalDistributionTest.java new file mode 100644 index 0000000..ce49e46 --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TruncatedNormalDistributionTest.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Test class for {@link TruncatedNormalDistribution}. + * All test values were computed using Python with SciPy v1.6.0. + */ +class TruncatedNormalDistributionTest extends ContinuousDistributionAbstractTest { + /** Distribution to test with. */ + private TruncatedNormalDistribution distribution = new TruncatedNormalDistribution(1.9, 1.3, -1.1, 3.4); + /** Percentiles to test with. */ + private double[] ppfValues = new double[]{0, 0.0001, 0.001, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, + 0.975, 0.99, 0.999, 0.9999, 1}; + /** Expected cumulative values for percentiles. */ + private double[] cdfValues = new double[]{-1.1, -1.09597275767544, -1.0609616183922, -0.79283350106842, + -0.505331829887808, -0.192170173599874, 0.21173317261645, + 0.925791281910463, 1.71399518338879, 2.43413009451536, 2.94473113856785, + 3.15310057075828, 3.27036798398733, 3.34641874981679, 3.39452729074341, + 3.39945153287941, 3.4}; + /** Expected density values for percentiles. */ + private double[] pdfValues = new double[]{0.0247422752302618, 0.0249196707321102, 0.0265057408263321, + 0.0415071096500185, 0.0640403254340905, 0.0971457789636, + 0.152622492901864, 0.267853863255995, 0.35107475879338, 0.325977522502844, + 0.25680502248913, 0.222886115806507, 0.203494915087054, 0.190997946666992, + 0.183167918885238, 0.182370706542209, 0.182281965373914}; + /** Expected distribution mean. */ + private double mean = 1.63375792365723; + /** Expected distribution variance. */ + private double variance = 1.03158703914439; + + /** Overrides tolerance and sets up distribution. */ + @BeforeEach + void setUp() { + setTolerance(1e-7); + super.setUp(); + } + + /** {@inheritDoc} */ + @Override + public ContinuousDistribution makeDistribution() { + return distribution; + } + + /** {@inheritDoc} */ + @Override + public double[] makeCumulativeTestPoints() { + return cdfValues; + } + + /** {@inheritDoc} */ + @Override + public double[] makeCumulativeTestValues() { + return ppfValues; + } + + /** {@inheritDoc} */ + @Override + public double[] makeDensityTestValues() { + return pdfValues; + } + + /** + * Configures new test values and runs relevant tests in this class and {@link ContinuousDistributionAbstractTest}. + * + * @param testDistribution distribution to test with. + * @param expectedPpfValues expected percentiles to test with. + * @param expectedCdfValues expected cumulative values for percentiles. + * @param expectedPdfValues expected density values for percentiles. + * @param expectedMean expected mean. + * @param expectedVariance expected variance. + */ + private void testAdditionalDistribution( + TruncatedNormalDistribution testDistribution, + double[] expectedPpfValues, + double[] expectedCdfValues, + double[] expectedPdfValues, + double expectedMean, + double expectedVariance) { + this.distribution = testDistribution; + this.ppfValues = expectedPpfValues; + this.cdfValues = expectedCdfValues; + this.pdfValues = expectedPdfValues; + this.mean = expectedMean; + this.variance = expectedVariance; + + setUp(); + + testMoments(); + + testConsistency(); + testSampler(); + testOutsideSupport(); + testDensities(); + testLogDensities(); + testOutsideSupport(); + testInverseCumulativeProbabilities(); + testDensityIntegrals(); + testCumulativeProbabilities(); + testIsSupportConnected(); + testPrecondition1(); + testPrecondition2(); + testPrecondition3(); + } + + /** Test a one-sided truncation with a lower tail. */ + @Test + void testOneSidedLowerTail() { + testAdditionalDistribution( + new TruncatedNormalDistribution(12, 2.4, Double.NEGATIVE_INFINITY, 7.1), + new double[]{0, 0.00108276414971883, 0.00433032247708514, 0.0155754809421998, 0.0504271331622245, + 0.147106879016387, 0.387159643321778, 0.920668099879139, 1}, + new double[]{Double.NEGATIVE_INFINITY, 2.20249292901062, 3.00511196424565, 3.80773099948069, + 4.61035003471573, 5.41296906995077, 6.21558810518581, 7.01820714042084, 7.1}, + new double[]{0, 0.00194181137319567, 0.00719165311538403, 0.0238165586714952, 0.0705273999981105, + 0.186752027463317, 0.442182309739316, 0.936194292830215, 1.00423817618302}, + 6.21558810518581, + 0.644197315721623); + } + + /** Test a one-sided truncation with an upper tail. */ + @Test + void testOneSidedUpperTail() { + testAdditionalDistribution( + new TruncatedNormalDistribution(-9.6, 17, -15, Double.POSITIVE_INFINITY), + new double[]{0, 0.164539974698729, 0.564800349576255, 0.836443289017693, 0.957226746540945, + 0.992394081771774, 0.999093968560336, 0.999928403010774, 1}, + new double[]{-15, -10.5314720401464, 0.723583450712814, 11.978638941572, 23.2336944324312, + 34.4887499232902, 45.7438054141485, 56.9988609050074, Double.POSITIVE_INFINITY}, + new double[]{0.035721742043989, 0.0375137766818179, 0.0312438063187719, 0.0167870518464031, + 0.00581865051705663, 0.00130109036611494, 0.000187685186297558, 1.74658560715427e-05, 0}, + 0.723583450712812, + 126.676274102319); + } + + /** Test no truncation. */ + @Test + void testNoTruncation() { + testAdditionalDistribution( + new TruncatedNormalDistribution(3, 1.1, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), + new double[]{0, 2.86651571879193e-07, 3.16712418331199e-05, 0.00134989803163009, 0.0227501319481792, + 0.158655253931457, 0.5, 0.841344746068543, 0.977249868051821, 0.99865010196837, + 0.999968328758167, 0.999999713348428, 1}, + new double[]{Double.NEGATIVE_INFINITY, -2.5, -1.4, -0.300000000000001, 0.799999999999999, 1.9, 3, + 4.1, 5.2, 6.3, 7.4, 8.49999999996719, Double.POSITIVE_INFINITY}, + new double[]{0, 1.35156319521299e-06, 0.000121663841604441, 0.00402895310176182, 0.0490826968301709, + 0.219973385926494, 0.362674800364939, 0.219973385926494, 0.0490826968301709, + 0.00402895310176184, 0.000121663841604441, 1.35156319541454e-06, 0}, + 3, + 1.21); + } + + /** Test a truncation range that is completely below the mean. */ + @Test + void testLowerTailOnly() { + testAdditionalDistribution( + new TruncatedNormalDistribution(0, 1, Double.NEGATIVE_INFINITY, -5), + new double[]{0, 0.00196196451357246, 0.00597491488512203, 0.0176247203066899, 0.0503595643590926, + 0.139390045971621, 0.373761183487683, 0.970943041215359, 1}, + new double[]{Double.NEGATIVE_INFINITY, -6.09061174025149, -5.90979018562636, -5.72896863100123, + -5.54814707637611, -5.36732552175098, -5.18650396712585, -5.00568241250073, -5}, + new double[]{0, 0.0122562922051934, 0.0362705138555484, 0.103883943928261, 0.287967362544455, + 0.772570689127439, 2.00601097433085, 5.04113700754108, 5.18650396712585}, + -5.18650396712585, + 0.0326964346170475); + } + + /** Test a truncation range that is completely above the mean. */ + @Test + void testUpperTailOnly() { + testAdditionalDistribution( + new TruncatedNormalDistribution(0, 1, 5, Double.POSITIVE_INFINITY), + new double[]{0, 0.0290569590230917, 0.626238816822898, 0.860609954247549, 0.949640435971243, + 0.982375279755296, 0.994025085266282, 0.998038035551444, 1}, + new double[]{5, 5.00568241254803, 5.18650396728068, 5.36732552203467, 5.54814707752324, + 5.72896863159791, 5.90979018980065, 6.09061174555624, Double.POSITIVE_INFINITY}, + new double[]{5.18650396712585, 5.04113700634745, 2.00601097272001, 0.772570687951075, + 0.287967360711704, 0.103883943573147, 0.0362705129607846, 0.0122562918092027, 0}, + 5.18650396712585, + 0.0326964346170475); + } + + /** Test a narrow truncation range. */ + @Test + void testNarrowTruncatedRange() { + testAdditionalDistribution( + new TruncatedNormalDistribution(7.1, 9.9, 7.0999999, 7.1000001), + new double[]{0, 0.5, 1}, + new double[]{7.0999999, 7.1, 7.1000001}, + new double[]{5000000.00238838, 5000000.00238838, 5000000.00238838}, + 7.1, + 1.13584123966337e-07); + } + + /** Test mean and variance moments. */ + @Test + void testMoments() { + Assertions.assertEquals(mean, distribution.getMean(), getTolerance()); + Assertions.assertEquals(variance, distribution.getVariance(), getTolerance()); + } + + /** Test constructor precondition when the standard deviation is less than or equal to 0. */ + @Test + void testConstructorSdPrecondition() { + Assertions.assertThrows(DistributionException.class, () -> new TruncatedNormalDistribution(1, 0, -1, 1)); + } + + /** Test constructor precondition when the lower bound is greater than the upper bound. */ + @Test + void testConstructorBoundsPrecondition() { + Assertions.assertThrows(DistributionException.class, () -> new TruncatedNormalDistribution(1, 1, 1, -1)); + } +}