This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 52395c9739b75d2f70a3de56f6f4d5026e5e93bd Author: Alex Herbert <aherb...@apache.org> AuthorDate: Wed Aug 14 16:30:15 2024 +0100 STATISTICS-87: Add a folded normal distribution --- .../distribution/FoldedNormalDistribution.java | 373 +++++++++++++++++++++ .../distribution/FoldedNormalDistributionTest.java | 108 ++++++ .../distribution/test.foldednormal.1.properties | 48 +++ .../distribution/test.foldednormal.2.properties | 53 +++ .../distribution/test.foldednormal.3.properties | 52 +++ .../distribution/test.foldednormal.4.properties | 51 +++ .../distribution/test.foldednormal.5.properties | 49 +++ .../distribution/DistributionsApplication.java | 1 + .../examples/distribution/FoldedNormalCommand.java | 156 +++++++++ src/changes/changes.xml | 4 + src/conf/checkstyle/checkstyle-suppressions.xml | 2 + src/conf/pmd/pmd-ruleset.xml | 7 + 12 files changed, 904 insertions(+) diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/FoldedNormalDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/FoldedNormalDistribution.java new file mode 100644 index 0000000..58acbbc --- /dev/null +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/FoldedNormalDistribution.java @@ -0,0 +1,373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import org.apache.commons.numbers.gamma.Erf; +import org.apache.commons.numbers.gamma.ErfDifference; +import org.apache.commons.numbers.gamma.Erfc; +import org.apache.commons.numbers.gamma.InverseErf; +import org.apache.commons.numbers.gamma.InverseErfc; +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.distribution.GaussianSampler; +import org.apache.commons.rng.sampling.distribution.SharedStateContinuousSampler; +import org.apache.commons.rng.sampling.distribution.ZigguratSampler; + +/** + * Implementation of the folded normal distribution. + * + * <p>Given a normally distributed random variable \( X \) with mean \( \mu \) and variance + * \( \sigma^2 \), the random variable \( Y = |X| \) has a folded normal distribution. This is + * equivalent to not recording the sign from a normally distributed random variable. + * + * <p>The probability density function of \( X \) is: + * + * <p>\[ f(x; \mu, \sigma) = \frac 1 {\sigma\sqrt{2\pi}} e^{-{\frac 1 2}\left( \frac{x-\mu}{\sigma} \right)^2 } + + * \frac 1 {\sigma\sqrt{2\pi}} e^{-{\frac 1 2}\left( \frac{x+\mu}{\sigma} \right)^2 }\] + * + * <p>for \( \mu \) the location, + * \( \sigma > 0 \) the scale, and + * \( x \in [0, \infty) \). + * + * <p>If the location \( \mu \) is 0 this reduces to the half-normal distribution. + * + * @see <a href="https://en.wikipedia.org/wiki/Folded_normal_distribution">Folded normal distribution (Wikipedia)</a> + * @see <a href="https://en.wikipedia.org/wiki/Half-normal_distribution">Half-normal distribution (Wikipedia)</a> + * @since 1.1 + */ +public abstract class FoldedNormalDistribution extends AbstractContinuousDistribution { + /** Normalisation constant sqrt(2 / pi). */ + private static final double ROOT_TWO_DIV_PI = 0.7978845608028654; + + /** The scale. */ + final double sigma; + /** + * The scale multiplied by sqrt(2). + * This is used to avoid a double division when computing the value passed to the + * error function: + * <pre> + * ((x - u) / sd) / sqrt(2) == (x - u) / (sd * sqrt(2)). + * </pre> + * <p>Note: Implementations may first normalise x and then divide by sqrt(2) resulting + * in differences due to rounding error that show increasingly large relative + * differences as the error function computes close to 0 in the extreme tail. + */ + final double sigmaSqrt2; + /** + * The scale multiplied by sqrt(2 pi). Computed to high precision. + */ + final double sigmaSqrt2pi; + + /** + * Regular implementation of the folded normal distribution. + */ + private static class RegularFoldedNormalDistribution extends FoldedNormalDistribution { + /** The location. */ + private final double mu; + /** Cached value for inverse probability function. */ + private final double mean; + /** Cached value for inverse probability function. */ + private final double variance; + + /** + * @param mu Location parameter. + * @param sigma Scale parameter. + */ + RegularFoldedNormalDistribution(double mu, double sigma) { + super(sigma); + this.mu = mu; + + final double a = mu / sigmaSqrt2; + mean = sigma * ROOT_TWO_DIV_PI * Math.exp(-a * a) + mu * Erf.value(a); + this.variance = mu * mu + sigma * sigma - mean * mean; + } + + @Override + public double getMu() { + return mu; + } + + @Override + public double density(double x) { + if (x < 0) { + return 0; + } + final double vm = (x - mu) / sigma; + final double vp = (x + mu) / sigma; + return (ExtendedPrecision.expmhxx(vm) + ExtendedPrecision.expmhxx(vp)) / sigmaSqrt2pi; + } + + @Override + public double probability(double x0, + double x1) { + if (x0 > x1) { + throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, + x0, x1); + } + if (x0 <= 0) { + return cumulativeProbability(x1); + } + // Assumes x1 >= x0 && x0 > 0 + final double v0m = (x0 - mu) / sigmaSqrt2; + final double v1m = (x1 - mu) / sigmaSqrt2; + final double v0p = (x0 + mu) / sigmaSqrt2; + final double v1p = (x1 + mu) / sigmaSqrt2; + return 0.5 * (ErfDifference.value(v0m, v1m) + ErfDifference.value(v0p, v1p)); + } + + @Override + public double cumulativeProbability(double x) { + if (x <= 0) { + return 0; + } + return 0.5 * (Erf.value((x - mu) / sigmaSqrt2) + Erf.value((x + mu) / sigmaSqrt2)); + } + + @Override + public double survivalProbability(double x) { + if (x <= 0) { + return 1; + } + return 0.5 * (Erfc.value((x - mu) / sigmaSqrt2) + Erfc.value((x + mu) / sigmaSqrt2)); + } + + @Override + public double getMean() { + return mean; + } + + @Override + public double getVariance() { + return variance; + } + + @Override + public Sampler createSampler(UniformRandomProvider rng) { + // Return the absolute of a Gaussian distribution sampler. + final SharedStateContinuousSampler s = + GaussianSampler.of(ZigguratSampler.NormalizedGaussian.of(rng), mu, sigma); + return () -> Math.abs(s.sample()); + } + } + + /** + * Specialisation for the half-normal distribution. + * + * <p>Elimination of the {@code mu} location parameter simplifies the probability + * functions and allows computation of the log density and inverse CDF/SF. + */ + private static class HalfNormalDistribution extends FoldedNormalDistribution { + /** Variance constant (1 - 2/pi). */ + private static final double VAR = 0.363380227632418617567; + /** ln(2). */ + private static final double LN_2 = 0.6931471805599453094172; + /** 0.5 * ln(2 * pi). Computed to 25-digits precision. */ + private static final double HALF_LOG_TWO_PI = 0.9189385332046727417803297; + /** The value of {@code log(sigma) + 0.5 * log(2*PI)} stored for faster computation. */ + private final double logSigmaPlusHalfLog2Pi; + + /** + * @param sigma Scale parameter. + */ + HalfNormalDistribution(double sigma) { + super(sigma); + logSigmaPlusHalfLog2Pi = Math.log(sigma) + HALF_LOG_TWO_PI; + } + + @Override + public double getMu() { + return 0; + } + + @Override + public double density(double x) { + if (x < 0) { + return 0; + } + return 2 * ExtendedPrecision.expmhxx(x / sigma) / sigmaSqrt2pi; + } + + @Override + public double probability(double x0, + double x1) { + if (x0 > x1) { + throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GT_HIGH, + x0, x1); + } + if (x0 <= 0) { + return cumulativeProbability(x1); + } + // Assumes x1 >= x0 && x0 > 0 + return ErfDifference.value(x0 / sigmaSqrt2, x1 / sigmaSqrt2); + } + + @Override + public double logDensity(double x) { + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } + final double z = x / sigma; + return LN_2 - 0.5 * z * z - logSigmaPlusHalfLog2Pi; + } + + @Override + public double cumulativeProbability(double x) { + if (x <= 0) { + return 0; + } + return Erf.value(x / sigmaSqrt2); + } + + @Override + public double survivalProbability(double x) { + if (x <= 0) { + return 1; + } + return Erfc.value(x / sigmaSqrt2); + } + + @Override + public double inverseCumulativeProbability(double p) { + ArgumentUtils.checkProbability(p); + // Addition of 0.0 ensures 0.0 is returned for p=-0.0 + return 0.0 + sigmaSqrt2 * InverseErf.value(p); + } + + /** {@inheritDoc} */ + @Override + public double inverseSurvivalProbability(double p) { + ArgumentUtils.checkProbability(p); + return sigmaSqrt2 * InverseErfc.value(p); + } + + @Override + public double getMean() { + return sigma * ROOT_TWO_DIV_PI; + } + + @Override + public double getVariance() { + // sigma^2 - mean^2 + // sigma^2 - (sigma^2 * 2/pi) + return sigma * sigma * VAR; + } + + @Override + public Sampler createSampler(UniformRandomProvider rng) { + // Return the absolute of a Gaussian distribution sampler. + final SharedStateContinuousSampler s = ZigguratSampler.NormalizedGaussian.of(rng); + return () -> Math.abs(s.sample() * sigma); + } + } + + /** + * @param sigma Scale parameter. + */ + FoldedNormalDistribution(double sigma) { + this.sigma = sigma; + // Minimise rounding error by computing sqrt(2 * sigma * sigma) exactly. + // Compute using extended precision with care to avoid over/underflow. + sigmaSqrt2 = ExtendedPrecision.sqrt2xx(sigma); + // Compute sigma * sqrt(2 * pi) + sigmaSqrt2pi = ExtendedPrecision.xsqrt2pi(sigma); + } + + /** + * Creates a folded normal distribution. If the location {@code mu} is zero this is + * the half-normal distribution. + * + * @param mu Location parameter. + * @param sigma Scale parameter. + * @return the distribution + * @throws IllegalArgumentException if {@code sigma <= 0}. + */ + public static FoldedNormalDistribution of(double mu, + double sigma) { + if (sigma > 0) { + if (mu == 0) { + return new HalfNormalDistribution(sigma); + } + return new RegularFoldedNormalDistribution(mu, sigma); + } + // scale is zero, negative or nan + throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, sigma); + } + + /** + * Gets the location parameter \( \mu \) of this distribution. + * + * @return the mu parameter. + */ + public abstract double getMu(); + + /** + * Gets the scale parameter \( \sigma \) of this distribution. + * + * @return the sigma parameter. + */ + public double getSigma() { + return sigma; + } + + /** + * {@inheritDoc} + * + * + * <p>For location parameter \( \mu \) and scale parameter \( \sigma \), the mean is: + * + * <p>\[ \sigma \sqrt{ \frac 2 \pi } \exp \left( \frac{-u^2}{2\sigma^2} \right) + + * \mu \operatorname{erf} \left( \frac \mu {\sqrt{2\sigma^2}} \right) \] + * + * <p>where \( \operatorname{erf} \) is the error function. + */ + @Override + public abstract double getMean(); + + /** + * {@inheritDoc} + * + * <p>For location parameter \( \mu \), scale parameter \( \sigma \) and a distribution + * mean \( \mu_Y \), the variance is: + * + * <p>\[ \mu^2 + \sigma^2 - \mu_{Y}^2 \] + */ + @Override + public abstract double getVariance(); + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0. + * + * @return 0. + */ + @Override + public double getSupportLowerBound() { + return 0.0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity. + * + * @return {@linkplain Double#POSITIVE_INFINITY positive infinity}. + */ + @Override + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } +} diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/FoldedNormalDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/FoldedNormalDistributionTest.java new file mode 100644 index 0000000..dee604f --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/FoldedNormalDistributionTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.statistics.distribution; + +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Test cases for {@link FoldedNormalDistribution}. + * Extends {@link BaseContinuousDistributionTest}. See javadoc of that class for details. + */ +class FoldedNormalDistributionTest extends BaseContinuousDistributionTest { + @Override + ContinuousDistribution makeDistribution(Object... parameters) { + final double mu = (Double) parameters[0]; + final double sigma = (Double) parameters[1]; + return FoldedNormalDistribution.of(mu, sigma); + } + + @Override + Object[][] makeInvalidParameters() { + return new Object[][] { + {0.0, 0.0}, + {0.0, -0.1} + }; + } + + @Override + String[] getParameterNames() { + return new String[] {"Mu", "Sigma"}; + } + + @Override + protected double getRelativeTolerance() { + return 5e-15; + } + + //-------------------- Additional test cases ------------------------------- + + /** + * Test the mean. This is performed using the folding together of two truncated + * normal distributions, with the truncation at the origin. + * + * <p>This test cross-validates the mean computation as the scipy reference + * implementation only supports a positive mu (and no sigma); and the R + * reference library VGAM does not provide mean computation. + */ + @ParameterizedTest + @MethodSource + void testMean(double mu, double sigma) { + // Expected mean is the weighted means of each truncated distribution; + // the mean of the distribution below the origin must be negated + final TruncatedNormalDistribution t1 = TruncatedNormalDistribution.of(mu, sigma, Double.NEGATIVE_INFINITY, 0); + final TruncatedNormalDistribution t2 = TruncatedNormalDistribution.of(mu, sigma, 0, Double.POSITIVE_INFINITY); + final NormalDistribution n = NormalDistribution.of(mu, sigma); + final double p1 = n.cumulativeProbability(0); + final double p2 = 1 - p1; + final double expected = p2 * t2.getMean() - p1 * t1.getMean(); + TestUtils.assertEquals(expected, FoldedNormalDistribution.of(mu, sigma).getMean(), + DoubleTolerances.relative(1e-14)); + } + + static Stream<Arguments> testMean() { + final Stream.Builder<Arguments> builder = Stream.builder(); + for (final double mu : new double[] {-3, -2, -1, 0, 1, 2, 3}) { + for (final double sigma : new double[] {0.75, 1, 1.5}) { + builder.add(Arguments.of(mu, sigma)); + } + } + return builder.build(); + } + + @Test + void testCumulativeProbabilityExtremes() { + // Use a small shape parameter so that we can exceed 40 * shape + testCumulativeProbability(FoldedNormalDistribution.of(1, 0.0001), + new double[] {0, 10}, + new double[] {0, 1.0}, + DoubleTolerances.equals()); + } + + @Test + void testSurvivalProbabilityExtremes() { + // Use a small shape parameter so that we can exceed 40 * shape + testSurvivalProbability(FoldedNormalDistribution.of(1, 0.0001), + new double[] {0, 10}, + new double[] {1.0, 0.0}, + DoubleTolerances.equals()); + } +} diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.1.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.1.properties new file mode 100644 index 0000000..6d25937 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.1.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Standard half-normal distribution +parameters = 0.0 1.0 + +# Computed using sqrt(2/pi) +mean = 0.797884560802865406 +# Computed using 1 - 2/pi +variance = 0.363380227632418618 +lower = 0 + +# Computed using R 4.4.1; library VGAM 1.1-11; foldnorm +cdf.points = \ + -1, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6 +cdf.values = \ + 0.000000000000000000 0.000000000000000000 0.197412651365847402 \ + 0.382924922548026070 0.546745295246263474 0.682689492137085852 \ + 0.788700452666289520 0.866385597462283830 0.954499736103641583 \ + 0.987580669348447682 0.997300203936739793 0.999534741841928920 \ + 0.999936657516333760 0.999993204653750523 0.999999426696856153 \ + 0.999999962020875044 0.999999998026824599 +sf.values = \ + 1.00000000000000000e+00 1.00000000000000000e+00 8.02587348634152598e-01 \ + 6.17075077451973875e-01 4.53254704753736470e-01 3.17310507862914093e-01 \ + 2.11299547333710480e-01 1.33614402537716143e-01 4.55002638963584241e-02 \ + 1.24193306515522697e-02 2.69979606326018915e-03 4.65258158071049871e-04 \ + 6.33424836662398487e-05 6.79534624946011966e-06 5.73303143758387824e-07 \ + 3.79791249317754424e-08 1.97317529007539618e-09 +pdf.values = \ + 0.00000000000000000e+00 7.97884560802865406e-01 7.73336233605698475e-01 \ + 7.04130653528599049e-01 6.02274864309608859e-01 4.83941449038286731e-01 \ + 3.65298170778043829e-01 2.59035191331783488e-01 1.07981933026376126e-01 \ + 3.50566009871370807e-02 8.86369682387601505e-03 1.74536539009152031e-03 \ + 2.67660451529770735e-04 3.19674822138109492e-05 2.97343902946859536e-06 \ + 2.15395200850865522e-07 1.21517656996465722e-08 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.2.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.2.properties new file mode 100644 index 0000000..1ec5b26 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.2.properties @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Scaled folded normal distribution +parameters = 0.0 0.75 + +# Computed using 0.75 * sqrt(2/pi) +mean = 0.5984134206021491 +# Computed using 0.75^2 * (1 - 2/pi) +variance = 0.20440137804323547 +lower = 0 + +# Computed using R 4.4.1; library VGAM 1.1-11; foldnorm +cdf.points = \ + -1, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6 +cdf.values = \ + 0.000000000000000000 0.000000000000000000 0.261117319636472667 \ + 0.495014924906154175 0.682689492137085852 0.817577560548264248 \ + 0.904419295454370609 0.954499736103641583 0.992339238864820450 \ + 0.999141879333606431 0.999936657516333760 0.999996938746526842 \ + 0.999999903573932647 0.999999998026824599 0.999999999973832043 \ + 0.999999999999775513 0.999999999999998668 +sf.values = \ + 1.00000000000000000e+00 1.00000000000000000e+00 7.38882680363527333e-01 \ + 5.04985075093845825e-01 3.17310507862914093e-01 1.82422439451735696e-01 \ + 9.55807045456294052e-02 4.55002638963584241e-02 7.66076113517947469e-03 \ + 8.58120666393674485e-04 6.33424836662398487e-05 3.06125347306212893e-06 \ + 9.64260673022826174e-08 1.97317529007539618e-09 2.61678493721059974e-11 \ + 2.24497625427110064e-13 1.24419211485435697e-15 +pdf.values = \ + 0.00000000000000000e+00 1.06384608107048728e+00 1.00635527384798174e+00 \ + 8.51861348059606005e-01 6.45255265384382271e-01 4.37360199135983008e-01 \ + 2.65272370113996458e-01 1.43975910701834825e-01 3.03892960634598447e-02 \ + 4.11274399010961352e-03 3.56880602039694314e-04 1.98561223216799738e-05 \ + 7.08347175605843793e-07 1.62023542661954285e-08 2.37624004996057561e-10 \ + 2.23451219471724615e-12 1.34727228894317145e-14 + +sf.relative = 1e-14 +sf.mapping.relative = 1e-14 +# The Java implementation does not use log(pdf(x)) but directly implements the log density +logpdf.relative = 2e-14 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.3.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.3.properties new file mode 100644 index 0000000..4225062 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.3.properties @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Shifted folded normal distribution +parameters = 1.5 1.0 + +# Computed using scipy.stats.foldnorm 1.11 +mean = 1.5586135875252092 +variance = 0.8207236847817971 +lower = 0 + +cdf.points = \ + -1, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6 +cdf.values = \ + 0. , 0. , 0.06559061680303818, \ + 0.13590512198327787, 0.21440287972182354, 0.3023278734002108 , \ + 0.39831391108202174, 0.4986501019683699 , 0.6912298321949775 , \ + 0.8413130748267098 , 0.9331894010580172 , 0.9772495814002489 , \ + 0.9937903156846614 , 0.9986501009817823 , 0.9997673708808045 , \ + 0.999968328756887 , 0.9999966023268434 +sf.values = \ + 1.0000000000000000e+00, 1.0000000000000000e+00, \ + 9.3440938319696176e-01, 8.6409487801672213e-01, \ + 7.8559712027817641e-01, 6.9767212659978928e-01, \ + 6.0168608891797826e-01, 5.0134989803163010e-01, \ + 3.0877016780502242e-01, 1.5868692517329019e-01, \ + 6.6810598941982782e-02, 2.2750418599751077e-02, \ + 6.2096843153385999e-03, 1.3498990182177382e-03, \ + 2.3262911919553084e-04, 3.1671243112932409e-05, \ + 3.3976731566389709e-06 +pdf.values = \ + 0.0000000000000000e+00, 2.5903519133178349e-01, \ + 2.6892640421553343e-01, 2.9596169103233144e-01, \ + 3.3287708399047183e-01, 3.6959362725786804e-01, \ + 3.9576167930444034e-01, 4.0337412881337070e-01, \ + 3.5293800945934528e-01, 2.4210455474490825e-01, \ + 1.2953357940699864e-01, 5.3992453232702797e-02, \ + 1.7528408191168965e-02, 4.4318544878208573e-03, \ + 8.7268296200142161e-04, 1.3383023489960577e-04, \ + 1.5983741350337533e-05 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.4.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.4.properties new file mode 100644 index 0000000..5a73500 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.4.properties @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Shifted and scaled folded normal distribution +parameters = 1.5 0.75 + +# Computed in R 4.4.1: u=1.5; s=0.75; +# m=s*sqrt(2/pi)*exp(-u*u/(2*s*s))+u*(1-2*pnorm(-u/s)) +# v=u*u+s*s-m*m +mean = 1.51273605392524457 +variance = 0.524129631154679476 +lower = 0 + +# Computed using R 4.4.1; library VGAM 1.1-11; foldnorm +cdf.points = \ + -1, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6 +cdf.values = \ + 0.0000000000000000000 0.0000000000000000000 0.0379750236441693673 \ + 0.0873808391582781008 0.1573053558998269430 0.2520634772137260726 \ + 0.3693184737917984961 0.4999683287581668800 0.7475059318263405084 \ + 0.9087887320610984476 0.9772498670652330910 0.9961696194193262466 \ + 0.9995709396666909718 0.9999683287581662139 0.9999984693732634211 \ + 0.9999999517869663235 0.9999999990134122996 +sf.values = \ + 1.00000000000000000e+00 1.00000000000000000e+00 9.62024976355830619e-01 \ + 9.12619160841721899e-01 8.42694644100173029e-01 7.47936522786273872e-01 \ + 6.30681526208201504e-01 5.00031671241833120e-01 2.52494068173659492e-01 \ + 9.12112679389014830e-02 2.27501329347668570e-02 3.83038058067365498e-03 \ + 4.29060333309086753e-04 3.16712418337420192e-05 1.53062673653328920e-06 \ + 4.82130336511464372e-08 9.86587645037705742e-10 +pdf.values = \ + 0.00000000000000000e+00 1.43975910701834825e-01 1.67598703848610864e-01 \ + 2.33874747599721405e-01 3.28536763908108487e-01 4.27987046024857820e-01 \ + 5.03817997792818772e-01 5.32101480836263518e-01 4.25940602090963905e-01 \ + 2.18680453741579267e-01 7.19879634520945444e-02 1.51946481505419121e-02 \ + 2.05637199617206669e-03 1.78440301026583522e-04 9.92806116086602978e-06 \ + 3.54173587802986430e-07 8.10117713309781680e-09 + +sf.relative = 1e-14 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.5.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.5.properties new file mode 100644 index 0000000..ef2e451 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.foldednormal.5.properties @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Shifted and scaled folded normal distribution +parameters = -1.5 1.25 + +# Computed in R 4.4.1: u=1.5; s=0.75; +# m=s*sqrt(2/pi)*exp(-u*u/(2*s*s))+u*(1-2*pnorm(-u/s)) +# v=u*u+s*s-m*m +mean = 1.64025612679290766 +variance = 1.12205983851832869 +lower = 0 + +# Computed using R 4.4.1; library VGAM 1.1-11; foldnorm +cdf.points = \ + -1, 0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6 +cdf.values = \ + 0.0000000000000000000 0.0000000000000000000 0.0778985946976860077 \ + 0.1570561068838386420 0.2383227986371477813 0.3218281264414967202 \ + 0.4068368430473984221 0.4918024640754038446 0.6528666112798962029 \ + 0.7874574634786875027 0.8847712211881342270 0.9451690370586088852 \ + 0.9772444555079130568 0.9918016707472518956 0.9974447700253088467 \ + 0.9993128513444938799 0.9998408904232547467 +sf.values = \ + 1.000000000000000000000 1.000000000000000000000 0.922101405302313992252 \ + 0.842943893116161357959 0.761677201362852218658 0.678171873558503279789 \ + 0.593163156952601577920 0.508197535924596155432 0.347133388720103741587 \ + 0.212542536521312525055 0.115228778811865814613 0.054830962941391135634 \ + 0.022755544492086915431 0.008198329252748097484 0.002555229974691103009 \ + 0.000687148655506106374 0.000159109576745179146 +pdf.values = \ + 0.000000000000000000000 0.310697687973140734830 0.313358552123910616416 \ + 0.320489909952750673394 0.329739808954155111387 0.337808885453209140071 \ + 0.341213829457349848706 0.337069448557020479029 0.300948473509042635499 \ + 0.233660512770358069767 0.155838365530661382419 0.088843731924176369552 \ + 0.043212727187582490596 0.017918793275147129607 0.006332790149211550701 \ + 0.001907320022135873755 0.000489526404797298222 diff --git a/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/DistributionsApplication.java b/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/DistributionsApplication.java index 758d483..0fbfd27 100644 --- a/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/DistributionsApplication.java +++ b/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/DistributionsApplication.java @@ -74,6 +74,7 @@ public final class DistributionsApplication { ChiSquaredCommand.class, ExpCommand.class, FCommand.class, + FoldedNormalCommand.class, GammaCommand.class, GeometricCommand.class, GumbelCommand.class, diff --git a/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/FoldedNormalCommand.java b/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/FoldedNormalCommand.java new file mode 100644 index 0000000..07e330d --- /dev/null +++ b/commons-statistics-examples/examples-distribution/src/main/java/org/apache/commons/statistics/examples/distribution/FoldedNormalCommand.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.examples.distribution; + +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.statistics.distribution.ContinuousDistribution; +import org.apache.commons.statistics.distribution.FoldedNormalDistribution; +import picocli.CommandLine.ArgGroup; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; + +/** + * Command for the {@link FoldedNormalDistribution}. + * + * @since 1.1 + */ +@Command(name = "foldednormal", + aliases = {"foldnorm"}, + description = "Folded normal distribution.", + subcommands = { + FoldedNormalCommand.Check.class, + FoldedNormalCommand.PDF.class, + FoldedNormalCommand.LPDF.class, + FoldedNormalCommand.CDF.class, + FoldedNormalCommand.SF.class, + FoldedNormalCommand.ICDF.class, + FoldedNormalCommand.ISF.class, + }) +class FoldedNormalCommand extends AbstractDistributionCommand { + + /** Base command for the distribution that defines the parameters. */ + private abstract static class BaseCommand extends AbstractContinuousDistributionCommand { + /** Distribution parameters. */ + @ArgGroup(validate = false, heading = "Distribution parameters:%n", order = 1) + private Params params = new Params(); + + /** Parameters class. */ + static class Params { + /** The distribution mean. */ + @Option(names = {"-m", "--mu", "--mean"}, + arity = "1..*", + split = ",", + description = {"mean (default: ${DEFAULT-VALUE})."}) + private double[] mu = {0, 0, 1, -1}; + + /** The distribution sigma. */ + @Option(names = {"-s", "--sigma"}, + arity = "1..*", + split = ",", + description = {"standard deviation (default: ${DEFAULT-VALUE})."}) + private double[] sigma = {1, 2, 1, 1}; + } + + /** Extend the options to set the default values for this distribution. */ + static final class Options extends ContinuousDistributionOptions { + /** Set defaults. */ + private Options() { + min = -5; + max = 5; + } + } + + @Override + protected List<Distribution<ContinuousDistribution>> getDistributions() { + double[] mean = params.mu; + double[] sigma = params.sigma; + final int n = DistributionUtils.validateLengths(mean.length, sigma.length); + + mean = DistributionUtils.expandToLength(mean, n); + sigma = DistributionUtils.expandToLength(sigma, n); + + // Create distributions + final ArrayList<Distribution<ContinuousDistribution>> list = new ArrayList<>(); + for (int i = 0; i < n; i++) { + final ContinuousDistribution d = FoldedNormalDistribution.of(mean[i], sigma[i]); + list.add(new Distribution<>(d, "mu=" + mean[i] + ",sigma=" + sigma[i])); + } + return list; + } + } + + /** Base command for the distribution that defines the parameters. */ + private abstract static class ProbabilityCommand extends BaseCommand { + /** The distribution options. */ + @ArgGroup(validate = false, heading = "Evaluation options:%n", order = 2) + private Options distributionOptions = new Options(); + + @Override + protected DistributionOptions getDistributionOptions() { + return distributionOptions; + } + } + + /** Base command for the distribution that defines the parameters for inverse probability functions. */ + private abstract static class InverseProbabilityCommand extends BaseCommand { + /** The distribution options. */ + @ArgGroup(validate = false, heading = "Evaluation options:%n", order = 2) + private InverseContinuousDistributionOptions distributionOptions = new InverseContinuousDistributionOptions(); + + @Override + protected DistributionOptions getDistributionOptions() { + return distributionOptions; + } + } + + /** Verification checks command. */ + @Command(name = "check", + hidden = true, + description = "Folded normal distribution verification checks.") + static class Check extends ProbabilityCommand {} + + /** PDF command. */ + @Command(name = "pdf", + description = "Folded normal distribution PDF.") + static class PDF extends ProbabilityCommand {} + + /** LPDF command. */ + @Command(name = "lpdf", + description = "Folded normal distribution natural logarithm of the PDF.") + static class LPDF extends ProbabilityCommand {} + + /** CDF command. */ + @Command(name = "cdf", + description = "Folded normal distribution CDF.") + static class CDF extends ProbabilityCommand {} + + /** SF command. */ + @Command(name = "sf", + description = "Folded normal distribution survival probability.") + static class SF extends ProbabilityCommand {} + + /** ICDF command. */ + @Command(name = "icdf", + description = "Folded normal distribution inverse CDF.") + static class ICDF extends InverseProbabilityCommand {} + + /** ISF command. */ + @Command(name = "isf", + description = "Folded normal distribution inverse SF.") + static class ISF extends InverseProbabilityCommand {} +} diff --git a/src/changes/changes.xml b/src/changes/changes.xml index ca76f2b..4b5a1eb 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -56,6 +56,10 @@ If the output is not quite correct, check for invisible trailing spaces! <release version="1.1" date="TBD" description=" Adds ranking, inference, descriptive and bom modules. (requires Java 8). "> + <action dev="aherbert" type="add" issue="STATISTICS-87"> + "FoldedNormalDistribution": Add a folded normal distribution implementation with + specialized support for a half-normal distribution. + </action> <action dev="aherbert" type="add" issue="STATISTICS-85"> Add a quantile and median implementation. </action> diff --git a/src/conf/checkstyle/checkstyle-suppressions.xml b/src/conf/checkstyle/checkstyle-suppressions.xml index 4f96cb1..4e2dbc6 100644 --- a/src/conf/checkstyle/checkstyle-suppressions.xml +++ b/src/conf/checkstyle/checkstyle-suppressions.xml @@ -24,6 +24,8 @@ <suppress checks="LocalFinalVariableName" files=".*[/\\]HypergeometricDistribution.java" /> <suppress checks="ParameterNumber" files=".*[/\\]TTest.java" /> <suppress checks="ParameterNumber" files=".*[/\\](Double|Int|Long)Statistics.java" /> + <!-- package-private fields are final --> + <suppress checks="VisibilityModifier" files=".*[/\\]FoldedNormalDistribution.java" /> <!-- Be more lenient on tests. --> <suppress checks="Javadoc" files=".*[/\\]test[/\\].*" /> <suppress checks="MultipleStringLiterals" files=".*[/\\]test[/\\].*" /> diff --git a/src/conf/pmd/pmd-ruleset.xml b/src/conf/pmd/pmd-ruleset.xml index 1233bd7..69bfd12 100644 --- a/src/conf/pmd/pmd-ruleset.xml +++ b/src/conf/pmd/pmd-ruleset.xml @@ -111,6 +111,13 @@ value="./ancestor-or-self::MethodDeclaration[@Name='setBiased' or @Name='setConfiguration']"/> </properties> </rule> + <rule ref="category/java/codestyle.xml/EmptyMethodInAbstractClassShouldBeAbstract"> + <properties> + <!-- Returning 0 for the lower bound triggers a false positive for an empty method --> + <property name="violationSuppressXPath" + value="./ancestor-or-self::MethodDeclaration[@Name='getSupportLowerBound']"/> + </properties> + </rule> <rule ref="category/java/documentation.xml/CommentSize"> <properties>