Repository: commons-math Updated Branches: refs/heads/MATH-1153 [created] 9ba0a1cad
Added Cheng sampling procedure for beta distribution. Thanks to Sergei Lebedev for the patch. JIRA: MATH-1153 Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/9ba0a1ca Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/9ba0a1ca Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/9ba0a1ca Branch: refs/heads/MATH-1153 Commit: 9ba0a1cadf37b0d1c68b01c706a10a042e19e1f6 Parents: d9b951c Author: Luc Maisonobe <l...@apache.org> Authored: Tue Dec 16 21:48:44 2014 +0100 Committer: Luc Maisonobe <l...@apache.org> Committed: Tue Dec 16 21:48:44 2014 +0100 ---------------------------------------------------------------------- pom.xml | 3 + .../math3/distribution/BetaDistribution.java | 139 +++++++++++++++---- .../distribution/BetaDistributionTest.java | 74 ++++++++++ 3 files changed, 192 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 3ee5db2..667d36e 100644 --- a/pom.xml +++ b/pom.xml @@ -252,6 +252,9 @@ <name>Piotr Kochanski</name> </contributor> <contributor> + <name>Sergei Lebedev</name> + </contributor> + <contributor> <name>Bob MacCallum</name> </contributor> <contributor> http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java index 3f62f64..458fe23 100644 --- a/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java +++ b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java @@ -23,6 +23,7 @@ import org.apache.commons.math3.random.Well19937c; import org.apache.commons.math3.special.Beta; import org.apache.commons.math3.special.Gamma; import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.Precision; /** * Implements the Beta distribution. @@ -34,10 +35,12 @@ public class BetaDistribution extends AbstractRealDistribution { /** * Default inverse cumulative probability accuracy. * @since 2.1 + * @deprecated as of 3.4, this parameter is not used anymore */ + @Deprecated public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; /** Serializable version identifier. */ - private static final long serialVersionUID = -1221965979403477668L; + private static final long serialVersionUID = 20141216L; /** First shape parameter. */ private final double alpha; /** Second shape parameter. */ @@ -46,8 +49,6 @@ public class BetaDistribution extends AbstractRealDistribution { * updated whenever alpha or beta are changed. */ private double z; - /** Inverse cumulative probability accuracy. */ - private final double solverAbsoluteAccuracy; /** * Build a new instance. @@ -63,7 +64,7 @@ public class BetaDistribution extends AbstractRealDistribution { * @param beta Second shape parameter (must be positive). */ public BetaDistribution(double alpha, double beta) { - this(alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + this(new Well19937c(), alpha, beta); } /** @@ -82,9 +83,11 @@ public class BetaDistribution extends AbstractRealDistribution { * cumulative probability estimates (defaults to * {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). * @since 2.1 + * @deprecated as of 3.4, the inverse cumulative accuracy is not used anymore */ + @Deprecated public BetaDistribution(double alpha, double beta, double inverseCumAccuracy) { - this(new Well19937c(), alpha, beta, inverseCumAccuracy); + this(alpha, beta); } /** @@ -96,7 +99,11 @@ public class BetaDistribution extends AbstractRealDistribution { * @since 3.3 */ public BetaDistribution(RandomGenerator rng, double alpha, double beta) { - this(rng, alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + super(rng); + + this.alpha = alpha; + this.beta = beta; + z = Double.NaN; } /** @@ -109,17 +116,14 @@ public class BetaDistribution extends AbstractRealDistribution { * cumulative probability estimates (defaults to * {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). * @since 3.1 + * @deprecated as of 3.4, the inverse cumulative accuracy is not used anymore */ + @Deprecated public BetaDistribution(RandomGenerator rng, double alpha, double beta, double inverseCumAccuracy) { - super(rng); - - this.alpha = alpha; - this.beta = beta; - z = Double.NaN; - solverAbsoluteAccuracy = inverseCumAccuracy; + this(rng, alpha, beta); } /** @@ -188,18 +192,6 @@ public class BetaDistribution extends AbstractRealDistribution { } /** - * Return the absolute accuracy setting of the solver used to estimate - * inverse cumulative probabilities. - * - * @return the solver absolute accuracy. - * @since 2.1 - */ - @Override - protected double getSolverAbsoluteAccuracy() { - return solverAbsoluteAccuracy; - } - - /** * {@inheritDoc} * * For first shape parameter {@code alpha} and second shape parameter @@ -266,4 +258,103 @@ public class BetaDistribution extends AbstractRealDistribution { public boolean isSupportConnected() { return true; } + + /** {@inheritDoc} + * <p> + * Sampling is performed using Cheng algorithms: + * </p> + * <p> + * R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.". + * Communications of the ACM, 21, 317â322, 1978. + * </p> + */ + @Override + public double sample() { + if (FastMath.min(alpha, beta) > 1) { + return algorithmBB(); + } else { + return algorithmBC(); + } + } + + /** Returns one sample using Cheng's BB algorithm, when both α and β are greater than 1. + * @return sampled value + */ + private double algorithmBB() { + final double a = FastMath.min(alpha, beta); + final double b = FastMath.max(alpha, beta); + final double newAlpha = a + b; + final double newBeta = FastMath.sqrt((newAlpha - 2.) / (2. * a * b - newAlpha)); + final double gamma = a + 1. / newBeta; + + double r; + double w; + double t; + do { + final double u1 = random.nextDouble(); + final double u2 = random.nextDouble(); + final double v = newBeta * FastMath.log(u1 / (1. - u1)); + w = a * FastMath.exp(v); + final double newZ = u1 * u1 * u2; + r = gamma * v - 1.3862944; + final double s = a + r - w; + if (s + 2.609438 >= 5 * newZ) { + break; + } + + t = FastMath.log(newZ); + if (s >= t) { + break; + } + } while (r + newAlpha * FastMath.log(newAlpha / (b + w)) < t); + + w = FastMath.min(w, Double.MAX_VALUE); + return Precision.equals(a, alpha) ? w / (b + w) : b / (b + w); + } + + /** Returns one sample using Cheng's BC algorithm, when at least one of α and β is smaller than 1. + * @return sampled value + */ + private double algorithmBC() { + final double a = FastMath.max(alpha, beta); + final double b = FastMath.min(alpha, beta); + final double newAlpha = a + b; + final double newBeta = 1. / b; + final double delta = 1. + a - b; + final double k1 = delta * (0.0138889 + 0.0416667 * b) / (a * newBeta - 0.777778); + final double k2 = 0.25 + (0.5 + 0.25 / delta) * b; + + double w; + for (;;) { + final double u1 = random.nextDouble(); + final double u2 = random.nextDouble(); + final double y = u1 * u2; + final double newZ = u1 * y; + if (u1 < 0.5) { + if (0.25 * u2 + newZ - y >= k1) { + continue; + } + } else { + if (newZ <= 0.25) { + final double v = newBeta * FastMath.log(u1 / (1. - u1)); + w = a * FastMath.exp(v); + break; + } + + if (newZ >= k2) { + continue; + } + } + + final double v = newBeta * FastMath.log(u1 / (1. - u1)); + w = a * FastMath.exp(v); + if (newAlpha * (FastMath.log(newAlpha / (b + w)) + v) - 1.3862944 >= FastMath.log(newZ)) { + break; + } + } + + w = FastMath.min(w, Double.MAX_VALUE); + return Precision.equals(a, alpha) ? w / (b + w) : b / (b + w); + } + } http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java b/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java index 217ae66..d26c6f0 100644 --- a/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java +++ b/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java @@ -16,10 +16,22 @@ */ package org.apache.commons.math3.distribution; +import java.util.Arrays; + +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well1024a; +import org.apache.commons.math3.random.Well19937a; +import org.apache.commons.math3.stat.StatUtils; +import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest; +import org.apache.commons.math3.stat.inference.TestUtils; import org.junit.Assert; import org.junit.Test; public class BetaDistributionTest { + + static final double[] alphaBetas = {0.1, 1, 10, 100, 1000}; + static final double epsilon = StatUtils.min(alphaBetas); + @Test public void testCumulative() { double[] x = new double[]{-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1}; @@ -303,4 +315,66 @@ public class BetaDistributionTest { Assert.assertEquals(dist.getNumericalMean(), 2.0 / 7.0, tol); Assert.assertEquals(dist.getNumericalVariance(), 10.0 / (49.0 * 8.0), tol); } + + @Test + public void testMomentsSampling() { + RandomGenerator random = new Well1024a(0x7829862c82fec2dal); + final int numSamples = 1000; + for (final double alpha : alphaBetas) { + for (final double beta : alphaBetas) { + final BetaDistribution betaDistribution = new BetaDistribution(random, alpha, beta); + final double[] observed = new BetaDistribution(alpha, beta).sample(numSamples); + Arrays.sort(observed); + + final String distribution = String.format("Beta(%.2f, %.2f)", alpha, beta); + Assert.assertEquals(String.format("E[%s]", distribution), + betaDistribution.getNumericalMean(), + StatUtils.mean(observed), epsilon); + Assert.assertEquals(String.format("Var[%s]", distribution), + betaDistribution.getNumericalVariance(), + StatUtils.variance(observed), epsilon); + } + } + } + + @Test + public void testGoodnessOfFit() { + RandomGenerator random = new Well19937a(0x237db1db907b089fl); + final int numSamples = 1000; + final double level = 0.01; + for (final double alpha : alphaBetas) { + for (final double beta : alphaBetas) { + final BetaDistribution betaDistribution = new BetaDistribution(random, alpha, beta); + final double[] observed = betaDistribution.sample(numSamples); + Assert.assertFalse("G goodness-of-fit test rejected null at alpha = " + level, + gTest(betaDistribution, observed) < level); + Assert.assertFalse("KS goodness-of-fit test rejected null at alpha = " + level, + new KolmogorovSmirnovTest(random).kolmogorovSmirnovTest(betaDistribution, observed) < level); + } + } + } + + private double gTest(final RealDistribution expectedDistribution, final double[] values) { + final int numBins = values.length / 30; + final double[] breaks = new double[numBins]; + for (int b = 0; b < breaks.length; b++) { + breaks[b] = expectedDistribution.inverseCumulativeProbability((double) b / numBins); + } + + final long[] observed = new long[numBins]; + for (final double value : values) { + int b = 0; + do { + b++; + } while (b < numBins && value >= breaks[b]); + + observed[b - 1]++; + } + + final double[] expected = new double[numBins]; + Arrays.fill(expected, (double) values.length / numBins); + + return TestUtils.gTest(expected, observed); + } + }