Author: brentworden Date: Wed Oct 28 19:59:21 2009 New Revision: 830745 URL: http://svn.apache.org/viewvc?rev=830745&view=rev Log: MATH-311. Changed probability calculations for Binomial, Poisson, and Hypergeometric distributions to use Catherine Loader's saddle point approximations
Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/SaddlePointExpansion.java Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/BinomialDistributionImpl.java commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/HypergeometricDistributionImpl.java commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/PoissonDistributionImpl.java commons/proper/math/trunk/src/site/xdoc/changes.xml commons/proper/math/trunk/src/test/java/org/apache/commons/math/distribution/BinomialDistributionTest.java Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/BinomialDistributionImpl.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/BinomialDistributionImpl.java?rev=830745&r1=830744&r2=830745&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/BinomialDistributionImpl.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/BinomialDistributionImpl.java Wed Oct 28 19:59:21 2009 @@ -1,18 +1,15 @@ /* * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law + * or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. */ package org.apache.commons.math.distribution; @@ -25,12 +22,12 @@ /** * The default implementation of {...@link BinomialDistribution}. - * - * @version $Revision$ $Date$ + * + * @version $Revision$ $Date: 2009-09-05 12:36:48 -0500 (Sat, 05 Sep + * 2009) $ */ -public class BinomialDistributionImpl - extends AbstractIntegerDistribution - implements BinomialDistribution, Serializable { +public class BinomialDistributionImpl extends AbstractIntegerDistribution + implements BinomialDistribution, Serializable { /** Serializable version identifier */ private static final long serialVersionUID = 6751309484392813623L; @@ -44,6 +41,7 @@ /** * Create a binomial distribution with the given number of trials and * probability of success. + * * @param trials the number of trials. * @param p the probability of success. */ @@ -55,6 +53,7 @@ /** * Access the number of trials for this distribution. + * * @return the number of trials. */ public int getNumberOfTrials() { @@ -63,6 +62,7 @@ /** * Access the probability of success for this distribution. + * * @return the probability of success. */ public double getProbabilityOfSuccess() { @@ -71,28 +71,30 @@ /** * Change the number of trials for this distribution. + * * @param trials the new number of trials. * @throws IllegalArgumentException if <code>trials</code> is not a valid - * number of trials. + * number of trials. */ public void setNumberOfTrials(int trials) { if (trials < 0) { throw MathRuntimeException.createIllegalArgumentException( - "number of trials must be non-negative ({0})", trials); + "number of trials must be non-negative ({0})", trials); } numberOfTrials = trials; } /** * Change the probability of success for this distribution. + * * @param p the new probability of success. * @throws IllegalArgumentException if <code>p</code> is not a valid - * probability. + * probability. */ public void setProbabilityOfSuccess(double p) { if (p < 0.0 || p > 1.0) { throw MathRuntimeException.createIllegalArgumentException( - "{0} out of [{1}, {2}] range", p, 0.0, 1.0); + "{0} out of [{1}, {2}] range", p, 0.0, 1.0); } probabilityOfSuccess = p; } @@ -100,10 +102,10 @@ /** * Access the domain value lower bound, based on <code>p</code>, used to * bracket a PDF root. - * + * * @param p the desired probability for the critical value - * @return domain value lower bound, i.e. - * P(X < <i>lower bound</i>) < <code>p</code> + * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) < + * <code>p</code> */ @Override protected int getDomainLowerBound(double p) { @@ -113,10 +115,10 @@ /** * Access the domain value upper bound, based on <code>p</code>, used to * bracket a PDF root. - * + * * @param p the desired probability for the critical value - * @return domain value upper bound, i.e. - * P(X < <i>upper bound</i>) > <code>p</code> + * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) > + * <code>p</code> */ @Override protected int getDomainUpperBound(double p) { @@ -125,10 +127,11 @@ /** * For this distribution, X, this method returns P(X ≤ x). + * * @param x the value at which the PDF is evaluated. * @return PDF for this distribution. - * @throws MathException if the cumulative probability can not be - * computed due to convergence or other numerical errors. + * @throws MathException if the cumulative probability can not be computed + * due to convergence or other numerical errors. */ @Override public double cumulativeProbability(int x) throws MathException { @@ -138,18 +141,15 @@ } else if (x >= getNumberOfTrials()) { ret = 1.0; } else { - ret = - 1.0 - Beta.regularizedBeta( - getProbabilityOfSuccess(), - x + 1.0, - getNumberOfTrials() - x); + ret = 1.0 - Beta.regularizedBeta(getProbabilityOfSuccess(), + x + 1.0, getNumberOfTrials() - x); } return ret; } /** * For this distribution, X, this method returns P(X = x). - * + * * @param x the value at which the PMF is evaluated. * @return PMF for this distribution. */ @@ -158,30 +158,30 @@ if (x < 0 || x > getNumberOfTrials()) { ret = 0.0; } else { - ret = MathUtils.binomialCoefficientDouble( - getNumberOfTrials(), x) * - Math.pow(getProbabilityOfSuccess(), x) * - Math.pow(1.0 - getProbabilityOfSuccess(), - getNumberOfTrials() - x); + ret = Math.exp(SaddlePointExpansion.logBinomialProbability(x, + numberOfTrials, probabilityOfSuccess, + 1.0 - probabilityOfSuccess)); } return ret; } /** - * For this distribution, X, this method returns the largest x, such - * that P(X ≤ x) ≤ <code>p</code>. + * For this distribution, X, this method returns the largest x, such that + * P(X ≤ x) ≤ <code>p</code>. * <p> * Returns <code>-1</code> for p=0 and <code>Integer.MAX_VALUE</code> for - * p=1.</p> - * + * p=1. + * </p> + * * @param p the desired probability * @return the largest x such that P(X ≤ x) <= p * @throws MathException if the inverse cumulative probability can not be - * computed due to convergence or other numerical errors. + * computed due to convergence or other numerical errors. * @throws IllegalArgumentException if p < 0 or p > 1 */ @Override - public int inverseCumulativeProbability(final double p) throws MathException { + public int inverseCumulativeProbability(final double p) + throws MathException { // handle extreme values explicitly if (p == 0) { return -1; Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/HypergeometricDistributionImpl.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/HypergeometricDistributionImpl.java?rev=830745&r1=830744&r2=830745&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/HypergeometricDistributionImpl.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/HypergeometricDistributionImpl.java Wed Oct 28 19:59:21 2009 @@ -1,18 +1,15 @@ /* * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law + * or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. */ package org.apache.commons.math.distribution; @@ -24,12 +21,12 @@ /** * The default implementation of {...@link HypergeometricDistribution}. - * - * @version $Revision$ $Date$ + * + * @version $Revision$ $Date: 2009-09-05 12:36:48 -0500 (Sat, 05 Sep + * 2009) $ */ public class HypergeometricDistributionImpl extends AbstractIntegerDistribution - implements HypergeometricDistribution, Serializable -{ + implements HypergeometricDistribution, Serializable { /** Serializable version identifier */ private static final long serialVersionUID = -436928820673516179L; @@ -46,22 +43,25 @@ /** * Construct a new hypergeometric distribution with the given the population * size, the number of successes in the population, and the sample size. + * * @param populationSize the population size. * @param numberOfSuccesses number of successes in the population. * @param sampleSize the sample size. */ public HypergeometricDistributionImpl(int populationSize, - int numberOfSuccesses, int sampleSize) { + int numberOfSuccesses, int sampleSize) { super(); if (numberOfSuccesses > populationSize) { - throw MathRuntimeException.createIllegalArgumentException( - "number of successes ({0}) must be less than or equal to population size ({1})", - numberOfSuccesses, populationSize); + throw MathRuntimeException + .createIllegalArgumentException( + "number of successes ({0}) must be less than or equal to population size ({1})", + numberOfSuccesses, populationSize); } if (sampleSize > populationSize) { - throw MathRuntimeException.createIllegalArgumentException( - "sample size ({0}) must be less than or equal to population size ({1})", - sampleSize, populationSize); + throw MathRuntimeException + .createIllegalArgumentException( + "sample size ({0}) must be less than or equal to population size ({1})", + sampleSize, populationSize); } setPopulationSize(populationSize); setSampleSize(sampleSize); @@ -70,6 +70,7 @@ /** * For this distribution, X, this method returns P(X ≤ x). + * * @param x the value at which the PDF is evaluated. * @return PDF for this distribution. */ @@ -84,7 +85,7 @@ int[] domain = getDomain(n, m, k); if (x < domain[0]) { ret = 0.0; - } else if(x >= domain[1]) { + } else if (x >= domain[1]) { ret = 1.0; } else { ret = innerCumulativeProbability(domain[0], x, 1, n, m, k); @@ -95,40 +96,38 @@ /** * Return the domain for the given hypergeometric distribution parameters. + * * @param n the population size. * @param m number of successes in the population. * @param k the sample size. * @return a two element array containing the lower and upper bounds of the * hypergeometric distribution. */ - private int[] getDomain(int n, int m, int k){ - return new int[]{ - getLowerDomain(n, m, k), - getUpperDomain(m, k) - }; + private int[] getDomain(int n, int m, int k) { + return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) }; } /** * Access the domain value lower bound, based on <code>p</code>, used to * bracket a PDF root. - * + * * @param p the desired probability for the critical value - * @return domain value lower bound, i.e. - * P(X < <i>lower bound</i>) < <code>p</code> + * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) < + * <code>p</code> */ @Override protected int getDomainLowerBound(double p) { return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(), - getSampleSize()); + getSampleSize()); } /** * Access the domain value upper bound, based on <code>p</code>, used to * bracket a PDF root. - * + * * @param p the desired probability for the critical value - * @return domain value upper bound, i.e. - * P(X < <i>upper bound</i>) > <code>p</code> + * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) > + * <code>p</code> */ @Override protected int getDomainUpperBound(double p) { @@ -138,6 +137,7 @@ /** * Return the lowest domain value for the given hypergeometric distribution * parameters. + * * @param n the population size. * @param m number of successes in the population. * @param k the sample size. @@ -149,6 +149,7 @@ /** * Access the number of successes. + * * @return the number of successes. */ public int getNumberOfSuccesses() { @@ -157,6 +158,7 @@ /** * Access the population size. + * * @return the population size. */ public int getPopulationSize() { @@ -165,6 +167,7 @@ /** * Access the sample size. + * * @return the sample size. */ public int getSampleSize() { @@ -174,32 +177,42 @@ /** * Return the highest domain value for the given hypergeometric distribution * parameters. + * * @param m number of successes in the population. * @param k the sample size. * @return the highest domain value of the hypergeometric distribution. */ - private int getUpperDomain(int m, int k){ + private int getUpperDomain(int m, int k) { return Math.min(k, m); } /** * For this distribution, X, this method returns P(X = x). - * + * * @param x the value at which the PMF is evaluated. * @return PMF for this distribution. */ public double probability(int x) { double ret; - int n = getPopulationSize(); - int m = getNumberOfSuccesses(); + int m = getPopulationSize(); + int s = getNumberOfSuccesses(); + int f = m - s; int k = getSampleSize(); - int[] domain = getDomain(n, m, k); - if(x < domain[0] || x > domain[1]){ + int[] domain = getDomain(m, s, k); + if (x < domain[0] || x > domain[1]) { ret = 0.0; } else { - ret = probability(n, m, k, x); + double p = (double) sampleSize / (double) m; + double q = (double) (m - sampleSize) / (double) m; + double p1 = SaddlePointExpansion.logBinomialProbability(x, + numberOfSuccesses, p, q); + double p2 = SaddlePointExpansion.logBinomialProbability(sampleSize + - x, f, p, q); + double p3 = SaddlePointExpansion.logBinomialProbability(sampleSize, + m, p, q); + ret = Math.exp(p1 + p2 - p3); } return ret; @@ -208,7 +221,7 @@ /** * For the distribution, X, defined by the given hypergeometric distribution * parameters, this method returns P(X = x). - * + * * @param n the population size. * @param m number of successes in the population. * @param k the sample size. @@ -216,55 +229,56 @@ * @return PMF for the distribution. */ private double probability(int n, int m, int k, int x) { - return Math.exp(MathUtils.binomialCoefficientLog(m, x) + - MathUtils.binomialCoefficientLog(n - m, k - x) - - MathUtils.binomialCoefficientLog(n, k)); + return Math.exp(MathUtils.binomialCoefficientLog(m, x) + + MathUtils.binomialCoefficientLog(n - m, k - x) + - MathUtils.binomialCoefficientLog(n, k)); } /** * Modify the number of successes. + * * @param num the new number of successes. * @throws IllegalArgumentException if <code>num</code> is negative. */ public void setNumberOfSuccesses(int num) { - if(num < 0){ + if (num < 0) { throw MathRuntimeException.createIllegalArgumentException( - "number of successes must be non-negative ({0})", - num); + "number of successes must be non-negative ({0})", num); } numberOfSuccesses = num; } /** * Modify the population size. + * * @param size the new population size. * @throws IllegalArgumentException if <code>size</code> is not positive. */ public void setPopulationSize(int size) { - if(size <= 0){ + if (size <= 0) { throw MathRuntimeException.createIllegalArgumentException( - "population size must be positive ({0})", - size); + "population size must be positive ({0})", size); } populationSize = size; } /** * Modify the sample size. + * * @param size the new sample size. * @throws IllegalArgumentException if <code>size</code> is negative. */ public void setSampleSize(int size) { if (size < 0) { throw MathRuntimeException.createIllegalArgumentException( - "sample size must be positive ({0})", - size); + "sample size must be positive ({0})", size); } sampleSize = size; } /** * For this distribution, X, this method returns P(X ≥ x). + * * @param x the value at which the CDF is evaluated. * @return upper tail CDF for this distribution. * @since 1.1 @@ -279,7 +293,7 @@ int[] domain = getDomain(n, m, k); if (x < domain[0]) { ret = 1.0; - } else if(x > domain[1]) { + } else if (x > domain[1]) { ret = 0.0; } else { ret = innerCumulativeProbability(domain[1], x, -1, n, m, k); @@ -289,21 +303,21 @@ } /** - * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This + * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This * probability is computed by summing the point probabilities for the values * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. + * * @param x0 the inclusive, lower bound * @param x1 the inclusive, upper bound * @param dx the direction of summation. 1 indicates summing from x0 to x1. - * 0 indicates summing from x1 to x0. + * 0 indicates summing from x1 to x0. * @param n the population size. * @param m number of successes in the population. * @param k the sample size. * @return P(x0 ≤ X ≤ x1). */ - private double innerCumulativeProbability( - int x0, int x1, int dx, int n, int m, int k) - { + private double innerCumulativeProbability(int x0, int x1, int dx, int n, + int m, int k) { double ret = probability(n, m, k, x0); while (x0 != x1) { x0 += dx; Modified: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/PoissonDistributionImpl.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/PoissonDistributionImpl.java?rev=830745&r1=830744&r2=830745&view=diff ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/PoissonDistributionImpl.java (original) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/PoissonDistributionImpl.java Wed Oct 28 19:59:21 2009 @@ -1,18 +1,15 @@ /* * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law + * or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. */ package org.apache.commons.math.distribution; @@ -25,8 +22,9 @@ /** * Implementation for the {...@link PoissonDistribution}. - * - * @version $Revision$ $Date$ + * + * @version $Revision$ $Date: 2009-09-05 12:36:48 -0500 (Sat, 05 Sep + * 2009) $ */ public class PoissonDistributionImpl extends AbstractIntegerDistribution implements PoissonDistribution, Serializable { @@ -43,10 +41,9 @@ private double mean; /** - * Create a new Poisson distribution with the given the mean. - * The mean value must be positive; otherwise an - * <code>IllegalArgument</code> is thrown. - * + * Create a new Poisson distribution with the given the mean. The mean value + * must be positive; otherwise an <code>IllegalArgument</code> is thrown. + * * @param p the Poisson mean * @throws IllegalArgumentException if p ≤ 0 */ @@ -55,10 +52,9 @@ } /** - * Create a new Poisson distribution with the given the mean. - * The mean value must be positive; otherwise an - * <code>IllegalArgument</code> is thrown. - * + * Create a new Poisson distribution with the given the mean. The mean value + * must be positive; otherwise an <code>IllegalArgument</code> is thrown. + * * @param p the Poisson mean * @param z a normal distribution used to compute normal approximations. * @throws IllegalArgumentException if p ≤ 0 @@ -72,7 +68,7 @@ /** * Get the Poisson mean for the distribution. - * + * * @return the Poisson mean for the distribution. */ public double getMean() { @@ -80,18 +76,16 @@ } /** - * Set the Poisson mean for the distribution. - * The mean value must be positive; otherwise an - * <code>IllegalArgument</code> is thrown. - * + * Set the Poisson mean for the distribution. The mean value must be + * positive; otherwise an <code>IllegalArgument</code> is thrown. + * * @param p the Poisson mean value * @throws IllegalArgumentException if p ≤ 0 */ public void setMean(double p) { if (p <= 0) { throw MathRuntimeException.createIllegalArgumentException( - "the Poisson mean must be positive ({0})", - p); + "the Poisson mean must be positive ({0})", p); } this.mean = p; normal.setMean(p); @@ -100,25 +94,34 @@ /** * The probability mass function P(X = x) for a Poisson distribution. - * - * @param x the value at which the probability density function is evaluated. + * + * @param x the value at which the probability density function is + * evaluated. * @return the value of the probability mass function at x */ public double probability(int x) { + double ret; if (x < 0 || x == Integer.MAX_VALUE) { - return 0; + ret = 0.0; + } else if (x == 0) { + ret = Math.exp(-mean); + } else { + ret = Math.exp(-SaddlePointExpansion.getStirlingError(x) + - SaddlePointExpansion.getDeviancePart(x, mean)) + / Math.sqrt(2.0 * Math.PI * x); // TODO make MathUtils.PI + // public } - return Math.pow(getMean(), x) / - MathUtils.factorialDouble(x) * Math.exp(-mean); + return ret; } /** - * The probability distribution function P(X <= x) for a Poisson distribution. - * + * The probability distribution function P(X <= x) for a Poisson + * distribution. + * * @param x the value at which the PDF is evaluated. * @return Poisson distribution function evaluated at x - * @throws MathException if the cumulative probability can not be - * computed due to convergence or other numerical errors. + * @throws MathException if the cumulative probability can not be computed + * due to convergence or other numerical errors. */ @Override public double cumulativeProbability(int x) throws MathException { @@ -128,21 +131,24 @@ if (x == Integer.MAX_VALUE) { return 1; } - return Gamma.regularizedGammaQ((double)x + 1, mean, - 1E-12, Integer.MAX_VALUE); + return Gamma.regularizedGammaQ((double) x + 1, mean, 1E-12, + Integer.MAX_VALUE); } /** * Calculates the Poisson distribution function using a normal - * approximation. The <code>N(mean, sqrt(mean))</code> - * distribution is used to approximate the Poisson distribution. + * approximation. The <code>N(mean, sqrt(mean))</code> distribution is used + * to approximate the Poisson distribution. * <p> * The computation uses "half-correction" -- evaluating the normal - * distribution function at <code>x + 0.5</code></p> - * + * distribution function at <code>x + 0.5</code> + * </p> + * * @param x the upper bound, inclusive - * @return the distribution function value calculated using a normal approximation - * @throws MathException if an error occurs computing the normal approximation + * @return the distribution function value calculated using a normal + * approximation + * @throws MathException if an error occurs computing the normal + * approximation */ public double normalApproximateProbability(int x) throws MathException { // calculate the probability using half-correction @@ -151,9 +157,9 @@ /** * Access the domain value lower bound, based on <code>p</code>, used to - * bracket a CDF root. This method is used by + * bracket a CDF root. This method is used by * {...@link #inverseCumulativeProbability(double)} to find critical values. - * + * * @param p the desired probability for the critical value * @return domain lower bound */ @@ -164,9 +170,9 @@ /** * Access the domain value upper bound, based on <code>p</code>, used to - * bracket a CDF root. This method is used by + * bracket a CDF root. This method is used by * {...@link #inverseCumulativeProbability(double)} to find critical values. - * + * * @param p the desired probability for the critical value * @return domain upper bound */ @@ -176,9 +182,10 @@ } /** - * Modify the normal distribution used to compute normal approximations. - * The caller is responsible for insuring the normal distribution has the - * proper parameter settings. + * Modify the normal distribution used to compute normal approximations. The + * caller is responsible for insuring the normal distribution has the proper + * parameter settings. + * * @param value the new distribution * @since 1.2 */ Added: commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/SaddlePointExpansion.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/SaddlePointExpansion.java?rev=830745&view=auto ============================================================================== --- commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/SaddlePointExpansion.java (added) +++ commons/proper/math/trunk/src/main/java/org/apache/commons/math/distribution/SaddlePointExpansion.java Wed Oct 28 19:59:21 2009 @@ -0,0 +1,185 @@ +package org.apache.commons.math.distribution; + +import org.apache.commons.math.special.Gamma; + +/** + * <p> + * Utility class used by various distributions to accurately compute their + * respective probability mass functions. The implementation for this class is + * based on the Catherine Loader's <a target="_blank" + * href="http://www.herine.net/stat/software/dbinom.html">dbinom</a> routines. + * </p> + * <p> + * This class is not intended to be called directly. + * </p> + * <p> + * References: + * <ol> + * <li>Catherine Loader (2000). "Fast and Accurate Computation of Binomial + * Probabilities.". <a target="_blank" + * href="http://www.herine.net/stat/papers/dbinom.pdf"> + * http://www.herine.net/stat/papers/dbinom.pdf</a></li> + * </ol> + * </p> + * + * @since 1.2 + * @version $Revision: 1.3 $ $Date: 2007/11/18 23:51:21 $ + */ +final class SaddlePointExpansion { + + /** 2 π. */ + private static double PI_2 = 2.0 * Math.PI; + + /** 1/2 * log(2 π). */ + private static double HALF_LOG_2_PI = 0.5 * Math.log(PI_2); + + /** exact striling expansion error for certain values. */ + private static final double[] EXACT_STIRLING_ERRORS = { 0.0, /* 0.0 */ + 0.1534264097200273452913848, /* 0.5 */ + 0.0810614667953272582196702, /* 1.0 */ + 0.0548141210519176538961390, /* 1.5 */ + 0.0413406959554092940938221, /* 2.0 */ + 0.03316287351993628748511048, /* 2.5 */ + 0.02767792568499833914878929, /* 3.0 */ + 0.02374616365629749597132920, /* 3.5 */ + 0.02079067210376509311152277, /* 4.0 */ + 0.01848845053267318523077934, /* 4.5 */ + 0.01664469118982119216319487, /* 5.0 */ + 0.01513497322191737887351255, /* 5.5 */ + 0.01387612882307074799874573, /* 6.0 */ + 0.01281046524292022692424986, /* 6.5 */ + 0.01189670994589177009505572, /* 7.0 */ + 0.01110455975820691732662991, /* 7.5 */ + 0.010411265261972096497478567, /* 8.0 */ + 0.009799416126158803298389475, /* 8.5 */ + 0.009255462182712732917728637, /* 9.0 */ + 0.008768700134139385462952823, /* 9.5 */ + 0.008330563433362871256469318, /* 10.0 */ + 0.007934114564314020547248100, /* 10.5 */ + 0.007573675487951840794972024, /* 11.0 */ + 0.007244554301320383179543912, /* 11.5 */ + 0.006942840107209529865664152, /* 12.0 */ + 0.006665247032707682442354394, /* 12.5 */ + 0.006408994188004207068439631, /* 13.0 */ + 0.006171712263039457647532867, /* 13.5 */ + 0.005951370112758847735624416, /* 14.0 */ + 0.005746216513010115682023589, /* 14.5 */ + 0.005554733551962801371038690 /* 15.0 */ + }; + + /** + * Default constructor. + */ + private SaddlePointExpansion() { + super(); + } + + /** + * Compute the error of Stirling's series at the given value. + * <p> + * References: + * <ol> + * <li>Eric W. Weisstein. "Stirling's Series." From MathWorld--A Wolfram Web + * Resource. <a target="_blank" + * href="http://mathworld.wolfram.com/StirlingsSeries.html"> + * http://mathworld.wolfram.com/StirlingsSeries.html</a></li> + * </ol> + * </p> + * + * @param z the value. + * @return the Striling's series error. + */ + static double getStirlingError(double z) { + double ret; + if (z < 15.0) { + double z2 = 2.0 * z; + if (Math.floor(z2) == z2) { + ret = EXACT_STIRLING_ERRORS[(int) z2]; + } else { + ret = Gamma.logGamma(z + 1.0) - (z + 0.5) * Math.log(z) + z + - HALF_LOG_2_PI; + } + } else { + double z2 = z * z; + ret = (0.083333333333333333333 - (0.00277777777777777777778 - (0.00079365079365079365079365 - (0.000595238095238095238095238 - 0.0008417508417508417508417508 / z2) + / z2) + / z2) + / z2) + / z; + } + return ret; + } + + /** + * A part of the deviance portion of the saddle point approximation. + * <p> + * References: + * <ol> + * <li>Catherine Loader (2000). "Fast and Accurate Computation of Binomial + * Probabilities.". <a target="_blank" + * href="http://www.herine.net/stat/papers/dbinom.pdf"> + * http://www.herine.net/stat/papers/dbinom.pdf</a></li> + * </ol> + * </p> + * + * @param x the x value. + * @param mu the average. + * @return a part of the deviance. + */ + static double getDeviancePart(double x, double mu) { + double ret; + if (Math.abs(x - mu) < 0.1 * (x + mu)) { + double d = (x - mu); + double v = d / (x + mu); + double s1 = v * d; + double s = Double.NaN; + double ej = 2.0 * x * v; + v = v * v; + int j = 1; + while (s1 != s) { + s = s1; + ej *= v; + s1 = s + ej / ((j * 2) + 1); + ++j; + } + ret = s1; + } else { + ret = x * Math.log(x / mu) + mu - x; + } + return ret; + } + + /** + * Compute the PMF for a binomial distribution using the saddle point + * expansion. + * + * @param x the value at which the probability is evaluated. + * @param n the number of trials. + * @param p the probability of success. + * @param q the probability of failure (1 - p). + * @return log(p(x)). + */ + static double logBinomialProbability(int x, int n, double p, double q) { + double ret; + if (x == 0) { + if (p < 0.1) { + ret = -getDeviancePart(n, n * q) - n * p; + } else { + ret = n * Math.log(q); + } + } else if (x == n) { + if (q < 0.1) { + ret = -getDeviancePart(n, n * p) - n * q; + } else { + ret = n * Math.log(p); + } + } else { + ret = getStirlingError(n) - getStirlingError(x) + - getStirlingError(n - x) - getDeviancePart(x, n * p) + - getDeviancePart(n - x, n * q); + double f = (PI_2 * x * (n - x)) / n; + ret = -0.5 * Math.log(f) + ret; + } + return ret; + } +} Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=830745&r1=830744&r2=830745&view=diff ============================================================================== --- commons/proper/math/trunk/src/site/xdoc/changes.xml (original) +++ commons/proper/math/trunk/src/site/xdoc/changes.xml Wed Oct 28 19:59:21 2009 @@ -39,6 +39,10 @@ </properties> <body> <release version="2.1" date="TBD" description="TBD"> + <action dev="brentworden" type="update" issue="MATH-311" due-to="Nipun Jawalkar"> + Changed probability calculations for Binomial, Poisson, and Hypergeometric + distributions to use Catherine Loader's saddle point approximations. + </action> <action dev="psteitz" type="fix" issue="MATH-306" due-to="Joerg Huber"> Removed dead code from Complex#divide. </action> Modified: commons/proper/math/trunk/src/test/java/org/apache/commons/math/distribution/BinomialDistributionTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/java/org/apache/commons/math/distribution/BinomialDistributionTest.java?rev=830745&r1=830744&r2=830745&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/java/org/apache/commons/math/distribution/BinomialDistributionTest.java (original) +++ commons/proper/math/trunk/src/test/java/org/apache/commons/math/distribution/BinomialDistributionTest.java Wed Oct 28 19:59:21 2009 @@ -1,57 +1,57 @@ /* * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law + * or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. */ package org.apache.commons.math.distribution; /** - * Test cases for BinomialDistribution. - * Extends IntegerDistributionAbstractTest. See class javadoc for - * IntegerDistributionAbstractTest for details. - * - * @version $Revision$ $Date$ + * Test cases for BinomialDistribution. Extends IntegerDistributionAbstractTest. + * See class javadoc for IntegerDistributionAbstractTest for details. + * + * @version $Revision$ $Date: 2009-09-05 12:36:48 -0500 (Sat, 05 Sep + * 2009) $ */ public class BinomialDistributionTest extends IntegerDistributionAbstractTest { /** * Constructor for BinomialDistributionTest. + * * @param name */ public BinomialDistributionTest(String name) { super(name); } - //-------------- Implementations for abstract methods ----------------------- + // -------------- Implementations for abstract methods + // ----------------------- /** Creates the default discrete distribution instance to use in tests. */ @Override public IntegerDistribution makeDistribution() { - return new BinomialDistributionImpl(10,0.70); + return new BinomialDistributionImpl(10, 0.70); } /** Creates the default probability density test input values */ @Override public int[] makeDensityTestPoints() { - return new int[] {-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + return new int[] { -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; } /** Creates the default probability density test expected values */ @Override public double[] makeDensityTestValues() { - return new double[] {0d, 0.0000d, 0.0001d, 0.0014d, 0.0090d, 0.0368d, 0.1029d, - 0.2001d, 0.2668d, 0.2335d, 0.1211d, 0.0282d, 0d}; + return new double[] { 0d, 0.0000059049d, 0.000137781d, 0.0014467d, + 0.00900169d, 0.0367569d, 0.102919d, 0.200121d, 0.266828d, + 0.233474d, 0.121061d, 0.0282475d, 0d }; } /** Creates the default cumulative probability density test input values */ @@ -63,48 +63,51 @@ /** Creates the default cumulative probability density test expected values */ @Override public double[] makeCumulativeTestValues() { - return new double[] {0d, 0.0000d, 0.0001d, 0.0016d, 0.0106d, 0.0473d, - 0.1503d, 0.3504d, 0.6172d, 0.8507d, 0.9718d, 1d, 1d}; - } + return new double[] { 0d, 0.0000d, 0.0001d, 0.0016d, 0.0106d, 0.0473d, + 0.1503d, 0.3504d, 0.6172d, 0.8507d, 0.9718d, 1d, 1d }; + } /** Creates the default inverse cumulative probability test input values */ @Override public double[] makeInverseCumulativeTestPoints() { - return new double[] {0, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d, 0.999d, - 0.990d, 0.975d, 0.950d, 0.900d,1}; - } + return new double[] { 0, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d, + 0.999d, 0.990d, 0.975d, 0.950d, 0.900d, 1 }; + } - /** Creates the default inverse cumulative probability density test expected values */ + /** + * Creates the default inverse cumulative probability density test expected + * values + */ @Override public int[] makeInverseCumulativeTestValues() { - return new int[] {-1, 1, 2, 3, 4, 4, 9, 9, 9, 8, 8, Integer.MAX_VALUE}; + return new int[] { -1, 1, 2, 3, 4, 4, 9, 9, 9, 8, 8, Integer.MAX_VALUE }; } - //----------------- Additional test cases --------------------------------- + // ----------------- Additional test cases --------------------------------- - /** Test degenerate case p = 0 */ + /** Test degenerate case p = 0 */ public void testDegenerate0() throws Exception { - setDistribution(new BinomialDistributionImpl(5,0.0d)); - setCumulativeTestPoints(new int[] {-1, 0, 1, 5, 10 }); - setCumulativeTestValues(new double[] {0d, 1d, 1d, 1d, 1d}); - setDensityTestPoints(new int[] {-1, 0, 1, 10, 11}); - setDensityTestValues(new double[] {0d, 1d, 0d, 0d, 0d}); - setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); - setInverseCumulativeTestValues(new int[] {-1, -1}); + setDistribution(new BinomialDistributionImpl(5, 0.0d)); + setCumulativeTestPoints(new int[] { -1, 0, 1, 5, 10 }); + setCumulativeTestValues(new double[] { 0d, 1d, 1d, 1d, 1d }); + setDensityTestPoints(new int[] { -1, 0, 1, 10, 11 }); + setDensityTestValues(new double[] { 0d, 1d, 0d, 0d, 0d }); + setInverseCumulativeTestPoints(new double[] { 0.1d, 0.5d }); + setInverseCumulativeTestValues(new int[] { -1, -1 }); verifyDensities(); verifyCumulativeProbabilities(); verifyInverseCumulativeProbabilities(); } - /** Test degenerate case p = 1 */ + /** Test degenerate case p = 1 */ public void testDegenerate1() throws Exception { - setDistribution(new BinomialDistributionImpl(5,1.0d)); - setCumulativeTestPoints(new int[] {-1, 0, 1, 2, 5, 10 }); - setCumulativeTestValues(new double[] {0d, 0d, 0d, 0d, 1d, 1d}); - setDensityTestPoints(new int[] {-1, 0, 1, 2, 5, 10}); - setDensityTestValues(new double[] {0d, 0d, 0d, 0d, 1d, 0d}); - setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); - setInverseCumulativeTestValues(new int[] {4, 4}); + setDistribution(new BinomialDistributionImpl(5, 1.0d)); + setCumulativeTestPoints(new int[] { -1, 0, 1, 2, 5, 10 }); + setCumulativeTestValues(new double[] { 0d, 0d, 0d, 0d, 1d, 1d }); + setDensityTestPoints(new int[] { -1, 0, 1, 2, 5, 10 }); + setDensityTestValues(new double[] { 0d, 0d, 0d, 0d, 1d, 0d }); + setInverseCumulativeTestPoints(new double[] { 0.1d, 0.5d }); + setInverseCumulativeTestValues(new int[] { 4, 4 }); verifyDensities(); verifyCumulativeProbabilities(); verifyInverseCumulativeProbabilities();