Repository: commons-math Updated Branches: refs/heads/master 4140e0126 -> ece7c6fc6
[MATH-437] Remove deprecated KS distribution. Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/4be09dff Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/4be09dff Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/4be09dff Branch: refs/heads/master Commit: 4be09dfff27baf84c8c500e38e1a1e5a99f3f1a9 Parents: 4140e01 Author: Thomas Neidhart <thomas.neidh...@gmail.com> Authored: Tue Feb 24 23:07:58 2015 +0100 Committer: Thomas Neidhart <thomas.neidh...@gmail.com> Committed: Tue Feb 24 23:07:58 2015 +0100 ---------------------------------------------------------------------- .../KolmogorovSmirnovDistribution.java | 357 ------------------- .../KolmogorovSmirnovDistributionTest.java | 119 ------- 2 files changed, 476 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/4be09dff/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java b/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java deleted file mode 100644 index 7199121..0000000 --- a/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.math4.distribution; - -import java.io.Serializable; -import java.math.BigDecimal; - -import org.apache.commons.math4.exception.MathArithmeticException; -import org.apache.commons.math4.exception.NotStrictlyPositiveException; -import org.apache.commons.math4.exception.NumberIsTooLargeException; -import org.apache.commons.math4.exception.util.LocalizedFormats; -import org.apache.commons.math4.fraction.BigFraction; -import org.apache.commons.math4.fraction.BigFractionField; -import org.apache.commons.math4.fraction.FractionConversionException; -import org.apache.commons.math4.linear.Array2DRowFieldMatrix; -import org.apache.commons.math4.linear.Array2DRowRealMatrix; -import org.apache.commons.math4.linear.FieldMatrix; -import org.apache.commons.math4.linear.RealMatrix; -import org.apache.commons.math4.util.FastMath; - -/** - * Implementation of the Kolmogorov-Smirnov distribution. - * - * <p> - * Treats the distribution of the two-sided {@code P(D_n < d)} where - * {@code D_n = sup_x |G(x) - G_n (x)|} for the theoretical cdf {@code G} and - * the empirical cdf {@code G_n}. - * </p> - * <p> - * This implementation is based on [1] with certain quick decisions for extreme - * values given in [2]. - * </p> - * <p> - * In short, when wanting to evaluate {@code P(D_n < d)}, the method in [1] is - * to write {@code d = (k - h) / n} for positive integer {@code k} and - * {@code 0 <= h < 1}. Then {@code P(D_n < d) = (n! / n^n) * t_kk}, where - * {@code t_kk} is the {@code (k, k)}'th entry in the special matrix - * {@code H^n}, i.e. {@code H} to the {@code n}'th power. - * </p> - * <p> - * References: - * <ul> - * <li>[1] <a href="http://www.jstatsoft.org/v08/i18/"> - * Evaluating Kolmogorov's Distribution</a> by George Marsaglia, Wai - * Wan Tsang, and Jingbo Wang</li> - * <li>[2] <a href="http://www.jstatsoft.org/v39/i11/"> - * Computing the Two-Sided Kolmogorov-Smirnov Distribution</a> by Richard Simard - * and Pierre L'Ecuyer</li> - * </ul> - * Note that [1] contains an error in computing h, refer to - * <a href="https://issues.apache.org/jira/browse/MATH-437">MATH-437</a> for details. - * </p> - * - * @see <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> - * Kolmogorov-Smirnov test (Wikipedia)</a> - * @deprecated to be removed in version 4.0 - - * use {@link org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest} - */ -public class KolmogorovSmirnovDistribution implements Serializable { - - /** Serializable version identifier. */ - private static final long serialVersionUID = -4670676796862967187L; - - /** Number of observations. */ - private int n; - - /** - * @param n Number of observations - * @throws NotStrictlyPositiveException if {@code n <= 0} - */ - public KolmogorovSmirnovDistribution(int n) - throws NotStrictlyPositiveException { - if (n <= 0) { - throw new NotStrictlyPositiveException(LocalizedFormats.NOT_POSITIVE_NUMBER_OF_SAMPLES, n); - } - - this.n = n; - } - - /** - * Calculates {@code P(D_n < d)} using method described in [1] with quick - * decisions for extreme values given in [2] (see above). The result is not - * exact as with - * {@link KolmogorovSmirnovDistribution#cdfExact(double)} because - * calculations are based on {@code double} rather than - * {@link org.apache.commons.math4.fraction.BigFraction}. - * - * @param d statistic - * @return the two-sided probability of {@code P(D_n < d)} - * @throws MathArithmeticException if algorithm fails to convert {@code h} - * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing - * {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - public double cdf(double d) throws MathArithmeticException { - return this.cdf(d, false); - } - - /** - * Calculates {@code P(D_n < d)} using method described in [1] with quick - * decisions for extreme values given in [2] (see above). The result is - * exact in the sense that BigFraction/BigReal is used everywhere at the - * expense of very slow execution time. Almost never choose this in real - * applications unless you are very sure; this is almost solely for - * verification purposes. Normally, you would choose - * {@link KolmogorovSmirnovDistribution#cdf(double)} - * - * @param d statistic - * @return the two-sided probability of {@code P(D_n < d)} - * @throws MathArithmeticException if algorithm fails to convert {@code h} - * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing - * {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - public double cdfExact(double d) throws MathArithmeticException { - return this.cdf(d, true); - } - - /** - * Calculates {@code P(D_n < d)} using method described in [1] with quick - * decisions for extreme values given in [2] (see above). - * - * @param d statistic - * @param exact whether the probability should be calculated exact using - * {@link org.apache.commons.math4.fraction.BigFraction} everywhere at the - * expense of very slow execution time, or if {@code double} should be used - * convenient places to gain speed. Almost never choose {@code true} in real - * applications unless you are very sure; {@code true} is almost solely for - * verification purposes. - * @return the two-sided probability of {@code P(D_n < d)} - * @throws MathArithmeticException if algorithm fails to convert {@code h} - * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing - * {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - public double cdf(double d, boolean exact) throws MathArithmeticException { - - final double ninv = 1 / ((double) n); - final double ninvhalf = 0.5 * ninv; - - if (d <= ninvhalf) { - - return 0; - - } else if (ninvhalf < d && d <= ninv) { - - double res = 1; - double f = 2 * d - ninv; - - // n! f^n = n*f * (n-1)*f * ... * 1*x - for (int i = 1; i <= n; ++i) { - res *= i * f; - } - - return res; - - } else if (1 - ninv <= d && d < 1) { - - return 1 - 2 * FastMath.pow(1 - d, n); - - } else if (1 <= d) { - - return 1; - } - - return exact ? exactK(d) : roundedK(d); - } - - /** - * Calculates the exact value of {@code P(D_n < d)} using method described - * in [1] and {@link org.apache.commons.math4.fraction.BigFraction} (see - * above). - * - * @param d statistic - * @return the two-sided probability of {@code P(D_n < d)} - * @throws MathArithmeticException if algorithm fails to convert {@code h} - * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing - * {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - private double exactK(double d) throws MathArithmeticException { - - final int k = (int) FastMath.ceil(n * d); - - final FieldMatrix<BigFraction> H = this.createH(d); - final FieldMatrix<BigFraction> Hpower = H.power(n); - - BigFraction pFrac = Hpower.getEntry(k - 1, k - 1); - - for (int i = 1; i <= n; ++i) { - pFrac = pFrac.multiply(i).divide(n); - } - - /* - * BigFraction.doubleValue converts numerator to double and the - * denominator to double and divides afterwards. That gives NaN quite - * easy. This does not (scale is the number of digits): - */ - return pFrac.bigDecimalValue(20, BigDecimal.ROUND_HALF_UP).doubleValue(); - } - - /** - * Calculates {@code P(D_n < d)} using method described in [1] and doubles - * (see above). - * - * @param d statistic - * @return the two-sided probability of {@code P(D_n < d)} - * @throws MathArithmeticException if algorithm fails to convert {@code h} - * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing - * {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - private double roundedK(double d) throws MathArithmeticException { - - final int k = (int) FastMath.ceil(n * d); - final FieldMatrix<BigFraction> HBigFraction = this.createH(d); - final int m = HBigFraction.getRowDimension(); - - /* - * Here the rounding part comes into play: use - * RealMatrix instead of FieldMatrix<BigFraction> - */ - final RealMatrix H = new Array2DRowRealMatrix(m, m); - - for (int i = 0; i < m; ++i) { - for (int j = 0; j < m; ++j) { - H.setEntry(i, j, HBigFraction.getEntry(i, j).doubleValue()); - } - } - - final RealMatrix Hpower = H.power(n); - - double pFrac = Hpower.getEntry(k - 1, k - 1); - - for (int i = 1; i <= n; ++i) { - pFrac *= (double) i / (double) n; - } - - return pFrac; - } - - /*** - * Creates {@code H} of size {@code m x m} as described in [1] (see above). - * - * @param d statistic - * @return H matrix - * @throws NumberIsTooLargeException if fractional part is greater than 1 - * @throws FractionConversionException if algorithm fails to convert - * {@code h} to a {@link org.apache.commons.math4.fraction.BigFraction} in - * expressing {@code d} as {@code (k - h) / m} for integer {@code k, m} and - * {@code 0 <= h < 1}. - */ - private FieldMatrix<BigFraction> createH(double d) - throws NumberIsTooLargeException, FractionConversionException { - - int k = (int) FastMath.ceil(n * d); - - int m = 2 * k - 1; - double hDouble = k - n * d; - - if (hDouble >= 1) { - throw new NumberIsTooLargeException(hDouble, 1.0, false); - } - - BigFraction h = null; - - try { - h = new BigFraction(hDouble, 1.0e-20, 10000); - } catch (FractionConversionException e1) { - try { - h = new BigFraction(hDouble, 1.0e-10, 10000); - } catch (FractionConversionException e2) { - h = new BigFraction(hDouble, 1.0e-5, 10000); - } - } - - final BigFraction[][] Hdata = new BigFraction[m][m]; - - /* - * Start by filling everything with either 0 or 1. - */ - for (int i = 0; i < m; ++i) { - for (int j = 0; j < m; ++j) { - if (i - j + 1 < 0) { - Hdata[i][j] = BigFraction.ZERO; - } else { - Hdata[i][j] = BigFraction.ONE; - } - } - } - - /* - * Setting up power-array to avoid calculating the same value twice: - * hPowers[0] = h^1 ... hPowers[m-1] = h^m - */ - final BigFraction[] hPowers = new BigFraction[m]; - hPowers[0] = h; - for (int i = 1; i < m; ++i) { - hPowers[i] = h.multiply(hPowers[i - 1]); - } - - /* - * First column and last row has special values (each other reversed). - */ - for (int i = 0; i < m; ++i) { - Hdata[i][0] = Hdata[i][0].subtract(hPowers[i]); - Hdata[m - 1][i] = Hdata[m - 1][i].subtract(hPowers[m - i - 1]); - } - - /* - * [1] states: "For 1/2 < h < 1 the bottom left element of the matrix - * should be (1 - 2*h^m + (2h - 1)^m )/m!" Since 0 <= h < 1, then if h > - * 1/2 is sufficient to check: - */ - if (h.compareTo(BigFraction.ONE_HALF) == 1) { - Hdata[m - 1][0] = Hdata[m - 1][0].add(h.multiply(2).subtract(1).pow(m)); - } - - /* - * Aside from the first column and last row, the (i, j)-th element is - * 1/(i - j + 1)! if i - j + 1 >= 0, else 0. 1's and 0's are already - * put, so only division with (i - j + 1)! is needed in the elements - * that have 1's. There is no need to calculate (i - j + 1)! and then - * divide - small steps avoid overflows. - * - * Note that i - j + 1 > 0 <=> i + 1 > j instead of j'ing all the way to - * m. Also note that it is started at g = 2 because dividing by 1 isn't - * really necessary. - */ - for (int i = 0; i < m; ++i) { - for (int j = 0; j < i + 1; ++j) { - if (i - j + 1 > 0) { - for (int g = 2; g <= i - j + 1; ++g) { - Hdata[i][j] = Hdata[i][j].divide(g); - } - } - } - } - - return new Array2DRowFieldMatrix<BigFraction>(BigFractionField.getInstance(), Hdata); - } -} http://git-wip-us.apache.org/repos/asf/commons-math/blob/4be09dff/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java deleted file mode 100644 index eb472b7..0000000 --- a/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.math4.distribution; - -import org.apache.commons.math4.distribution.KolmogorovSmirnovDistribution; -import org.junit.Assert; -import org.junit.Test; - -/** - * Test cases for {@link KolmogorovSmirnovDistribution}. - * - */ -@Deprecated -public class KolmogorovSmirnovDistributionTest { - - private static final double TOLERANCE = 10e-10; - - @Test - public void testCumulativeDensityFunction() { - - KolmogorovSmirnovDistribution dist; - - /* The code below is generated using the R-script located in - * /src/test/R/KolmogorovSmirnovDistributionTestCases.R - */ - - /* R version 2.11.1 (2010-05-31) */ - - - /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(200), PACKAGE = "stats")$p, 40) gives - * 4.907829957616471622388047046469198862537e-86 - */ - dist = new KolmogorovSmirnovDistribution(200); - Assert.assertEquals(4.907829957616471622388047046469198862537e-86, dist.cdf(0.005, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(200), PACKAGE = "stats")$p, 40) gives - * 5.151982014280041957199687829849210629618e-06 - */ - dist = new KolmogorovSmirnovDistribution(200); - Assert.assertEquals(5.151982014280041957199687829849210629618e-06, dist.cdf(0.02, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(200), PACKAGE = "stats")$p, 40) gives - * 0.01291614648162886340443389343590752105229 - */ - dist = new KolmogorovSmirnovDistribution(200); - Assert.assertEquals(0.01291614648162886340443389343590752105229, dist.cdf(0.031111, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(200), PACKAGE = "stats")$p, 40) gives - * 0.1067137011362679355208626930107129737735 - */ - dist = new KolmogorovSmirnovDistribution(200); - Assert.assertEquals(0.1067137011362679355208626930107129737735, dist.cdf(0.04, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(341), PACKAGE = "stats")$p, 40) gives - * 1.914734701559404553985102395145063418825e-53 - */ - dist = new KolmogorovSmirnovDistribution(341); - Assert.assertEquals(1.914734701559404553985102395145063418825e-53, dist.cdf(0.005, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(341), PACKAGE = "stats")$p, 40) gives - * 0.001171328985781981343872182321774744195864 - */ - dist = new KolmogorovSmirnovDistribution(341); - Assert.assertEquals(0.001171328985781981343872182321774744195864, dist.cdf(0.02, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(341), PACKAGE = "stats")$p, 40) gives - * 0.1142955196267499418105728636874118819833 - */ - dist = new KolmogorovSmirnovDistribution(341); - Assert.assertEquals(0.1142955196267499418105728636874118819833, dist.cdf(0.031111, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(341), PACKAGE = "stats")$p, 40) gives - * 0.3685529520496805266915885113121476024389 - */ - dist = new KolmogorovSmirnovDistribution(341); - Assert.assertEquals(0.3685529520496805266915885113121476024389, dist.cdf(0.04, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(389), PACKAGE = "stats")$p, 40) gives - * 1.810657144595055888918455512707637574637e-47 - */ - dist = new KolmogorovSmirnovDistribution(389); - Assert.assertEquals(1.810657144595055888918455512707637574637e-47, dist.cdf(0.005, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(389), PACKAGE = "stats")$p, 40) gives - * 0.003068542559702356568168690742481885536108 - */ - dist = new KolmogorovSmirnovDistribution(389); - Assert.assertEquals(0.003068542559702356568168690742481885536108, dist.cdf(0.02, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(389), PACKAGE = "stats")$p, 40) gives - * 0.1658291700122746237244797384846606291831 - */ - dist = new KolmogorovSmirnovDistribution(389); - Assert.assertEquals(0.1658291700122746237244797384846606291831, dist.cdf(0.031111, false), TOLERANCE); - - /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(389), PACKAGE = "stats")$p, 40) gives - * 0.4513143712128902529379104180407011881471 - */ - dist = new KolmogorovSmirnovDistribution(389); - Assert.assertEquals(0.4513143712128902529379104180407011881471, dist.cdf(0.04, false), TOLERANCE); - - } - -}