Repository: commons-math Updated Branches: refs/heads/MATH_3_X 759fed8a7 -> 7a6aa92c8
Fixed error in computing discrete distribution of D statistics for small-sample 2-sample Kolmogorov-Smirnov tests. Error was causing incorrect p-values returned by exactP and monteCarloP methods (used by default for small, mid-size samples). JIRA: MATH-1245 Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/7a6aa92c Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/7a6aa92c Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/7a6aa92c Branch: refs/heads/MATH_3_X Commit: 7a6aa92c8ac46059f7ca9d76d7da6b710df901aa Parents: 759fed8 Author: Phil Steitz <phil.ste...@gmail.com> Authored: Fri Jul 10 12:31:36 2015 -0700 Committer: Phil Steitz <phil.ste...@gmail.com> Committed: Fri Jul 10 12:31:36 2015 -0700 ---------------------------------------------------------------------- src/changes/changes.xml | 5 ++ .../stat/inference/KolmogorovSmirnovTest.java | 11 ++-- .../inference/KolmogorovSmirnovTestTest.java | 55 +++++++++++++++++++- 3 files changed, 65 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 5d47406..c5cdb11 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,11 @@ If the output is not quite correct, check for invisible trailing spaces! </properties> <body> <release version="3.6" date="XXXX-XX-XX" description=""> + <action dev="psteitz" type="fix" issue="MATH-1245"> + Fixed error in computing discrete distribution of D statistics for small-sample + 2-sample Kolmogorov-Smirnov tests. Error was causing incorrect p-values returned + by exactP and monteCarloP methods (used by default for small, mid-size samples). + </action> <action dev="tn" type="fix" issue="MATH-1240"> "KolmogorovSmirnovTest#ksSum(...)" returned wrong result in case the provided t-parameters was zero. This affected the calculation of "approximateP(...)" for http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java index e0f5c7d..f32dbf3 100644 --- a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java @@ -21,6 +21,7 @@ import java.math.BigDecimal; import java.util.Arrays; import java.util.Iterator; +import org.apache.commons.math3.util.Precision; import org.apache.commons.math3.distribution.RealDistribution; import org.apache.commons.math3.exception.InsufficientDataException; import org.apache.commons.math3.exception.MathArithmeticException; @@ -885,6 +886,7 @@ public class KolmogorovSmirnovTest { long tail = 0; final double[] nSet = new double[n]; final double[] mSet = new double[m]; + final double tol = 1e-12; // d-values within tol of one another are considered equal while (combinationsIterator.hasNext()) { // Generate an n-set final int[] nSetI = combinationsIterator.next(); @@ -899,9 +901,8 @@ public class KolmogorovSmirnovTest { } } final double curD = kolmogorovSmirnovStatistic(nSet, mSet); - if (curD > d) { - tail++; - } else if (curD == d && !strict) { + final int order = Precision.compareTo(curD, d, tol); + if (order > 0 || (order == 0 && !strict)) { tail++; } } @@ -957,6 +958,7 @@ public class KolmogorovSmirnovTest { final int nn = FastMath.max(n, m); final int mm = FastMath.min(n, m); final int sum = nn + mm; + final double tol = 1e-12; // d-values within tol of one another are considered equal int tail = 0; final boolean b[] = new boolean[sum]; @@ -978,7 +980,8 @@ public class KolmogorovSmirnovTest { final double cdf_n = rankN / (double) nn; final double cdf_m = rankM / (double) mm; final double curD = FastMath.abs(cdf_n - cdf_m); - if (curD > d || (curD == d && !strict)) { + final int order = Precision.compareTo(curD, d, tol); + if (order > 0 || (order == 0 && !strict)) { tail++; break; } http://git-wip-us.apache.org/repos/asf/commons-math/blob/7a6aa92c/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java index 3d90e31..9d0d669 100644 --- a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java @@ -323,7 +323,7 @@ public class KolmogorovSmirnovTestTest { */ // @Test public void testTwoSampleMonteCarloPerformance() { - int numIterations = 100_000; + int numIterations = 100000; int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT); final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000)); for (int n = 2; n <= N; ++n) { @@ -400,7 +400,7 @@ public class KolmogorovSmirnovTestTest { @Test public void testTwoSamplesAllEqual() { - int iterations = 10_000; + int iterations = 10000; final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); for (int i = 2; i < 30; ++i) { // testing values with ties @@ -427,6 +427,57 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.); } } + + /** + * JIRA: MATH-1245 + * + * Verify that D-values are not viewed as distinct when they are mathematically equal + * when computing p-statistics for small sample tests. Reference values are from R 3.2.0. + */ + @Test + public void testDRounding() { + final double tol = 1e-12; + final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12}; + final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18}; + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + Assert.assertEquals(0.0027495724090154106, test.kolmogorovSmirnovTest(x, y,false), tol); + + final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13}; + final double[] y1 = {0, 1, 3, 5, 7}; + Assert.assertEquals(0.085914085914085896, test.kolmogorovSmirnovTest(x1, y1, false), tol); + + final double[] x2 = {4, 6, 7, 8, 9, 10, 11}; + final double[] y2 = {0, 1, 2, 3, 5}; + Assert.assertEquals(0.015151515151515027, test.kolmogorovSmirnovTest(x2, y2, false), tol); + } + + /** + * JIRA: MATH-1245 + * + * Verify that D-values are not viewed as distinct when they are mathematically equal + * when computing p-statistics for small sample tests. Reference values are from R 3.2.0. + */ + @Test + public void testDRoundingMonteCarlo() { + final double tol = 1e-2; + final int iterations = 1000000; + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000)); + + final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12}; + final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18}; + double d = test.kolmogorovSmirnovStatistic(x, y); + Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol); + + final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13}; + final double[] y1 = {0, 1, 3, 5, 7}; + d = test.kolmogorovSmirnovStatistic(x1, y1); + Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol); + + final double[] x2 = {4, 6, 7, 8, 9, 10, 11}; + final double[] y2 = {0, 1, 2, 3, 5}; + d = test.kolmogorovSmirnovStatistic(x2, y2); + Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol); + } /** * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n,