Repository: commons-math Updated Branches: refs/heads/master 843267df5 -> 980554552
MATH-1405: Iteratively double minDelta for Kolmogorov-Smirnov Test jiggling Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/18f181ad Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/18f181ad Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/18f181ad Branch: refs/heads/master Commit: 18f181ada7826542725fa4a9460307d606695b5d Parents: 843267d Author: Daniil Finkel <dfin...@coldlight.com> Authored: Wed Mar 1 17:18:23 2017 -0500 Committer: Daniil Finkel <dfin...@coldlight.com> Committed: Wed Mar 1 17:18:23 2017 -0500 ---------------------------------------------------------------------- .../stat/inference/KolmogorovSmirnovTest.java | 13 ++++++------ .../inference/KolmogorovSmirnovTestTest.java | 22 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/18f181ad/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java index 9757b73..d863847 100644 --- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java @@ -24,7 +24,6 @@ import java.util.HashSet; import org.apache.commons.math4.distribution.EnumeratedRealDistribution; import org.apache.commons.math4.distribution.RealDistribution; import org.apache.commons.math4.distribution.AbstractRealDistribution; -import org.apache.commons.math4.distribution.UniformRealDistribution; import org.apache.commons.math4.exception.InsufficientDataException; import org.apache.commons.math4.exception.MathArithmeticException; import org.apache.commons.math4.exception.MathInternalError; @@ -1156,18 +1155,18 @@ public class KolmogorovSmirnovTest { // Add jitter using a fixed seed (so same arguments always give same results), // low-initialization-overhead generator - final RealDistribution.Sampler sampler = - new UniformRealDistribution(-minDelta, minDelta).createSampler(RandomSource.create(RandomSource.JDK, 100)); + UniformRandomProvider rng = RandomSource.create(RandomSource.JDK, 100); // It is theoretically possible that jitter does not break ties, so repeat // until all ties are gone. Bound the loop and throw MIE if bound is exceeded. int ct = 0; boolean ties = true; do { - jitter(x, sampler); - jitter(y, sampler); + jitter(x, rng, minDelta); + jitter(y, rng, minDelta); ties = hasTies(x, y); ct++; + minDelta *= 2; // if jittering hasn't resolved ties, minDelta may be too small, so double it for next iteration } while (ties && ct < 1000); if (ties) { throw new MathInternalError(); // Should never happen @@ -1207,9 +1206,9 @@ public class KolmogorovSmirnovTest { * @param sampler probability distribution to sample for jitter values * @throws NullPointerException if either of the parameters is null */ - private static void jitter(double[] data, RealDistribution.Sampler sampler) { + private static void jitter(final double[] data, final UniformRandomProvider rng, final double delta) { for (int i = 0; i < data.length; i++) { - final double d = sampler.sample(); + final double d = delta * (2 * rng.nextDouble() - 1); data[i] += d; } } http://git-wip-us.apache.org/repos/asf/commons-math/blob/18f181ad/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java index 0e8c505..389acbe 100644 --- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java @@ -436,6 +436,28 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(0.9792777290, test.kolmogorovSmirnovTest(x, y), 1e-6); } + + @Test + public void testTwoSampleWithManyTiesAndVerySmallDelta() { + // MATH-1405 + final double[] x = { + 0.000000, 0.000000, 1.000000, + 1.000000, 1.500000, 1.600000, + 1.700000, 1.800000, 1.900000, 2.000000, 2.000000000000001 }; + + final double[] y = { + 0.000000, 0.000000, 10.000000, + 10.000000, 11.000000, 11.000000, + 11.000000, 15.000000, 16.000000, + 17.000000, 18.000000, 19.000000, 20.000000, 20.000000000000001 }; + + // these values result in an initial calculated minDelta of 4.440892098500626E-16, + // which is too small to jitter the existing values to new ones bc of floating-point precision + // MATH-1405 added functionality to iteratively increase minDelta until a noticeable jitter occurs + + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + Assert.assertEquals(1.12173015e-5, test.kolmogorovSmirnovTest(x, y), 1e-6); + } @Test public void testTwoSamplesAllEqual() {