Repository: commons-math
Updated Branches:
  refs/heads/master 843267df5 -> 980554552


MATH-1405: Iteratively double minDelta for Kolmogorov-Smirnov Test jiggling


Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/18f181ad
Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/18f181ad
Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/18f181ad

Branch: refs/heads/master
Commit: 18f181ada7826542725fa4a9460307d606695b5d
Parents: 843267d
Author: Daniil Finkel <dfin...@coldlight.com>
Authored: Wed Mar 1 17:18:23 2017 -0500
Committer: Daniil Finkel <dfin...@coldlight.com>
Committed: Wed Mar 1 17:18:23 2017 -0500

----------------------------------------------------------------------
 .../stat/inference/KolmogorovSmirnovTest.java   | 13 ++++++------
 .../inference/KolmogorovSmirnovTestTest.java    | 22 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-math/blob/18f181ad/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
 
b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
index 9757b73..d863847 100644
--- 
a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
+++ 
b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
@@ -24,7 +24,6 @@ import java.util.HashSet;
 import org.apache.commons.math4.distribution.EnumeratedRealDistribution;
 import org.apache.commons.math4.distribution.RealDistribution;
 import org.apache.commons.math4.distribution.AbstractRealDistribution;
-import org.apache.commons.math4.distribution.UniformRealDistribution;
 import org.apache.commons.math4.exception.InsufficientDataException;
 import org.apache.commons.math4.exception.MathArithmeticException;
 import org.apache.commons.math4.exception.MathInternalError;
@@ -1156,18 +1155,18 @@ public class KolmogorovSmirnovTest {
 
        // Add jitter using a fixed seed (so same arguments always give same 
results),
        // low-initialization-overhead generator
-       final RealDistribution.Sampler sampler =
-           new UniformRealDistribution(-minDelta, 
minDelta).createSampler(RandomSource.create(RandomSource.JDK, 100));
+       UniformRandomProvider rng = RandomSource.create(RandomSource.JDK, 100);
 
        // It is theoretically possible that jitter does not break ties, so 
repeat
        // until all ties are gone.  Bound the loop and throw MIE if bound is 
exceeded.
        int ct = 0;
        boolean ties = true;
        do {
-           jitter(x, sampler);
-           jitter(y, sampler);
+           jitter(x, rng, minDelta);
+           jitter(y, rng, minDelta);
            ties = hasTies(x, y);
            ct++;
+           minDelta *= 2; // if jittering hasn't resolved ties, minDelta may 
be too small, so double it for next iteration
        } while (ties && ct < 1000);
        if (ties) {
            throw new MathInternalError(); // Should never happen
@@ -1207,9 +1206,9 @@ public class KolmogorovSmirnovTest {
      * @param sampler probability distribution to sample for jitter values
      * @throws NullPointerException if either of the parameters is null
      */
-    private static void jitter(double[] data, RealDistribution.Sampler 
sampler) {
+    private static void jitter(final double[] data, final 
UniformRandomProvider rng, final double delta) {
         for (int i = 0; i < data.length; i++) {
-            final double d = sampler.sample();
+            final double d = delta * (2 * rng.nextDouble() - 1);
             data[i] += d;
         }
     }

http://git-wip-us.apache.org/repos/asf/commons-math/blob/18f181ad/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
 
b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
index 0e8c505..389acbe 100644
--- 
a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
+++ 
b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
@@ -436,6 +436,28 @@ public class KolmogorovSmirnovTestTest {
         Assert.assertEquals(0.9792777290, test.kolmogorovSmirnovTest(x, y), 
1e-6);
 
     }
+    
+    @Test
+    public void testTwoSampleWithManyTiesAndVerySmallDelta() {
+        // MATH-1405
+        final double[] x = {
+                0.000000, 0.000000, 1.000000,
+                1.000000, 1.500000, 1.600000,
+                1.700000, 1.800000, 1.900000, 2.000000, 2.000000000000001 };
+        
+        final double[] y = {
+                0.000000, 0.000000, 10.000000,
+                10.000000, 11.000000, 11.000000,
+                11.000000, 15.000000, 16.000000,
+                17.000000, 18.000000, 19.000000, 20.000000, 20.000000000000001 
};
+        
+        // these values result in an initial calculated minDelta of 
4.440892098500626E-16,
+        // which is too small to jitter the existing values to new ones because of 
floating-point precision
+        // MATH-1405 added functionality to iteratively increase minDelta 
until a noticeable jitter occurs
+
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        Assert.assertEquals(1.12173015e-5, test.kolmogorovSmirnovTest(x, y), 
1e-6);
+    }
 
     @Test
     public void testTwoSamplesAllEqual() {

Reply via email to