This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 482e4b5e371152f1341e64bbbf952392268e7993 Author: aherbert <aherb...@apache.org> AuthorDate: Fri Jul 9 10:32:13 2021 +0100 RNG-154: Avoid infinity in the tails of a normalised Gaussian sampler --- .../distribution/BoxMullerGaussianSampler.java | 6 ++++- .../BoxMullerNormalizedGaussianSampler.java | 6 ++++- .../ZigguratNormalizedGaussianSampler.java | 12 +++++++-- .../rng/sampling/distribution/LevySamplerTest.java | 30 +++++++++++++++++----- src/changes/changes.xml | 5 ++++ 5 files changed, 48 insertions(+), 11 deletions(-) diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerGaussianSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerGaussianSampler.java index 8fa9104..e2f7222 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerGaussianSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerGaussianSampler.java @@ -68,8 +68,12 @@ public class BoxMullerGaussianSampler if (Double.isNaN(nextGaussian)) { // Generate a pair of Gaussian numbers. + // Avoid zero for the uniform deviate y. + // The extreme tail of the sample is: + // y = 2^-53 + // r = 8.57167 final double x = rng.nextDouble(); - final double y = rng.nextDouble(); + final double y = InternalUtils.makeNonZeroDouble(rng.nextLong()); final double alpha = 2 * Math.PI * x; final double r = Math.sqrt(-2 * Math.log(y)); diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.java index d110e4b..b90a383 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/BoxMullerNormalizedGaussianSampler.java @@ -48,8 +48,12 @@ public class BoxMullerNormalizedGaussianSampler if (Double.isNaN(nextGaussian)) { // Generate a pair of Gaussian numbers. + // Avoid zero for the uniform deviate y. + // The extreme tail of the sample is: + // y = 2^-53 + // r = 8.57167 final double x = rng.nextDouble(); - final double y = rng.nextDouble(); + final double y = InternalUtils.makeNonZeroDouble(rng.nextLong()); final double alpha = 2 * Math.PI * x; final double r = Math.sqrt(-2 * Math.log(y)); diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.java index ce39773..1d1acb2 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/ZigguratNormalizedGaussianSampler.java @@ -133,8 +133,16 @@ public class ZigguratNormalizedGaussianSampler double y; double x; do { - y = -Math.log(rng.nextDouble()); - x = -Math.log(rng.nextDouble()) * ONE_OVER_R; + // Avoid infinity by creating a non-zero double. + // Note: The extreme value y from -Math.log(2^-53) is (to 4 sf): + // y = 36.74 + // The largest value x where 2y < x^2 is false is sqrt(2*36.74): + // x = 8.571 + // The extreme tail is: + // out = +/- 12.01 + // To generate this requires longs of 0 and then (1377 << 11). + y = -Math.log(InternalUtils.makeNonZeroDouble(rng.nextLong())); + x = -Math.log(InternalUtils.makeNonZeroDouble(rng.nextLong())) * ONE_OVER_R; } while (y + y < x * x); final double out = R + x; diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/LevySamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/LevySamplerTest.java index 4ae4802..4678f36 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/LevySamplerTest.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/LevySamplerTest.java @@ -81,18 +81,34 @@ public class LevySamplerTest { }, location, scale); Assert.assertEquals(Double.POSITIVE_INFINITY, s1.sample(), 0.0); - // Force the underlying ZigguratNormalizedGaussianSampler to create +inf + // Force the underlying ZigguratNormalizedGaussianSampler to create the largest value. + // This is 14.11 final LevySampler s2 = LevySampler.of( new SplitMix64(0L) { + private int i; @Override public long next() { - return (Long.MAX_VALUE << 7) & Long.MAX_VALUE; - } - @Override - public double nextDouble() { - return 0.0; + i++; + if (i == 1) { + // Set the first value to ensure we sample the tail of the ziggurat. + // The lowest 7 bits are zero to select rectangle 0 from the ziggurat. + return (Long.MAX_VALUE << 7) & Long.MAX_VALUE; + } + if (i == 2) { + // Set the second value to generate y as the largest value possible by + // ensuring Math.log is called with a small value. + return 0L; + } + // The next value generates x which must be set to the largest value x which + // satisfies the condition: + // 2y >= x^2 + return 1377L << 11; } }, location, scale); - Assert.assertEquals(0.0, s2.sample(), 0.0); + // The tail of the zigguart should be s=12.014118700751192 + // expected is 1/s^2 = 0.006928132149804786 + // This is as close to zero as the sampler can get. + final double expected = 0.006928132149804786; + Assert.assertEquals(expected, s2.sample(), 0.0); } } diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 8641760..77c87dd 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -77,6 +77,11 @@ re-run tests that fail, and pass the build if they succeed within the allotted number of reruns (the test will be marked as 'flaky' in the report). "> + <action dev="aherbert" type="update" issue="154"> + Update Gaussian samplers to avoid infinity in the tails of the distribution. Applies + to: ZigguratNormalisedGaussianSampler; BoxMullerNormalizedGaussianSampler; and + BoxMullerGaussianSampler. + </action> <action dev="aherbert" type="update" issue="153"> "UnitBallSampler": Update to use the ZigguratSampler for an exponential deviate for ball point picking.