This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
The following commit(s) were added to refs/heads/master by this push: new f4023d5 RNG-148: Detect and discard infinite length vectors f4023d5 is described below commit f4023d55bc9336036e5faf10e69ba391257a64d4 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Wed Jun 30 09:50:18 2021 +0100 RNG-148: Detect and discard infinite length vectors --- .../commons/rng/sampling/UnitSphereSampler.java | 33 +++++++-- .../rng/sampling/UnitSphereSamplerTest.java | 78 ++++++++++++++-------- src/changes/changes.xml | 3 + 3 files changed, 82 insertions(+), 32 deletions(-) diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java index 76db7b0..e9b2a2a 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/UnitSphereSampler.java @@ -104,8 +104,8 @@ public class UnitSphereSampler implements SharedStateObjectSampler<double[]> { final double y = sampler.sample(); final double sum = x * x + y * y; - if (sum == 0) { - // Zero-norm vector is discarded. + if (isInvalidSumForNormalization(sum)) { + // Invalid vector is discarded. return sample(); } @@ -141,8 +141,8 @@ public class UnitSphereSampler implements SharedStateObjectSampler<double[]> { final double z = sampler.sample(); final double sum = x * x + y * y + z * z; - if (sum == 0) { - // Zero-norm vector is discarded. + if (isInvalidSumForNormalization(sum)) { + // Invalid vector is discarded. return sample(); } @@ -187,8 +187,8 @@ public class UnitSphereSampler implements SharedStateObjectSampler<double[]> { sum += x * x; } - if (sum == 0) { - // Zero-norm vector is discarded. + if (isInvalidSumForNormalization(sum)) { + // Invalid vector is discarded. // Using recursion as it is highly unlikely to generate more // than a few such vectors. 
It also protects against infinite // loop (in case a buggy generator is used), by eventually @@ -284,4 +284,25 @@ public class UnitSphereSampler implements SharedStateObjectSampler<double[]> { } return new UnitSphereSamplerND(dimension, rng); } + + /** + * Returns true if the sum of squared components of a vector is invalid for + * normalization. + * + * <p>This is true for any sum where the factor {@code f = 1.0 / sqrt(sum)} + * cannot be used to create a unit length vector by multiplication. The sum + * is invalid if: + * + * <ul> + * <li>{@code sum = 0} then {@code f = infinity} + * <li>{@code sum = infinity} then {@code f = 0} + * </ul> + * + * @param sum Sum of squared components of a vector + * @return true if invalid for normalisation + */ + private static boolean isInvalidSumForNormalization(double sum) { + // Note: Deliberate floating-point comparison with zero + return sum == 0 || sum == Double.POSITIVE_INFINITY; + } } diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java index 005d548..9733858 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/UnitSphereSamplerTest.java @@ -388,46 +388,72 @@ public class UnitSphereSamplerTest { } /** - * Test the edge case where the normalisation sum to divide by is zero for 2D. - */ - @Test - public void testInvalidInverseNormalisation2D() { - testInvalidInverseNormalisationND(2); - } - - /** - * Test the edge case where the normalisation sum to divide by is zero for 3D. + * Test the edge case where the normalisation sum to divide by is zero. 
*/ @Test - public void testInvalidInverseNormalisation3D() { - testInvalidInverseNormalisationND(3); + public void testInvalidInverseNormalisationWithZeroLength() { + for (int dim = 2; dim <= 4; dim++) { + testInvalidInverseNormalisationND(dim, true); + } } /** - * Test the edge case where the normalisation sum to divide by is zero for 4D. + * Test the edge case where the normalisation sum to divide by is infinite. */ @Test - public void testInvalidInverseNormalisation4D() { - testInvalidInverseNormalisationND(4); + public void testInvalidInverseNormalisationWithInfiniteLength() { + for (int dim = 2; dim <= 4; dim++) { + testInvalidInverseNormalisationND(dim, false); + } } /** - * Test the edge case where the normalisation sum to divide by is zero. - * This test requires generation of Gaussian samples with the value 0. - * See RNG-55. + * Test the edge case where the normalisation sum to divide by is zero or infinite. This + * test requires generation of Gaussian samples with the value 0 or infinity. See RNG-55. + * + * @param dimension the dimension + * @param zeroSum true if the sum for the first vector should be zero */ - private static void testInvalidInverseNormalisationND(final int dimension) { + private static void testInvalidInverseNormalisationND(final int dimension, boolean zeroSum) { // Create a provider that will create a bad first sample but then recover. // This checks recursion will return a good value. - final UniformRandomProvider bad = new SplitMix64(0x1a2b3cL) { - private int count = -2 * dimension; - @Override - public long nextLong() { - // Return enough zeros to create Gaussian samples of zero for all coordinates. - return count++ < 0 ? 0 : super.nextLong(); - } - }; + // This sampler will create values that manipulate the underlying Gaussian sampler. 
+ UniformRandomProvider bad; + if (zeroSum) { + // Create Gaussian samples of zero + bad = new SplitMix64(0x1a2b3cL) { + private int count = -2 * dimension; + + @Override + public long nextLong() { + // Return enough zeros to create Gaussian samples of zero for all coordinates. + return count++ < 0 ? 0 : super.nextLong(); + } + }; + } else { + // Create a Gaussian sample of infinity. + // This only requires 1 infinite value to create an infinite length vector. + // Assumes the ZigguratNormalizedGaussianSampler. + // To create infinity requires a very large long value with the lowest 7 bits as 0, + // then two doubles of zero. + bad = new SplitMix64(0x1a2b3cL) { + private int lcount = -1; + private int dcount = -2; + + @Override + public long nextLong() { + return lcount++ < 0 ? + (-1L << 7) & Long.MAX_VALUE : + super.nextLong(); + } + + @Override + public double nextDouble() { + return dcount++ < 0 ? 0 : super.nextDouble(); + } + }; + } final double[] vector = UnitSphereSampler.of(dimension, bad).sample(); Assert.assertEquals(dimension, vector.length); diff --git a/src/changes/changes.xml b/src/changes/changes.xml index f74e736..096844b 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -77,6 +77,9 @@ re-run tests that fail, and pass the build if they succeed within the allotted number of reruns (the test will be marked as 'flaky' in the report). "> + <action dev="aherbert" type="fix" issue="148"> + "UnitSphereSampler": Detect and discard infinite length vectors. + </action> <action dev="aherbert" type="add" issue="147"> New "LevySampler" to sample from a Levy distribution. </action>