This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit d344c5fcbcd7bec12c34b24d8ca0573575c064d7 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Fri Apr 30 21:23:48 2021 +0100 RNG-134: BoxSampler to sample uniformly from a box (or hyperrectangle) --- .../commons/rng/sampling/shape/BoxSampler.java | 269 +++++++++++++++ .../commons/rng/sampling/shape/BoxSamplerTest.java | 369 +++++++++++++++++++++ src/changes/changes.xml | 3 + 3 files changed, 641 insertions(+) diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java new file mode 100644 index 0000000..227fad6 --- /dev/null +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/shape/BoxSampler.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rng.sampling.shape; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.SharedStateSampler; + +/** + * Generate points uniformly distributed within a n-dimension box (hyperrectangle). + * + * <p>Sampling uses:</p> + * + * <ul> + * <li>{@link UniformRandomProvider#nextDouble()} + * </ul> + * + * @see <a href="https://en.wikipedia.org/wiki/Hyperrectangle">Hyperrectangle (Wikipedia)</a> + * @since 1.4 + */ +public abstract class BoxSampler implements SharedStateSampler<BoxSampler> { + /** The dimension for 2D sampling. */ + private static final int TWO_D = 2; + /** The dimension for 3D sampling. */ + private static final int THREE_D = 3; + /** The source of randomness. */ + private final UniformRandomProvider rng; + + // The following code defines a point within the range ab: + // p = (1 - u)a + ub, u in [0, 1] + // + // This is the same method used in the + // o.a.c.rng.sampling.distribution.ContinuousUniformSampler but extended to N-dimensions. + + /** + * Sample uniformly from a box in 2D. This is an non-array based specialisation of + * {@link BoxSamplerND} for performance. + */ + private static class BoxSampler2D extends BoxSampler { + /** The x component of bound a. */ + private final double ax; + /** The y component of bound a. */ + private final double ay; + /** The x component of bound b. */ + private final double bx; + /** The y component of bound b. */ + private final double by; + + /** + * @param a Bound a. + * @param b Bound b. + * @param rng Source of randomness. + */ + BoxSampler2D(double[] a, double[] b, UniformRandomProvider rng) { + super(rng); + ax = a[0]; + ay = a[1]; + bx = b[0]; + by = b[1]; + } + + /** + * @param rng Source of randomness. + * @param source Source to copy. + */ + BoxSampler2D(UniformRandomProvider rng, BoxSampler2D source) { + super(rng); + ax = source.ax; + ay = source.ay; + bx = source.bx; + by = source.by; + } + + @Override + public double[] sample() { + return new double[] {createSample(ax, bx), + createSample(ay, by)}; + } + + @Override + public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) { + return new BoxSampler2D(rng, this); + } + } + + /** + * Sample uniformly from a box in 3D. This is an non-array based specialisation of + * {@link BoxSamplerND} for performance. + */ + private static class BoxSampler3D extends BoxSampler { + /** The x component of bound a. */ + private final double ax; + /** The y component of bound a. */ + private final double ay; + /** The z component of bound a. */ + private final double az; + /** The x component of bound b. */ + private final double bx; + /** The y component of bound b. */ + private final double by; + /** The z component of bound b. */ + private final double bz; + + /** + * @param a Bound a. + * @param b Bound b. + * @param rng Source of randomness. + */ + BoxSampler3D(double[] a, double[] b, UniformRandomProvider rng) { + super(rng); + ax = a[0]; + ay = a[1]; + az = a[2]; + bx = b[0]; + by = b[1]; + bz = b[2]; + } + + /** + * @param rng Source of randomness. + * @param source Source to copy. + */ + BoxSampler3D(UniformRandomProvider rng, BoxSampler3D source) { + super(rng); + ax = source.ax; + ay = source.ay; + az = source.az; + bx = source.bx; + by = source.by; + bz = source.bz; + } + + @Override + public double[] sample() { + return new double[] {createSample(ax, bx), + createSample(ay, by), + createSample(az, bz)}; + } + + @Override + public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) { + return new BoxSampler3D(rng, this); + } + } + + /** + * Sample uniformly from a box in ND. + */ + private static class BoxSamplerND extends BoxSampler { + /** Bound a. */ + private final double[] a; + /** Bound b. */ + private final double[] b; + + /** + * @param a Bound a. + * @param b Bound b. + * @param rng Source of randomness. + */ + BoxSamplerND(double[] a, double[] b, UniformRandomProvider rng) { + super(rng); + // Defensive copy + this.a = a.clone(); + this.b = b.clone(); + } + + /** + * @param rng Source of randomness. + * @param source Source to copy. + */ + BoxSamplerND(UniformRandomProvider rng, BoxSamplerND source) { + super(rng); + // Shared state is immutable + a = source.a; + b = source.b; + } + + @Override + public double[] sample() { + final double[] x = new double[a.length]; + for (int i = 0; i < x.length; i++) { + x[i] = createSample(a[i], b[i]); + } + return x; + } + + @Override + public BoxSampler withUniformRandomProvider(UniformRandomProvider rng) { + return new BoxSamplerND(rng, this); + } + } + + /** + * @param rng Source of randomness. + */ + BoxSampler(UniformRandomProvider rng) { + this.rng = rng; + } + + /** + * @return a random Cartesian point within the box. + */ + public abstract double[] sample(); + + /** + * Creates the sample between bound a and b. + * + * @param a Bound a + * @param b Bound b + * @return the sample + */ + double createSample(double a, double b) { + final double u = rng.nextDouble(); + return (1.0 - u) * a + u * b; + } + + /** + * Create a box sampler with bounds {@code a} and {@code b}. + * Points are returned within the box defined by the bounds. + * + * <p>Sampling is supported in dimensions of 2 or above. + * + * <p>There is no requirement that {@code a <= b}. + * + * @param a Bound a. + * @param b Bound b. + * @param rng Source of randomness. + * @return the sampler + * @throws IllegalArgumentException If the bounds do not have the same + * dimension; the dimension is less than 2; or bounds have non-finite coordinates. + */ + public static BoxSampler of(double[] a, + double[] b, + UniformRandomProvider rng) { + final int dimension = a.length; + if (dimension != b.length) { + throw new IllegalArgumentException( + new StringBuilder("Mismatch of box dimensions: ").append(dimension).append(',') + .append(b.length).toString()); + } + // Detect non-finite bounds + Coordinates.requireFinite(a, "Bound a"); + Coordinates.requireFinite(b, "Bound b"); + // Low dimension specialisations + if (dimension == TWO_D) { + return new BoxSampler2D(a, b, rng); + } else if (dimension == THREE_D) { + return new BoxSampler3D(a, b, rng); + } else if (dimension > THREE_D) { + return new BoxSamplerND(a, b, rng); + } + // Less than 2D + throw new IllegalArgumentException("Unsupported dimension: " + dimension); + } +} diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java new file mode 100644 index 0000000..96a15e8 --- /dev/null +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/shape/BoxSamplerTest.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling.shape; + +import java.util.Arrays; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.commons.math3.stat.inference.ChiSquareTest; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.sampling.RandomAssert; +import org.apache.commons.rng.sampling.UnitSphereSampler; +import org.apache.commons.rng.simple.RandomSource; + +/** + * Test for {@link BoxSampler}. + */ +public class BoxSamplerTest { + /** + * Test an unsupported dimension. + */ + @Test(expected = IllegalArgumentException.class) + public void testInvalidDimensionThrows() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + BoxSampler.of(new double[1], new double[1], rng); + } + + /** + * Test a dimension mismatch between vertices. + */ + @Test + public void testDimensionMismatchThrows() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + final double[] c2 = new double[2]; + final double[] c3 = new double[3]; + for (double[][] c : new double[][][] { + {c2, c3}, + {c3, c2}, + }) { + try { + BoxSampler.of(c[0], c[1], rng); + Assert.fail(String.format("Did not detect dimension mismatch: %d,%d", + c[0].length, c[1].length)); + } catch (IllegalArgumentException ex) { + // Expected + } + } + } + + /** + * Test non-finite vertices. + */ + @Test + public void testNonFiniteVertexCoordinates() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + // A valid box + final double[][] c = new double[][] { + {0, 1, 2}, {-1, 2, 3} + }; + Assert.assertNotNull(BoxSampler.of(c[0], c[1], rng)); + final double[] bad = {Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NaN}; + for (int i = 0; i < c.length; i++) { + for (int j = 0; j < c[0].length; j++) { + for (final double d : bad) { + final double value = c[i][j]; + c[i][j] = d; + try { + BoxSampler.of(c[0], c[1], rng); + Assert.fail(String.format("Did not detect non-finite coordinate: %d,%d = %s", i, j, d)); + } catch (IllegalArgumentException ex) { + // Expected + } + c[i][j] = value; + } + } + } + } + + /** + * Test a box with coordinates that are separated by more than + * {@link Double#MAX_VALUE} in 2D. + */ + @Test + public void testExtremeValueCoordinates2D() { + testExtremeValueCoordinates(2); + } + + /** + * Test a box with coordinates that are separated by more than + * {@link Double#MAX_VALUE} in 3D. + */ + @Test + public void testExtremeValueCoordinates3D() { + testExtremeValueCoordinates(3); + } + + /** + * Test a box with coordinates that are separated by more than + * {@link Double#MAX_VALUE} in 4D. + */ + @Test + public void testExtremeValueCoordinates4D() { + testExtremeValueCoordinates(4); + } + + /** + * Test a box with coordinates that are separated by more than + * {@link Double#MAX_VALUE}. + * + * @param dimension the dimension + */ + private static void testExtremeValueCoordinates(int dimension) { + // Object seed so use Long not long + final Long seed = 456456L; + final double[][] c1 = new double[2][dimension]; + final double[][] c2 = new double[2][dimension]; + // Create a valid box that can be scaled + Arrays.fill(c1[0], -1); + Arrays.fill(c1[1], 1); + // Extremely large value for scaling. Use a power of 2 for exact scaling. + final double scale = 0x1.0p1023; + for (int i = 0; i < c1.length; i++) { + // Scale the second box + for (int j = 0; j < dimension; j++) { + c2[i][j] = c1[i][j] * scale; + } + } + // Show the box is too big to compute vectors between points. + Assert.assertEquals("Expect vector b - a to be infinite in the x dimension", + Double.POSITIVE_INFINITY, c2[1][0] - c2[0][0], 0.0); + + final BoxSampler sampler1 = BoxSampler.of(c1[0], c1[1], + RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP, seed)); + final BoxSampler sampler2 = BoxSampler.of(c2[0], c2[1], + RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PP, seed)); + + for (int n = 0; n < 10; n++) { + final double[] a = sampler1.sample(); + final double[] b = sampler2.sample(); + for (int i = 0; i < a.length; i++) { + a[i] *= scale; + } + Assert.assertArrayEquals(a, b, 0.0); + } + } + + /** + * Test the distribution of points in 2D. + */ + @Test + public void testDistribution2D() { + testDistributionND(2); + } + + /** + * Test the distribution of points in 3D. + */ + @Test + public void testDistribution3D() { + testDistributionND(3); + } + + /** + * Test the distribution of points in 4D. + */ + @Test + public void testDistribution4D() { + testDistributionND(4); + } + + /** + * Test the distribution of points in N dimensions. The output coordinates + * should be uniform in the box. + * + * @param dimension the dimension + */ + private static void testDistributionND(int dimension) { + final UniformRandomProvider rng = RandomSource.create(RandomSource.JSF_64, 0xdabfab); + + final UnitSphereSampler sphere = UnitSphereSampler.of(dimension, rng); + final double[] a = sphere.nextVector(); + final double[] b = sphere.nextVector(); + + // Assign bins + final int bins = 10; + final int samplesPerBin = 20; + + // To test uniformity within the box assign the position to a bin using: + // x - a + // ----- * bins + // b - a + // Pre-compute scaling + final double[] scale = new double[dimension]; + // Precompute the bin offset for each increasing dimension: + // 1, bins, bins*bins, bins*bins*bins, ... + final int[] offset = new int[dimension]; + for (int i = 0; i < dimension; i++) { + scale[i] = 1.0 / (b[i] - a[i]); + offset[i] = (int) Math.pow(bins, i); + } + + // Expect a uniform distribution + final double[] expected = new double[(int) Math.pow(bins, dimension)]; + Arrays.fill(expected, 1.0); + + // Increase the loops and use a null seed (i.e. randomly generated) to verify robustness + final BoxSampler sampler = BoxSampler.of(a, b, rng); + final int samples = expected.length * samplesPerBin; + for (int n = 0; n < 1; n++) { + // Assign each coordinate to a region inside the box + final long[] observed = new long[expected.length]; + for (int i = 0; i < samples; i++) { + final double[] x = sampler.sample(); + Assert.assertEquals(dimension, x.length); + int index = 0; + for (int j = 0; j < dimension; j++) { + final double c = (x[j] - a[j]) * scale[j]; + Assert.assertTrue("Not within the box", c >= 0.0 && c <= 1.0); + // Get the bin for this dimension (assumes c != 1.0) + final int bin = (int) (c * bins); + // Add to the final bin index + index += bin * offset[j]; + } + // Assign the uniform deviate to a bin + observed[index]++; + } + final double p = new ChiSquareTest().chiSquareTest(expected, observed); + Assert.assertFalse("p-value too small: " + p, p < 0.001); + } + } + + /** + * Test the SharedStateSampler implementation for 2D. + */ + @Test + public void testSharedStateSampler2D() { + testSharedStateSampler(2); + } + + /** + * Test the SharedStateSampler implementation for 3D. + */ + @Test + public void testSharedStateSampler3D() { + testSharedStateSampler(3); + } + + /** + * Test the SharedStateSampler implementation for 4D. + */ + @Test + public void testSharedStateSampler4D() { + testSharedStateSampler(4); + } + + /** + * Test the SharedStateSampler implementation for the given dimension. + */ + private static void testSharedStateSampler(int dimension) { + final UniformRandomProvider rng1 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + final UniformRandomProvider rng2 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + final double[] c1 = createCoordinate(1, dimension); + final double[] c2 = createCoordinate(2, dimension); + final BoxSampler sampler1 = BoxSampler.of(c1, c2, rng1); + final BoxSampler sampler2 = sampler1.withUniformRandomProvider(rng2); + RandomAssert.assertProduceSameSequence( + new RandomAssert.Sampler<double[]>() { + @Override + public double[] sample() { + return sampler1.sample(); + } + }, + new RandomAssert.Sampler<double[]>() { + @Override + public double[] sample() { + return sampler2.sample(); + } + }); + } + + /** + * Test the input vectors are copied and not used by reference for 2D. + */ + @Test + public void testChangedInputCoordinates2D() { + testChangedInputCoordinates(2); + } + + /** + * Test the input vectors are copied and not used by reference for 3D. + */ + @Test + public void testChangedInputCoordinates3D() { + testChangedInputCoordinates(3); + } + + /** + * Test the input vectors are copied and not used by reference for 4D. + */ + @Test + public void testChangedInputCoordinates4D() { + testChangedInputCoordinates(4); + } + + /** + * Test the input vectors are copied and not used by reference for the given + * dimension. + * + * @param dimension the dimension + */ + private static void testChangedInputCoordinates(int dimension) { + final UniformRandomProvider rng1 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + final UniformRandomProvider rng2 = RandomSource.create(RandomSource.SPLIT_MIX_64, 0L); + final double[] c1 = createCoordinate(1, dimension); + final double[] c2 = createCoordinate(2, dimension); + final BoxSampler sampler1 = BoxSampler.of(c1, c2, rng1); + // Check the input vectors are copied and not used by reference. + // Change them in place and create a new sampler. It should have different output + // translated by the offset. + final double offset = 10; + for (int i = 0; i < dimension; i++) { + c1[i] += offset; + c2[i] += offset; + } + final BoxSampler sampler2 = BoxSampler.of(c1, c2, rng2); + for (int n = 0; n < 3; n++) { + final double[] s1 = sampler1.sample(); + final double[] s2 = sampler2.sample(); + Assert.assertEquals(s1.length, s2.length); + Assert.assertFalse("First sampler has used the vertices by reference", + Arrays.equals(s1, s2)); + for (int i = 0; i < dimension; i++) { + Assert.assertEquals(s1[i] + offset, s2[i], 1e-10); + } + } + } + + /** + * Creates the coordinate of length specified by the dimension filled with + * the given value and the dimension index: x + i. + * + * @param x the value for index 0 + * @param dimension the dimension + * @return the coordinate + */ + private static double[] createCoordinate(double x, int dimension) { + final double[] coord = new double[dimension]; + for (int i = 0; i < dimension; i++) { + coord[0] = x + i; + } + return coord; + } +} diff --git a/src/changes/changes.xml b/src/changes/changes.xml index ab5de29..d323cfb 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -75,6 +75,9 @@ re-run tests that fail, and pass the build if they succeed within the allotted number of reruns (the test will be marked as 'flaky' in the report). "> + <action dev="aherbert" type="add" issue="134"> + New "BoxSampler" to sample uniformly from a box (or hyperrectangle). + </action> <action dev="aherbert" type="add" issue="133"> New "LineSampler" to sample uniformly on a line segment. </action>