This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 61f982c4011fa18a88354224e07ae81240c2b815 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Mon Sep 2 13:26:32 2024 +0100 RNG-187: Use batched index generation in array shuffle --- .../apache/commons/rng/sampling/ArraySampler.java | 287 +++++++++++++++++++-- .../commons/rng/sampling/ArraySamplerTest.java | 181 ++++++++++++- src/changes/changes.xml | 3 + 3 files changed, 441 insertions(+), 30 deletions(-) diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java index e2772ce4..9be2de44 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java @@ -25,9 +25,29 @@ import org.apache.commons.rng.UniformRandomProvider; * href="https://en.wikipedia.org/wiki/Fisher-Yates_shuffle#The_modern_algorithm"> * Fisher-Yates</a> algorithm. * + * <p>Small ranges use batched random integer generation to increase performance. + * + * <ul> + * <li>Nevin Brackett-Rozinsky, Daniel Lemire, + * Batched Ranged Random Integer Generation, Software: Practice and Experience (to appear) + * <a href="https://arxiv.org/abs/2408.06213">arXiv:2408.06213M</a> + * </ul> + * * @since 1.6 */ public final class ArraySampler { + /** Mask the lower 32-bit of a long. */ + private static final long MASK_32 = 0xffffffffL; + /** 2^32. Used for the bounded random algorithm. This is required as the original + * method used (-bound % bound) for (2^L % bound) which only works for unsigned integer + * modulus. */ + private static final long POW_32 = 1L << 32; + /** Length threshold to sample 2 integers from a random 32-bit value. + * The threshold provided in the Brackett-Rozinsky and Lemire paper + * is the power of 2 below 20724. Note that the product 2^15*2^15 + * is representable using signed integers. */ + private static final int BATCH_2 = 1 << 15; + /** Class contains only static methods. */ private ArraySampler() {} @@ -39,9 +59,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static boolean[] shuffle(UniformRandomProvider rng, boolean[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -53,9 +83,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static byte[] shuffle(UniformRandomProvider rng, byte[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -67,9 +107,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static char[] shuffle(UniformRandomProvider rng, char[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -81,9 +131,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static double[] shuffle(UniformRandomProvider rng, double[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -95,9 +155,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static float[] shuffle(UniformRandomProvider rng, float[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -109,9 +179,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static int[] shuffle(UniformRandomProvider rng, int[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -123,9 +203,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static long[] shuffle(UniformRandomProvider rng, long[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -137,9 +227,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static short[] shuffle(UniformRandomProvider rng, short[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -152,9 +252,19 @@ public final class ArraySampler { * @return a reference to the given array */ public static <T> T[] shuffle(UniformRandomProvider rng, T[] array) { - for (int i = array.length; i > 1; i--) { + int i = array.length; + for (; i > BATCH_2; --i) { swap(array, i - 1, rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, i - 1, index1); + swap(array, i - 2, index2); + } return array; } @@ -169,10 +279,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static boolean[] shuffle(UniformRandomProvider rng, boolean[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -187,10 +306,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static byte[] shuffle(UniformRandomProvider rng, byte[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -205,10 +333,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static char[] shuffle(UniformRandomProvider rng, char[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -223,10 +360,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static double[] shuffle(UniformRandomProvider rng, double[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -241,10 +387,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static float[] shuffle(UniformRandomProvider rng, float[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -259,10 +414,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static int[] shuffle(UniformRandomProvider rng, int[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -277,10 +441,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static long[] shuffle(UniformRandomProvider rng, long[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -295,10 +468,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static short[] shuffle(UniformRandomProvider rng, short[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -314,10 +496,19 @@ public final class ArraySampler { * @throws IndexOutOfBoundsException if the sub-range is out of bounds */ public static <T> T[] shuffle(UniformRandomProvider rng, T[] array, int from, int to) { - final int length = to - checkFromToIndex(from, to, array.length); - for (int i = length; i > 1; i--) { + int i = to - checkFromToIndex(from, to, array.length); + for (; i > BATCH_2; --i) { swap(array, from + i - 1, from + rng.nextInt(i)); } + // Batches of 2 + final int[] productBound = {i * (i - 1)}; + for (; i > 1; i -= 2) { + final int[] indices = randomBounded2(i, i - 1, productBound, rng); + final int index1 = indices[0]; + final int index2 = indices[1]; + swap(array, from + i - 1, from + index1); + swap(array, from + i - 2, from + index2); + } return array; } @@ -438,6 +629,48 @@ public final class ArraySampler { array[j] = tmp; } + + /** + * Return two random values in {@code [0, range1)} and {@code [0, range2)}. The + * product bound is used for the reject algorithm. See Brackett-Rozinsky and Lemire. + * + * <p>The product bound can be any positive integer {@code >= range1*range2}. + * It may be updated to become {@code range1*range2}. + * + * @param range1 Range 1. + * @param range2 Range 2. + * @param productBound Product bound. + * @param rng Source of randomness. + * @return [index1, index2] + */ + static int[] randomBounded2(int range1, int range2, int[] productBound, UniformRandomProvider rng) { + long m = (rng.nextInt() & MASK_32) * range1; + // index1 and index2 are the top 32-bits of the long + long index1 = m; + // Leftover bits * range2 + m = (m & MASK_32) * range2; + long index2 = m; + // Leftover bits must be unsigned + long l = m & MASK_32; + if (l < productBound[0]) { + final int bound = range1 * range2; + productBound[0] = bound; + if (l < bound) { + // 2^32 % bound + final long t = POW_32 % bound; + while (l < t) { + m = (rng.nextInt() & MASK_32) * range1; + index1 = m; + m = (m & MASK_32) * range2; + index2 = m; + l = m & MASK_32; + } + } + } + // Convert to [0, range1), [0, range2) + return new int[] {(int) (index1 >> 32), (int) (index2 >> 32)}; + } + /** * Checks if the sub-range from fromIndex (inclusive) to toIndex (exclusive) is * within the bounds of range from 0 (inclusive) to length (exclusive). diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java index 7229896d..26c607c4 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java @@ -17,18 +17,34 @@ package org.apache.commons.rng.sampling; import java.util.Arrays; +import java.util.stream.Stream; import org.apache.commons.math3.stat.inference.ChiSquareTest; import org.apache.commons.rng.UniformRandomProvider; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; /** * Tests for {@link ArraySampler}. */ class ArraySamplerTest { + /** Constant for a large array where batched index generation is not used. */ + private static final int ABOVE_BATCH_LIMIT = 43210; + + /** + * Return arguments for shuffle of a sub-range where batch index generation is + * not always used. Arguments are [from, to, length]. + * + * @return the stream + */ + static Stream<Arguments> aboveBatchLimit() { + return Stream.of(Arguments.of(123, ABOVE_BATCH_LIMIT + 123, ABOVE_BATCH_LIMIT + 789)); + } + @Test void testNullArguments() { final UniformRandomProvider rng = RandomAssert.seededRNG(); @@ -199,7 +215,7 @@ class ArraySamplerTest { * Test that all (unique) entries exist in the shuffled array. */ @ParameterizedTest - @ValueSource(ints = {13, 42, 100}) + @ValueSource(ints = {13, 42, 100, ABOVE_BATCH_LIMIT}) void testShuffleNoDuplicates(int length) { final int[] array = PermutationSampler.natural(length); final UniformRandomProvider rng = RandomAssert.seededRNG(); @@ -226,6 +242,7 @@ class ArraySamplerTest { "0, 5, 10", "5, 10, 15", }) + @MethodSource("aboveBatchLimit") void testShuffleSubRangeNoDuplicates(int from, int to, int length) { // Natural sequence in the sub-range final int[] array = natural(from, to, length); @@ -253,7 +270,7 @@ class ArraySamplerTest { * Test that shuffle of the full range using the range arguments matches a full-range shuffle. */ @ParameterizedTest - @ValueSource(ints = {9, 17}) + @ValueSource(ints = {9, 17, ABOVE_BATCH_LIMIT}) void testShuffleFullRangeMatchesShuffle(int length) { final int[] array1 = PermutationSampler.natural(length); final int[] array2 = array1.clone(); @@ -277,6 +294,7 @@ class ArraySamplerTest { "0, 5, 10", "5, 10, 15", }) + @MethodSource("aboveBatchLimit") void testShuffleSubRangeMatchesShuffle(int from, int to, int length) { final int[] array1 = PermutationSampler.natural(to - from); // Natural sequence in the sub-range @@ -330,6 +348,47 @@ class ArraySamplerTest { Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p); } + + @ParameterizedTest + @ValueSource(ints = {ABOVE_BATCH_LIMIT}) + void testShuffleIsRandomLarge(int length) { + final int[] array = PermutationSampler.natural(length); + final UniformRandomProvider rng = RandomAssert.createRNG(); + final int samples = 1000000 / length; + final int bins = 8; + final long[][] counts = new long[bins][bins]; + final int width = (int) Math.ceil((double) length / bins); + for (int j = 1; j <= samples; j++) { + ArraySampler.shuffle(rng, array); + for (int i = 0; i < length; i++) { + counts[i / width][array[i] / width]++; + } + } + final double p = new ChiSquareTest().chiSquareTest(counts); + Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p); + } + + @ParameterizedTest + @MethodSource("aboveBatchLimit") + void testShuffleSubRangeIsRandomLarge(int from, int to, int length) { + // Natural sequence in the sub-range + final int[] array = natural(from, to, length); + final UniformRandomProvider rng = RandomAssert.createRNG(); + final int n = to - from; + final int samples = 1000000 / n; + final int bins = 8; + final long[][] counts = new long[bins][bins]; + final int width = (int) Math.ceil((double) length / bins); + for (int j = 1; j <= samples; j++) { + ArraySampler.shuffle(rng, array, from, to); + for (int i = 0; i < n; i++) { + counts[i / width][array[from + i] / width]++; + } + } + final double p = new ChiSquareTest().chiSquareTest(counts); + Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p); + } + // Test other implementations. Include zero length arrays. @ParameterizedTest @@ -405,7 +464,7 @@ class ArraySamplerTest { // most robust to detecting the boolean shuffle swapping around the wrong pairs. @ParameterizedTest - @ValueSource(ints = {0, 1234}) + @ValueSource(ints = {0, 1234, ABOVE_BATCH_LIMIT}) void testShuffleBoolean(int length) { final byte[] a = randomBitsAsBytes(length); final boolean[] b = booleans(a); @@ -422,6 +481,7 @@ class ArraySamplerTest { "0, 900, 1000", "100, 1100, 1200", }) + @MethodSource("aboveBatchLimit") void testShuffleBooleanSubRange(int from, int to, int length) { final byte[] a = randomBitsAsBytes(length); final boolean[] b = booleans(a); @@ -430,6 +490,104 @@ class ArraySamplerTest { Assertions.assertArrayEquals(a, bytes(b)); } + @ParameterizedTest + @ValueSource(ints = {ABOVE_BATCH_LIMIT}) + void testShuffleLarge(int length) { + final int[] a = randomBitsAsInts(length); + final byte[] b = bytes(a); + final char[] c = chars(a); + final double[] d = doubles(a); + final float[] e = floats(a); + final long[] f = longs(a); + final short[] g = shorts(a); + final Integer[] h = boxed(a); + ArraySampler.shuffle(RandomAssert.seededRNG(), a); + ArraySampler.shuffle(RandomAssert.seededRNG(), b); + ArraySampler.shuffle(RandomAssert.seededRNG(), c); + ArraySampler.shuffle(RandomAssert.seededRNG(), d); + ArraySampler.shuffle(RandomAssert.seededRNG(), e); + ArraySampler.shuffle(RandomAssert.seededRNG(), f); + ArraySampler.shuffle(RandomAssert.seededRNG(), g); + ArraySampler.shuffle(RandomAssert.seededRNG(), h); + Assertions.assertArrayEquals(a, ints(b), "byte"); + Assertions.assertArrayEquals(a, ints(c), "char"); + Assertions.assertArrayEquals(a, ints(d), "double"); + Assertions.assertArrayEquals(a, ints(e), "float"); + Assertions.assertArrayEquals(a, ints(f), "long"); + Assertions.assertArrayEquals(a, ints(g), "short"); + Assertions.assertArrayEquals(a, ints(h), "Object"); + } + + @ParameterizedTest + @MethodSource("aboveBatchLimit") + void testShuffleSubRangeLarge(int from, int to, int length) { + final int[] a = randomBitsAsInts(length); + final byte[] b = bytes(a); + final char[] c = chars(a); + final double[] d = doubles(a); + final float[] e = floats(a); + final long[] f = longs(a); + final short[] g = shorts(a); + final Integer[] h = boxed(a); + ArraySampler.shuffle(RandomAssert.seededRNG(), a, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), b, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), c, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), d, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), e, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), f, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), g, from, to); + ArraySampler.shuffle(RandomAssert.seededRNG(), h, from, to); + Assertions.assertArrayEquals(a, ints(b), "byte"); + Assertions.assertArrayEquals(a, ints(c), "char"); + Assertions.assertArrayEquals(a, ints(d), "double"); + Assertions.assertArrayEquals(a, ints(e), "float"); + Assertions.assertArrayEquals(a, ints(f), "long"); + Assertions.assertArrayEquals(a, ints(g), "short"); + Assertions.assertArrayEquals(a, ints(h), "Object"); + } + + /** + * Test that the indices generated after rejection of the first pair use an identical + * generation method. This is done by randomly forcing pairs to be rejected by outputting + * zero for the random bits. This effectively breaks up the usual output from the + * generation algorithm so that the indices may come from the first generation step, + * or the subsequent generation step inside the generating loop. + */ + @Test + void testRandomBounded2() { + final UniformRandomProvider rng = RandomAssert.seededRNG(); + final UniformRandomProvider rng1 = RandomAssert.seededRNG(); + + // Create a RNG which randomly outputs nextInt() with zeros + final UniformRandomProvider delegate = RandomAssert.seededRNG(); + final UniformRandomProvider randomChoice = RandomAssert.seededRNG(); + final UniformRandomProvider rng2 = new UniformRandomProvider() { + @Override + public int nextInt() { + if (randomChoice.nextBoolean()) { + return 0; + } + return delegate.nextInt(); + } + + @Override + public long nextLong() { + throw new IllegalStateException("unused"); + } + }; + + for (int i = 0; i < 100; i++) { + // Avoid an index bound of 0 or 1 by adding 2 + final int n = 2 + rng.nextInt(ABOVE_BATCH_LIMIT); + final int[] bound1 = {n * (n - 1)}; + final int[] i1 = ArraySampler.randomBounded2(n, n - 1, bound1, rng1); + final int[] bound2 = {n * (n - 1)}; + final int[] i2 = ArraySampler.randomBounded2(n, n - 1, bound2, rng2); + Assertions.assertArrayEquals(i1, i2, "indices"); + Assertions.assertArrayEquals(bound1, bound2, "bounds"); + } + } + /** * Creates a natural sequence (0, 1, ..., n-1) in the sub-range {@code [from, to)} * where {@code n = to - from}. Values outside the sub-range are a continuation @@ -471,6 +629,23 @@ class ArraySamplerTest { return a; } + /** + * Create random bits of the specified length stored as ints using {0, 1}. + * + * @param length Length of the array. + * @return the bits, 1 per int + */ + private static int[] randomBitsAsInts(int length) { + // Random ints + final int[] a = new int[length]; + final UniformRandomProvider rng = RandomAssert.createRNG(); + // Convert to boolean bits: 0 or 1 + for (int i = 0; i < length; i++) { + a[i] = rng.nextBoolean() ? 1 : 0; + } + return a; + } + // Conversion helpers // Special case for boolean <=> bytes as {0, 1} diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 2cd4a1aa..033b90f5 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -56,6 +56,9 @@ If the output is not quite correct, check for invisible trailing spaces! <release version="1.7" date="TBD" description=" New features, updates and bug fixes (requires Java 8). "> + <action dev="aherbert" type="update" issue="RNG-187"> + "ArraySampler": Improve performance on small ranges using batched index generation. + </action> </release> <release version="1.6" date="2024-07-15" description="