This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git

commit 61f982c4011fa18a88354224e07ae81240c2b815
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Mon Sep 2 13:26:32 2024 +0100

    RNG-187: Use batched index generation in array shuffle
---
 .../apache/commons/rng/sampling/ArraySampler.java  | 287 +++++++++++++++++++--
 .../commons/rng/sampling/ArraySamplerTest.java     | 181 ++++++++++++-
 src/changes/changes.xml                            |   3 +
 3 files changed, 441 insertions(+), 30 deletions(-)

diff --git 
a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java
 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java
index e2772ce4..9be2de44 100644
--- 
a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java
+++ 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/ArraySampler.java
@@ -25,9 +25,29 @@ import org.apache.commons.rng.UniformRandomProvider;
  * 
href="https://en.wikipedia.org/wiki/Fisher-Yates_shuffle#The_modern_algorithm";>
  * Fisher-Yates</a> algorithm.
  *
+ * <p>Small ranges use batched random integer generation to increase 
performance.
+ *
+ * <ul>
+ * <li>Nevin Brackett-Rozinsky, Daniel Lemire,
+ * Batched Ranged Random Integer Generation, Software: Practice and Experience 
(to appear)
+ * <a href="https://arxiv.org/abs/2408.06213";>arXiv:2408.06213M</a>
+ * </ul>
+ *
  * @since 1.6
  */
 public final class ArraySampler {
+    /** Mask the lower 32-bit of a long. */
+    private static final long MASK_32 = 0xffffffffL;
+    /** 2^32. Used for the bounded random algorithm. This is required as the 
original
+     * method used (-bound % bound) for (2^L % bound) which only works for 
unsigned integer
+     * modulus. */
+    private static final long POW_32 = 1L << 32;
+    /** Length threshold to sample 2 integers from a random 32-bit value.
+     * The threshold provided in the Brackett-Rozinsky and Lemire paper
+     * is the power of 2 below 20724. Note that the product 2^15*2^15
+     * is representable using signed integers. */
+    private static final int BATCH_2 = 1 << 15;
+
     /** Class contains only static methods. */
     private ArraySampler() {}
 
@@ -39,9 +59,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static boolean[] shuffle(UniformRandomProvider rng, boolean[] 
array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -53,9 +83,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static byte[] shuffle(UniformRandomProvider rng, byte[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -67,9 +107,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static char[] shuffle(UniformRandomProvider rng, char[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -81,9 +131,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static double[] shuffle(UniformRandomProvider rng, double[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -95,9 +155,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static float[] shuffle(UniformRandomProvider rng, float[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -109,9 +179,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static int[] shuffle(UniformRandomProvider rng, int[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -123,9 +203,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static long[] shuffle(UniformRandomProvider rng, long[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -137,9 +227,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static short[] shuffle(UniformRandomProvider rng, short[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -152,9 +252,19 @@ public final class ArraySampler {
      * @return a reference to the given array
      */
     public static <T> T[] shuffle(UniformRandomProvider rng, T[] array) {
-        for (int i = array.length; i > 1; i--) {
+        int i = array.length;
+        for (; i > BATCH_2; --i) {
             swap(array, i - 1, rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, i - 1, index1);
+            swap(array, i - 2, index2);
+        }
         return array;
     }
 
@@ -169,10 +279,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static boolean[] shuffle(UniformRandomProvider rng, boolean[] 
array, int from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -187,10 +306,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static byte[] shuffle(UniformRandomProvider rng, byte[] array, int 
from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -205,10 +333,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static char[] shuffle(UniformRandomProvider rng, char[] array, int 
from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -223,10 +360,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static double[] shuffle(UniformRandomProvider rng, double[] array, 
int from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -241,10 +387,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static float[] shuffle(UniformRandomProvider rng, float[] array, 
int from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -259,10 +414,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static int[] shuffle(UniformRandomProvider rng, int[] array, int 
from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -277,10 +441,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static long[] shuffle(UniformRandomProvider rng, long[] array, int 
from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -295,10 +468,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static short[] shuffle(UniformRandomProvider rng, short[] array, 
int from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -314,10 +496,19 @@ public final class ArraySampler {
      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
      */
     public static <T> T[] shuffle(UniformRandomProvider rng, T[] array, int 
from, int to) {
-        final int length = to - checkFromToIndex(from, to, array.length);
-        for (int i = length; i > 1; i--) {
+        int i = to - checkFromToIndex(from, to, array.length);
+        for (; i > BATCH_2; --i) {
             swap(array, from + i - 1, from + rng.nextInt(i));
         }
+        // Batches of 2
+        final int[] productBound = {i * (i - 1)};
+        for (; i > 1; i -= 2) {
+            final int[] indices = randomBounded2(i, i - 1, productBound, rng);
+            final int index1 = indices[0];
+            final int index2 = indices[1];
+            swap(array, from + i - 1, from + index1);
+            swap(array, from + i - 2, from + index2);
+        }
         return array;
     }
 
@@ -438,6 +629,48 @@ public final class ArraySampler {
         array[j] = tmp;
     }
 
+
+    /**
+     * Return two random values in {@code [0, range1)} and {@code [0, 
range2)}. The
+     * product bound is used for the reject algorithm. See Brackett-Rozinsky 
and Lemire.
+     *
+     * <p>The product bound can be any positive integer {@code >= 
range1*range2}.
+     * It may be updated to become {@code range1*range2}.
+     *
+     * @param range1 Range 1.
+     * @param range2 Range 2.
+     * @param productBound Product bound.
+     * @param rng Source of randomness.
+     * @return [index1, index2]
+     */
+    static int[] randomBounded2(int range1, int range2, int[] productBound, 
UniformRandomProvider rng) {
+        long m = (rng.nextInt() & MASK_32) * range1;
+        // index1 and index2 are the top 32-bits of the long
+        long index1 = m;
+        // Leftover bits * range2
+        m = (m & MASK_32) * range2;
+        long index2 = m;
+        // Leftover bits must be unsigned
+        long l = m & MASK_32;
+        if (l < productBound[0]) {
+            final int bound = range1 * range2;
+            productBound[0] = bound;
+            if (l < bound) {
+                // 2^32 % bound
+                final long t = POW_32 % bound;
+                while (l < t) {
+                    m = (rng.nextInt() & MASK_32) * range1;
+                    index1 = m;
+                    m = (m & MASK_32) * range2;
+                    index2 = m;
+                    l = m & MASK_32;
+                }
+            }
+        }
+        // Convert to [0, range1), [0, range2)
+        return new int[] {(int) (index1 >> 32), (int) (index2 >> 32)};
+    }
+
     /**
      * Checks if the sub-range from fromIndex (inclusive) to toIndex 
(exclusive) is
      * within the bounds of range from 0 (inclusive) to length (exclusive).
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java
index 7229896d..26c607c4 100644
--- 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/ArraySamplerTest.java
@@ -17,18 +17,34 @@
 package org.apache.commons.rng.sampling;
 
 import java.util.Arrays;
+import java.util.stream.Stream;
 import org.apache.commons.math3.stat.inference.ChiSquareTest;
 import org.apache.commons.rng.UniformRandomProvider;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.MethodSource;
 import org.junit.jupiter.params.provider.ValueSource;
 
 /**
  * Tests for {@link ArraySampler}.
  */
 class ArraySamplerTest {
+    /** Constant for a large array where batched index generation is not used. 
*/
+    private static final int ABOVE_BATCH_LIMIT = 43210;
+
+    /**
+     * Return arguments for shuffle of a sub-range where batch index 
generation is
+     * not always used. Arguments are [from, to, length].
+     *
+     * @return the stream
+     */
+    static Stream<Arguments> aboveBatchLimit() {
+        return Stream.of(Arguments.of(123, ABOVE_BATCH_LIMIT + 123, 
ABOVE_BATCH_LIMIT + 789));
+    }
+
     @Test
     void testNullArguments() {
         final UniformRandomProvider rng = RandomAssert.seededRNG();
@@ -199,7 +215,7 @@ class ArraySamplerTest {
      * Test that all (unique) entries exist in the shuffled array.
      */
     @ParameterizedTest
-    @ValueSource(ints = {13, 42, 100})
+    @ValueSource(ints = {13, 42, 100, ABOVE_BATCH_LIMIT})
     void testShuffleNoDuplicates(int length) {
         final int[] array = PermutationSampler.natural(length);
         final UniformRandomProvider rng = RandomAssert.seededRNG();
@@ -226,6 +242,7 @@ class ArraySamplerTest {
         "0, 5, 10",
         "5, 10, 15",
     })
+    @MethodSource("aboveBatchLimit")
     void testShuffleSubRangeNoDuplicates(int from, int to, int length) {
         // Natural sequence in the sub-range
         final int[] array = natural(from, to, length);
@@ -253,7 +270,7 @@ class ArraySamplerTest {
      * Test that shuffle of the full range using the range arguments matches a 
full-range shuffle.
      */
     @ParameterizedTest
-    @ValueSource(ints = {9, 17})
+    @ValueSource(ints = {9, 17, ABOVE_BATCH_LIMIT})
     void testShuffleFullRangeMatchesShuffle(int length) {
         final int[] array1 = PermutationSampler.natural(length);
         final int[] array2 = array1.clone();
@@ -277,6 +294,7 @@ class ArraySamplerTest {
         "0, 5, 10",
         "5, 10, 15",
     })
+    @MethodSource("aboveBatchLimit")
     void testShuffleSubRangeMatchesShuffle(int from, int to, int length) {
         final int[] array1 = PermutationSampler.natural(to - from);
         // Natural sequence in the sub-range
@@ -330,6 +348,47 @@ class ArraySamplerTest {
         Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
     }
 
+
+    @ParameterizedTest
+    @ValueSource(ints = {ABOVE_BATCH_LIMIT})
+    void testShuffleIsRandomLarge(int length) {
+        final int[] array = PermutationSampler.natural(length);
+        final UniformRandomProvider rng = RandomAssert.createRNG();
+        final int samples = 1000000 / length;
+        final int bins = 8;
+        final long[][] counts = new long[bins][bins];
+        final int width = (int) Math.ceil((double) length / bins);
+        for (int j = 1; j <= samples; j++) {
+            ArraySampler.shuffle(rng, array);
+            for (int i = 0; i < length; i++) {
+                counts[i / width][array[i] / width]++;
+            }
+        }
+        final double p = new ChiSquareTest().chiSquareTest(counts);
+        Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
+    }
+
+    @ParameterizedTest
+    @MethodSource("aboveBatchLimit")
+    void testShuffleSubRangeIsRandomLarge(int from, int to, int length) {
+        // Natural sequence in the sub-range
+        final int[] array = natural(from, to, length);
+        final UniformRandomProvider rng = RandomAssert.createRNG();
+        final int n = to - from;
+        final int samples = 1000000 / n;
+        final int bins = 8;
+        final long[][] counts = new long[bins][bins];
+        final int width = (int) Math.ceil((double) length / bins);
+        for (int j = 1; j <= samples; j++) {
+            ArraySampler.shuffle(rng, array, from, to);
+            for (int i = 0; i < n; i++) {
+                counts[i / width][array[from + i] / width]++;
+            }
+        }
+        final double p = new ChiSquareTest().chiSquareTest(counts);
+        Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
+    }
+
     // Test other implementations. Include zero length arrays.
 
     @ParameterizedTest
@@ -405,7 +464,7 @@ class ArraySamplerTest {
     // most robust to detecting the boolean shuffle swapping around the wrong 
pairs.
 
     @ParameterizedTest
-    @ValueSource(ints = {0, 1234})
+    @ValueSource(ints = {0, 1234, ABOVE_BATCH_LIMIT})
     void testShuffleBoolean(int length) {
         final byte[] a = randomBitsAsBytes(length);
         final boolean[] b = booleans(a);
@@ -422,6 +481,7 @@ class ArraySamplerTest {
         "0, 900, 1000",
         "100, 1100, 1200",
     })
+    @MethodSource("aboveBatchLimit")
     void testShuffleBooleanSubRange(int from, int to, int length) {
         final byte[] a = randomBitsAsBytes(length);
         final boolean[] b = booleans(a);
@@ -430,6 +490,104 @@ class ArraySamplerTest {
         Assertions.assertArrayEquals(a, bytes(b));
     }
 
+    @ParameterizedTest
+    @ValueSource(ints = {ABOVE_BATCH_LIMIT})
+    void testShuffleLarge(int length) {
+        final int[] a = randomBitsAsInts(length);
+        final byte[] b = bytes(a);
+        final char[] c = chars(a);
+        final double[] d = doubles(a);
+        final float[] e = floats(a);
+        final long[] f = longs(a);
+        final short[] g = shorts(a);
+        final Integer[] h = boxed(a);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), a);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), b);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), c);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), d);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), e);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), f);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), g);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), h);
+        Assertions.assertArrayEquals(a, ints(b), "byte");
+        Assertions.assertArrayEquals(a, ints(c), "char");
+        Assertions.assertArrayEquals(a, ints(d), "double");
+        Assertions.assertArrayEquals(a, ints(e), "float");
+        Assertions.assertArrayEquals(a, ints(f), "long");
+        Assertions.assertArrayEquals(a, ints(g), "short");
+        Assertions.assertArrayEquals(a, ints(h), "Object");
+    }
+
+    @ParameterizedTest
+    @MethodSource("aboveBatchLimit")
+    void testShuffleSubRangeLarge(int from, int to, int length) {
+        final int[] a = randomBitsAsInts(length);
+        final byte[] b = bytes(a);
+        final char[] c = chars(a);
+        final double[] d = doubles(a);
+        final float[] e = floats(a);
+        final long[] f = longs(a);
+        final short[] g = shorts(a);
+        final Integer[] h = boxed(a);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), a, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), b, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), c, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), d, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), e, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), f, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), g, from, to);
+        ArraySampler.shuffle(RandomAssert.seededRNG(), h, from, to);
+        Assertions.assertArrayEquals(a, ints(b), "byte");
+        Assertions.assertArrayEquals(a, ints(c), "char");
+        Assertions.assertArrayEquals(a, ints(d), "double");
+        Assertions.assertArrayEquals(a, ints(e), "float");
+        Assertions.assertArrayEquals(a, ints(f), "long");
+        Assertions.assertArrayEquals(a, ints(g), "short");
+        Assertions.assertArrayEquals(a, ints(h), "Object");
+    }
+
+    /**
+     * Test that the indices generated after rejection of the first pair use 
an identical
+     * generation method. This is done by randomly forcing pairs to be 
rejected by outputting
+     * zero for the random bits. This effectively breaks up the usual output 
from the
+     * generation algorithm so that the indices may come from the first 
generation step,
+     * or the subsequent generation step inside the generating loop.
+     */
+    @Test
+    void testRandomBounded2() {
+        final UniformRandomProvider rng = RandomAssert.seededRNG();
+        final UniformRandomProvider rng1 = RandomAssert.seededRNG();
+
+        // Create a RNG which randomly outputs nextInt() with zeros
+        final UniformRandomProvider delegate = RandomAssert.seededRNG();
+        final UniformRandomProvider randomChoice = RandomAssert.seededRNG();
+        final UniformRandomProvider rng2 = new UniformRandomProvider() {
+            @Override
+            public int nextInt() {
+                if (randomChoice.nextBoolean()) {
+                    return 0;
+                }
+                return delegate.nextInt();
+            }
+
+            @Override
+            public long nextLong() {
+                throw new IllegalStateException("unused");
+            }
+        };
+
+        for (int i = 0; i < 100; i++) {
+            // Avoid an index bound of 0 or 1 by adding 2
+            final int n = 2 + rng.nextInt(ABOVE_BATCH_LIMIT);
+            final int[] bound1 = {n * (n - 1)};
+            final int[] i1 = ArraySampler.randomBounded2(n, n - 1, bound1, 
rng1);
+            final int[] bound2 = {n * (n - 1)};
+            final int[] i2 = ArraySampler.randomBounded2(n, n - 1, bound2, 
rng2);
+            Assertions.assertArrayEquals(i1, i2, "indices");
+            Assertions.assertArrayEquals(bound1, bound2, "bounds");
+        }
+    }
+
     /**
      * Creates a natural sequence (0, 1, ..., n-1) in the sub-range {@code 
[from, to)}
      * where {@code n = to - from}. Values outside the sub-range are a 
continuation
@@ -471,6 +629,23 @@ class ArraySamplerTest {
         return a;
     }
 
+    /**
+     * Create random bits of the specified length stored as ints using {0, 1}.
+     *
+     * @param length Length of the array.
+     * @return the bits, 1 per int
+     */
+    private static int[] randomBitsAsInts(int length) {
+        // Random ints
+        final int[] a = new int[length];
+        final UniformRandomProvider rng = RandomAssert.createRNG();
+        // Convert to boolean bits: 0 or 1
+        for (int i = 0; i < length; i++) {
+            a[i] = rng.nextBoolean() ? 1 : 0;
+        }
+        return a;
+    }
+
     // Conversion helpers
     // Special case for boolean <=> bytes as {0, 1}
 
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2cd4a1aa..033b90f5 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -56,6 +56,9 @@ If the output is not quite correct, check for invisible 
trailing spaces!
     <release version="1.7" date="TBD" description="
 New features, updates and bug fixes (requires Java 8).
 ">
+      <action dev="aherbert" type="update" issue="RNG-187">
+        "ArraySampler": Improve performance on small ranges using batched 
index generation.
+      </action>
 </release>
 
     <release version="1.6" date="2024-07-15" description="

Reply via email to