This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 69b966f3ab135fe3f507f69de3d97bbac64fb24b Author: Alex Herbert <[email protected]> AuthorDate: Mon Aug 5 11:36:32 2019 +0100 RNG-85: Added SeedUtils to allow generation of hex permutations. This is to be used to seed the Middle Square Weyl Sequence generator. --- commons-rng-simple/pom.xml | 7 + .../commons/rng/simple/internal/SeedUtils.java | 213 +++++++++++++++++++++ .../commons/rng/simple/internal/SeedUtilsTest.java | 104 ++++++++++ 3 files changed, 324 insertions(+) diff --git a/commons-rng-simple/pom.xml b/commons-rng-simple/pom.xml index 84c3210..116c7fc 100644 --- a/commons-rng-simple/pom.xml +++ b/commons-rng-simple/pom.xml @@ -50,6 +50,13 @@ <artifactId>commons-rng-core</artifactId> <version>1.3-SNAPSHOT</version> </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.6.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java b/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java new file mode 100644 index 0000000..5d6aa14 --- /dev/null +++ b/commons-rng-simple/src/main/java/org/apache/commons/rng/simple/internal/SeedUtils.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rng.simple.internal; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.core.util.NumberFactory; + +/** + * Utility for creating seeds. + */ +final class SeedUtils { + /** The modulus {@code 256 % 9}. */ + private static final int MOD_09 = 256 % 9; + /** The modulus {@code 256 % 10}. */ + private static final int MOD_10 = 256 % 10; + /** The modulus {@code 256 % 11}. */ + private static final int MOD_11 = 256 % 11; + /** The modulus {@code 256 % 12}. */ + private static final int MOD_12 = 256 % 12; + /** The modulus {@code 256 % 13}. */ + private static final int MOD_13 = 256 % 13; + /** The modulus {@code 256 % 14}. */ + private static final int MOD_14 = 256 % 14; + /** The modulus {@code 256 % 15}. */ + private static final int MOD_15 = 256 % 15; + /** The 16 hex digits in an array. */ + private static final byte[] HEX_DIGIT_ARRAY = {0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, + 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0}; + + /** + * Provider of unsigned 8-bit integers. + */ + private static class UnsignedByteProvider { + /** Mask to extract the lowest 2 bits from an integer. */ + private static final int MASK_2_BITS = 0x3; + /** Mask to extract the lowest 8 bits from an integer. */ + private static final int MASK_8_BITS = 0xff; + + /** Source of randomness. */ + private final UniformRandomProvider rng; + /** The current 32-bits of randomness. */ + private int bits; + /** The counter tracking the bits to extract. */ + private int counter; + + /** + * @param rng Source of randomness. + */ + UnsignedByteProvider(UniformRandomProvider rng) { + this.rng = rng; + } + + /** + * Return the next unsigned byte. + * + * @return Value in the range[0,255] + */ + int nextUnsignedByte() { + // Every 4 bytes generate a new 32-bit value + final int count = counter & MASK_2_BITS; + counter++; + if (count == 0) { + bits = rng.nextInt(); + // Consume the first byte + return bits & MASK_8_BITS; + } + // Consume a remaining byte (occurs 3 times) + bits >>>= 8; + return bits & MASK_8_BITS; + } + } + + /** + * Class contains only static methods. + */ + private SeedUtils() {} + + /** + * Creates an {@code int} containing a permutation of 8 hex digits chosen from 16. + * + * @param rng Source of randomness. + * @return hex digit permutation. + */ + static int createIntHexPermutation(UniformRandomProvider rng) { + final UnsignedByteProvider provider = new UnsignedByteProvider(rng); + return createUpperBitsHexPermutation(provider); + } + + /** + * Creates a {@code long} containing a permutation of 8 hex digits chosen from 16 in + * the upper and lower 32-bits. + * + * @param rng Source of randomness. + * @return hex digit permutation. + */ + static long createLongHexPermutation(UniformRandomProvider rng) { + final UnsignedByteProvider provider = new UnsignedByteProvider(rng); + // Extract upper bits and combine with a second sample + return NumberFactory.makeLong(createUpperBitsHexPermutation(provider), + createUpperBitsHexPermutation(provider)); + } + + /** + * Creates a {@code int} containing a permutation of 8 hex digits chosen from 16. + * + * @param provider Source of randomness. + * @return hex digit permutation. + */ + private static int createUpperBitsHexPermutation(UnsignedByteProvider provider) { + // Compute a Fisher-Yates shuffle in-place on the 16 hex digits. + // Each digit is chosen uniformly from the remaining digits. + // The value is swapped with the current digit. + + // The following is an optimised sampling algorithm that generates + // a uniform deviate in the range [0,n) from an unsigned byte. + // The expected number of samples is 256/n. + // This has a bias when n does not divide into 256. So samples must + // be rejected at a rate of (256 % n) / 256: + // n 256 % n Rejection rate + // 9 4 0.015625 + // 10 6 0.0234375 + // 11 3 0.01171875 + // 12 4 0.015625 + // 13 9 0.03515625 + // 14 4 0.015625 + // 15 1 0.00390625 + // 16 0 0 + // The probability of no rejections is 1 - (1-p15) * (1-p14) ... = 0.115 + + final byte[] digits = HEX_DIGIT_ARRAY.clone(); + + // The first digit has no bias. Get an index using a mask to avoid modulus. + int bits = copyToOutput(digits, 0, 15, provider.nextUnsignedByte() & 0xf); + + // All remaining digits have a bias so use the rejection algorithm to find + // an appropriate uniform deviate in the range [0,n) + bits = copyToOutput(digits, bits, 14, nextUnsignedByteInRange(provider, MOD_15, 15)); + bits = copyToOutput(digits, bits, 13, nextUnsignedByteInRange(provider, MOD_14, 14)); + bits = copyToOutput(digits, bits, 12, nextUnsignedByteInRange(provider, MOD_13, 13)); + bits = copyToOutput(digits, bits, 11, nextUnsignedByteInRange(provider, MOD_12, 12)); + bits = copyToOutput(digits, bits, 10, nextUnsignedByteInRange(provider, MOD_11, 11)); + bits = copyToOutput(digits, bits, 9, nextUnsignedByteInRange(provider, MOD_10, 10)); + bits = copyToOutput(digits, bits, 8, nextUnsignedByteInRange(provider, MOD_09, 9)); + + return bits; + } + + + /** + * Get the next unsigned byte in the range {@code [0,n)} rejecting any over-represented + * sample value using the pre-computed modulus. + * + * <p>This algorithm is as per Lemire (2019) adapted for 8-bit arithmetic.</p> + * + * @param provider Provider of bytes. + * @param threshold Modulus threshold {@code 256 % n}. + * @param n Upper range (exclusive) + * @return Value. + * @see <a href="https://arxiv.org/abs/1805.10941"> + * Lemire (2019): Fast Random Integer Generation in an Interval</a> + */ + private static int nextUnsignedByteInRange(UnsignedByteProvider provider, int threshold, int n) { + // Rejection method using multiply by a fraction: + // n * [0, 2^8 - 1) + // ------------ + // 2^8 + // The result is mapped back to an integer and will be in the range [0, n) + int m; + do { + // Compute product of n * [0, 2^8 - 1) + m = n * provider.nextUnsignedByte(); + + // Test the sample uniformity. + } while ((m & 0xff) < threshold); + // Final divide by 2^8 + return m >>> 8; + } + + /** + * Copy the lower hex digit to the output bits. Swap the upper hex digit into + * the lower position. This is equivalent to a swap step of a Fisher-Yates shuffle on + * an array but the output of the shuffle are written to the bits. + * + * @param digits Digits. + * @param bits Bits. + * @param upper Upper index. + * @param lower Lower index. + * @return Updated bits. + */ + private static int copyToOutput(byte[] digits, int bits, int upper, int lower) { + // Move the existing bits up and append the next hex digit. + // This is equivalent to swapping lower to upper. + final int newbits = bits << 4 | digits[lower] & 0xf; + // Swap upper to lower. + digits[lower] = digits[upper]; + return newbits; + } +} diff --git a/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java b/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java new file mode 100644 index 0000000..412cc8c --- /dev/null +++ b/commons-rng-simple/src/test/java/org/apache/commons/rng/simple/internal/SeedUtilsTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rng.simple.internal; + +import org.apache.commons.math3.stat.inference.ChiSquareTest; +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.core.source64.SplitMix64; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +/** + * Tests for the {@link SeedUtils}. + */ +public class SeedUtilsTest { + /** + * Test the int hex permutation has 8 unique hex digits per permutation. + * A uniformity test is performed on to check each hex digits is used evenly at each + * character position. + */ + @Test + public void testCreateIntHexPermutation() { + final UniformRandomProvider rng = new SplitMix64(-567435247L); + final long[][] samples = new long[8][16]; + for (int i = 0; i < 1000; i++) { + int sample = SeedUtils.createIntHexPermutation(rng); + int observed = 0; + for (int j = 0; j < 8; j++) { + final int digit = sample & 0xf; + Assert.assertEquals("Duplicate digit in sample", 0, observed & (1 << digit)); + observed |= 1 << digit; + samples[j][digit]++; + sample >>>= 4; + } + } + + final ChiSquareTest chiSquareTest = new ChiSquareTest(); + final double[] expected = new double[16]; + Arrays.fill(expected, 1.0 / 16); + // Pass if we cannot reject null hypothesis that distributions are the same. + for (int j = 0; j < 8; j++) { + Assert.assertFalse("Not uniform in digit " + j, + chiSquareTest.chiSquareTest(expected, samples[j], 0.001)); + } + } + + /** + * Test the long hex permutation has 8 unique hex digits per permutation in the upper and + * lower 32-bits. + * A uniformity test is performed on to check each hex digits is used evenly at each + * character position. + */ + @Test + public void testCreateLongHexPermutation() { + final UniformRandomProvider rng = new SplitMix64(34645768L); + final long[][] samples = new long[16][16]; + for (int i = 0; i < 1000; i++) { + long sample = SeedUtils.createLongHexPermutation(rng); + // Check lower 32-bits + long observed = 0; + for (int j = 0; j < 8; j++) { + final int digit = (int) (sample & 0xfL); + Assert.assertEquals("Duplicate digit in lower sample", 0, observed & (1 << digit)); + observed |= 1 << digit; + samples[j][digit]++; + sample >>>= 4; + } + // Check upper 32-bits + observed = 0; + for (int j = 8; j < 16; j++) { + final int digit = (int) (sample & 0xfL); + Assert.assertEquals("Duplicate digit in upper sample", 0, observed & (1 << digit)); + observed |= 1 << digit; + samples[j][digit]++; + sample >>>= 4; + } + } + + final ChiSquareTest chiSquareTest = new ChiSquareTest(); + final double[] expected = new double[16]; + Arrays.fill(expected, 1.0 / 16); + // Pass if we cannot reject null hypothesis that distributions are the same. + for (int j = 0; j < 16; j++) { + Assert.assertFalse("Not uniform in digit " + j, + chiSquareTest.chiSquareTest(expected, samples[j], 0.001)); + } + } +}
