This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit 1f3a83b96c9621857bcc6db0a5db9d7ec9c35c6b Author: Alex Herbert <aherb...@apache.org> AuthorDate: Mon May 13 20:15:09 2019 +0100 RNG-101: Updated implementation to use factory methods for construction. The methods can choose a concrete implementation that has optimal storage for the distribution size. Factory methods are provided for any discrete distribution, and the Poisson and Binomial distributions. --- .../distribution/DiscreteSamplersPerformance.java | 12 +- .../MarsagliaTsangWangBinomialSampler.java | 257 ------ .../MarsagliaTsangWangDiscreteSampler.java | 962 +++++++++++++++------ .../MarsagliaTsangWangSmallMeanPoissonSampler.java | 218 ----- .../distribution/DiscreteSamplersList.java | 14 +- .../MarsagliaTsangWangBinomialSamplerTest.java | 242 ------ .../MarsagliaTsangWangDiscreteSamplerTest.java | 419 +++++++-- ...sagliaTsangWangSmallMeanPoissonSamplerTest.java | 119 --- 8 files changed, 1047 insertions(+), 1196 deletions(-) diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/DiscreteSamplersPerformance.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/DiscreteSamplersPerformance.java index 81dc616..1e90a90 100644 --- a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/DiscreteSamplersPerformance.java +++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/DiscreteSamplersPerformance.java @@ -23,9 +23,7 @@ import org.apache.commons.rng.sampling.distribution.DiscreteSampler; import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler; import org.apache.commons.rng.sampling.distribution.GeometricSampler; import org.apache.commons.rng.sampling.distribution.LargeMeanPoissonSampler; -import org.apache.commons.rng.sampling.distribution.MarsagliaTsangWangBinomialSampler; import org.apache.commons.rng.sampling.distribution.MarsagliaTsangWangDiscreteSampler; -import org.apache.commons.rng.sampling.distribution.MarsagliaTsangWangSmallMeanPoissonSampler; import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler; import org.apache.commons.rng.sampling.distribution.SmallMeanPoissonSampler; @@ -70,7 +68,7 @@ public class DiscreteSamplersPerformance { "LargeMeanPoissonSampler", "GeometricSampler", "MarsagliaTsangWangDiscreteSampler", - "MarsagliaTsangWangSmallMeanPoissonSampler", + "MarsagliaTsangWangPoissonSampler", "MarsagliaTsangWangBinomialSampler", }) private String samplerType; @@ -103,11 +101,11 @@ public class DiscreteSamplersPerformance { } else if ("GeometricSampler".equals(samplerType)) { sampler = new GeometricSampler(rng, 0.21); } else if ("MarsagliaTsangWangDiscreteSampler".equals(samplerType)) { - sampler = new MarsagliaTsangWangDiscreteSampler(rng, new double[] {0.1, 0.2, 0.3, 0.4}); - } else if ("MarsagliaTsangWangSmallMeanPoissonSampler".equals(samplerType)) { - sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, 8.9); + sampler = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng, new double[] {0.1, 0.2, 0.3, 0.4}); + } else if ("MarsagliaTsangWangPoissonSampler".equals(samplerType)) { + sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, 8.9); } else if ("MarsagliaTsangWangBinomialSampler".equals(samplerType)) { - sampler = new MarsagliaTsangWangBinomialSampler(rng, 20, 0.33); + sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, 20, 0.33); } } } diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSampler.java deleted file mode 100644 index 5b13155..0000000 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSampler.java +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rng.sampling.distribution; - -import org.apache.commons.rng.UniformRandomProvider; - -/** - * Sampler for the <a href="https://en.wikipedia.org/wiki/Binomial_distribution">Binomial - * distribution</a> using an optimised look-up table. - * - * <ul> - * <li> - * A Binomial process is simulated using pre-tabulated probabilities, as - * described in George Marsaglia, Wai Wan Tsang, Jingbo Wang (2004) Fast Generation of - * Discrete Random Variables. Journal of Statistical Software. Vol. 11, Issue. 3, pp. 1-11. - * </li> - * </ul> - * - * <p>The sampler will fail on construction if the distribution cannot be computed. This - * occurs when {@code trials} is large and probability of success is close to {@code 0.5}. - * The exact failure condition is:</p> - * - * <pre> - * {@code Math.exp(trials * Math.log(Math.min(p, 1 - p))) < Double.MIN_VALUE} - * </pre> - * - * <p>In this case the distribution can be approximated using a limiting distributions - * of either a Poisson or a Normal distribution as appropriate.</p> - * - * <p>Note: The algorithm ignores any observation where for a sample size of - * 2<sup>31</sup> the expected number of occurrences is {@code < 0.5}.</p> - * - * <p>Sampling uses 1 call to {@link UniformRandomProvider#nextInt()}. Storage - * requirements depend on the probabilities and are capped at 2<sup>17</sup> bytes, or 131 - * kB.</p> - * - * @see <a href="http://dx.doi.org/10.18637/jss.v011.i03">Margsglia, et al (2004) JSS Vol. - * 11, Issue 3</a> - * @since 1.3 - */ -public class MarsagliaTsangWangBinomialSampler implements DiscreteSampler { - /** - * The value 2<sup>30</sup> as an {@code int}.</p> - */ - private static final int INT_30 = 1 << 30; - /** - * The value 2<sup>16</sup> as an {@code int}.</p> - */ - private static final int INT_16 = 1 << 16; - /** - * The value 2<sup>31</sup> as an {@code double}.</p> - */ - private static final double DOUBLE_31 = 1L << 31; - - /** The delegate. */ - private final DiscreteSampler delegate; - - /** - * Return a fixed result for the Binomial distribution. - */ - private static class FixedResultDiscreteSampler implements DiscreteSampler { - /** The result. */ - private final int result; - - /** - * @param result Result. - */ - FixedResultDiscreteSampler(int result) { - this.result = result; - } - - @Override - public int sample() { - return result; - } - - @Override - public String toString() { - return "Binomial deviate"; - } - } - - /** - * Return an inversion result for the Binomial distribution. This assumes the - * following: - * - * <pre> - * Binomial(n, p) = 1 - Binomial(n, 1 - p) - * </pre> - */ - private static class InversionBinomialDiscreteSampler implements DiscreteSampler { - /** The number of trials. */ - private final int trials; - /** The Binomial distribution sampler. */ - private final DiscreteSampler sampler; - - /** - * @param trials Number of trials. - * @param sampler Binomial distribution sampler. - */ - InversionBinomialDiscreteSampler(int trials, DiscreteSampler sampler) { - this.trials = trials; - this.sampler = sampler; - } - - @Override - public int sample() { - return trials - sampler.sample(); - } - - @Override - public String toString() { - return sampler.toString(); - } - } - - /** - * Create a new instance. - * - * @param rng Generator of uniformly distributed random numbers. - * @param trials Number of trials. - * @param p Probability of success. - * @throws IllegalArgumentException if {@code trials < 0} or {@code trials >= 2^16}, - * {@code p} is not in the range {@code [0-1]}, or the probability distribution cannot - * be computed. - */ - public MarsagliaTsangWangBinomialSampler(UniformRandomProvider rng, int trials, double p) { - if (trials < 0) { - throw new IllegalArgumentException("Trials is not positive: " + trials); - } - if (p < 0 || p > 1) { - throw new IllegalArgumentException("Probability is not in range [0,1]: " + p); - } - - // Handle edge cases - if (p == 0) { - delegate = new FixedResultDiscreteSampler(0); - return; - } - if (p == 1) { - delegate = new FixedResultDiscreteSampler(trials); - return; - } - - // A simple check using the supported index size. - if (trials >= INT_16) { - throw new IllegalArgumentException("Unsupported number of trials: " + trials); - } - - // The maximum supported value for Math.exp is approximately -744. - // This occurs when trials is large and p is close to 1. - // Handle this by using an inversion: generate j=Binomial(n,1-p), return n-j - final boolean inversion = p > 0.5; - if (inversion) { - p = 1 - p; - } - - // Check if the distribution can be computed - final double p0 = Math.exp(trials * Math.log(1 - p)); - if (p0 < Double.MIN_VALUE) { - throw new IllegalArgumentException("Unable to compute distribution"); - } - - // First find size of probability array - double t = p0; - final double h = p / (1 - p); - // Find first probability - int begin = 0; - if (t * DOUBLE_31 < 1) { - // Somewhere after p(0) - // Note: - // If this loop is entered p(0) is < 2^-31. - // This has been tested at the extreme for p(0)=Double.MIN_VALUE and either - // p=0.5 or trials=2^16-1 and does not fail to find the beginning. - for (int i = 1; i <= trials; i++) { - t *= (trials + 1 - i) * h / i; - if (t * DOUBLE_31 >= 1) { - begin = i; - break; - } - } - } - // Find last probability - int end = trials; - for (int i = begin + 1; i <= trials; i++) { - t *= (trials + 1 - i) * h / i; - if (t * DOUBLE_31 < 1) { - end = i - 1; - break; - } - } - final int size = end - begin + 1; - final int offset = begin; - - // Then assign probability values as 30-bit integers - final int[] prob = new int[size]; - t = p0; - for (int i = 1; i <= begin; i++) { - t *= (trials + 1 - i) * h / i; - } - int sum = toUnsignedInt30(t); - prob[0] = sum; - for (int i = begin + 1; i <= end; i++) { - t *= (trials + 1 - i) * h / i; - prob[i - begin] = toUnsignedInt30(t); - sum += prob[i - begin]; - } - - // If the sum is < 2^30 add the remaining sum to the mode (floor((n+1)p))). - final int mode = (int) ((trials + 1) * p) - offset; - prob[mode] += Math.max(0, INT_30 - sum); - - final MarsagliaTsangWangDiscreteSampler sampler = new MarsagliaTsangWangDiscreteSampler(rng, prob, offset); - - if (inversion) { - delegate = new InversionBinomialDiscreteSampler(trials, sampler); - } else { - delegate = sampler; - } - } - - /** - * Convert the probability to an unsigned integer in the range [0,2^30]. - * - * @param p the probability - * @return the integer - */ - private static int toUnsignedInt30(double p) { - return (int) (p * INT_30 + 0.5); - } - - /** {@inheritDoc} */ - @Override - public int sample() { - return delegate.sample(); - } - - /** {@inheritDoc} */ - @Override - public String toString() { - return "Binomial " + delegate.toString(); - } -} diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSampler.java index a1fc5a7..136df8c 100644 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSampler.java +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSampler.java @@ -37,142 +37,154 @@ import org.apache.commons.rng.UniformRandomProvider; * {@code n <= } 2<sup>8</sup>, 17.0MB for {@code n <= } 2<sup>16</sup>, and 4.3GB for * {@code n <=} 2<sup>30</sup>. Realistic requirements will be in the kB range.</p> * + * <p>The sampler supports the following distributions:</p> + * + * <ul> + * <li>Any discrete probability distribution (probabilities must be provided) + * <li>Poisson distribution up to {@code mean = 1024} + * <li>Binomial distribution up to {@code trials = 65535} + * </ul> + * * @since 1.3 * @see <a href="http://dx.doi.org/10.18637/jss.v011.i03">Margsglia, et al (2004) JSS Vol. * 11, Issue 3</a> */ -public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { - /** The exclusive upper bound for an unsigned 8-bit integer. */ - private static final int UNSIGNED_INT_8 = 1 << 8; - /** The exclusive upper bound for an unsigned 16-bit integer. */ - private static final int UNSIGNED_INT_16 = 1 << 16; - - /** Limit for look-up table 1. */ - private final int t1; - /** Limit for look-up table 2. */ - private final int t2; - /** Limit for look-up table 3. */ - private final int t3; - /** Limit for look-up table 4. */ - private final int t4; - - /** Index look-up table. */ - private final IndexTable indexTable; - - /** Underlying source of randomness. */ - private final UniformRandomProvider rng; +public abstract class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { + /** The value 2<sup>8</sup> as an {@code int}. */ + private static final int INT_8 = 1 << 8; + /** The value 2<sup>16</sup> as an {@code int}. */ + private static final int INT_16 = 1 << 16; + /** The value 2<sup>30</sup> as an {@code int}. */ + private static final int INT_30 = 1 << 30; + /** The value 2<sup>31</sup> as a {@code double}. */ + private static final double DOUBLE_31 = 1L << 31; + + /** The general name of any discrete probability distribution. */ + private static final String DISCRETE_NAME = "discrete"; + /** The name of the Poisson distribution. */ + private static final String POISSON_NAME = "Poisson"; + /** The name of the Binomial distribution. */ + private static final String BINOMIAL_NAME = "Binomial"; /** - * An index table contains the sample values. This is efficiently accessed for any index in the - * range {@code [0,2^30)} by using an algorithm based on the decomposition of the index into - * 5 base-64 digits. + * Upper bound on the mean for the Poisson distribution. * - * <p>This interface defines the methods for the filling and accessing values from 5 tables. - * It allows a concrete implementation to allocate appropriate tables to optimise memory - * requirements.</p> + * <p>The original source code provided in Marsaglia, et al (2004) has no explicit + * limit but the code fails at mean >= 1941 as the transform to compute p(x=mode) + * produces infinity. Use a conservative limit of 1024.</p> */ - private interface IndexTable { - /** - * @param from Lower bound index (inclusive). - * @param to Upper bound index (exclusive). - * @param value Value. - */ - void fillTable1(int from, int to, int value); - /** - * @param from Lower bound index (inclusive). - * @param to Upper bound index (exclusive). - * @param value Value. - */ - void fillTable2(int from, int to, int value); - /** - * @param from Lower bound index (inclusive). - * @param to Upper bound index (exclusive). - * @param value Value. - */ - void fillTable3(int from, int to, int value); - /** - * @param from Lower bound index (inclusive). - * @param to Upper bound index (exclusive). - * @param value Value. - */ - void fillTable4(int from, int to, int value); - /** - * @param from Lower bound index (inclusive). - * @param to Upper bound index (exclusive). - * @param value Value. - */ - void fillTable5(int from, int to, int value); + private static final double MAX_POISSON_MEAN = 1024; - /** - * @param index Index. - * @return Value. - */ - int getTable1(int index); - /** - * @param index Index. - * @return Value. - */ - int getTable2(int index); - /** - * @param index Index. - * @return Value. - */ - int getTable3(int index); - /** - * @param index Index. - * @return Value. - */ - int getTable4(int index); - /** - * @param index Index. - * @return Value. - */ - int getTable5(int index); - } + /** Underlying source of randomness. */ + protected final UniformRandomProvider rng; + + /** The name of the distribution. */ + private final String distributionName; + + // ========================================================================= + // Implementation note: + // + // This sampler uses prepared look-up tables that are searched using a single + // random int variate. The look-up tables contain the sample value. The tables + // are constructed using probabilities that sum to 2^30. The original paper + // by Marsaglia, et al (2004) describes the use of 5, 3, or 2 look-up tables + // indexed using digits of base 2^6, 2^10 or 2^15. Currently only base 64 (2^6) + // is supported using 5 look-up tables. + // + // The implementations use 8, 16 or 32 bit storage tables to support different + // distribution sizes with optimal storage. Separate class implementations of + // the same algorithm allow array storage to be accessed directly from 1D tables. + // This provides a performance gain over using: abstracted storage accessed via + // an interface; or a single 2D table. + // + // To allow the optimal implementation to be chosen the sampler is created + // using factory methods. The sampler supports any probability distribution + // when provided via an array of probabilities and the Poisson and Binomial + // distributions for a restricted set of parameters. The restrictions are + // imposed by the requirement to compute the entire probability distribution + // from the controlling parameter(s) using a recursive method. + // ========================================================================= /** - * Index table for an 8-bit index. + * An implementation for the sample algorithm based on the decomposition of the + * index in the range {@code [0,2^30)} into 5 base-64 digits with 8-bit backing storage. */ - private static class IndexTable8 implements IndexTable { + private static class MarsagliaTsangWangBase64Int8DiscreteSampler + extends MarsagliaTsangWangDiscreteSampler { /** The mask to convert a {@code byte} to an unsigned 8-bit integer. */ private static final int MASK = 0xff; + /** Limit for look-up table 1. */ + private final int t1; + /** Limit for look-up table 2. */ + private final int t2; + /** Limit for look-up table 3. */ + private final int t3; + /** Limit for look-up table 4. */ + private final int t4; + /** Look-up table table1. */ - private final byte[] table1; + private byte[] table1; /** Look-up table table2. */ - private final byte[] table2; + private byte[] table2; /** Look-up table table3. */ - private final byte[] table3; + private byte[] table3; /** Look-up table table4. */ - private final byte[] table4; + private byte[] table4; /** Look-up table table5. */ - private final byte[] table5; + private byte[] table5; /** - * @param n1 Size of table 1. - * @param n2 Size of table 2. - * @param n3 Size of table 3. - * @param n4 Size of table 4. - * @param n5 Size of table 5. + * @param rng Generator of uniformly distributed random numbers. + * @param distributionName Distribution name. + * @param prob The probabilities. + * @param offset The offset (must be positive). */ - IndexTable8(int n1, int n2, int n3, int n4, int n5) { + MarsagliaTsangWangBase64Int8DiscreteSampler(UniformRandomProvider rng, + String distributionName, + int[] prob, + int offset) { + super(rng, distributionName); + + // Get table sizes for each base-64 digit + int n1 = 0; + int n2 = 0; + int n3 = 0; + int n4 = 0; + int n5 = 0; + for (final int m : prob) { + n1 += getBase64Digit(m, 1); + n2 += getBase64Digit(m, 2); + n3 += getBase64Digit(m, 3); + n4 += getBase64Digit(m, 4); + n5 += getBase64Digit(m, 5); + } + table1 = new byte[n1]; table2 = new byte[n2]; table3 = new byte[n3]; table4 = new byte[n4]; table5 = new byte[n5]; - } - @Override - public void fillTable1(int from, int to, int value) { fill(table1, from, to, value); } - @Override - public void fillTable2(int from, int to, int value) { fill(table2, from, to, value); } - @Override - public void fillTable3(int from, int to, int value) { fill(table3, from, to, value); } - @Override - public void fillTable4(int from, int to, int value) { fill(table4, from, to, value); } - @Override - public void fillTable5(int from, int to, int value) { fill(table5, from, to, value); } + // Compute offsets + t1 = n1 << 24; + t2 = t1 + (n2 << 18); + t3 = t2 + (n3 << 12); + t4 = t3 + (n4 << 6); + n1 = n2 = n3 = n4 = n5 = 0; + + // Fill tables + for (int i = 0; i < prob.length; i++) { + final int m = prob[i]; + // Primitive type conversion will extract lower 8 bits + final byte k = (byte) (i + offset); + fill(table1, n1, n1 += getBase64Digit(m, 1), k); + fill(table2, n2, n2 += getBase64Digit(m, 2), k); + fill(table3, n3, n3 += getBase64Digit(m, 3), k); + fill(table4, n4, n4 += getBase64Digit(m, 4), k); + fill(table5, n5, n5 += getBase64Digit(m, 5), k); + } + } /** * Fill the table with the value. @@ -182,68 +194,115 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { * @param to Upper bound index (exclusive) * @param value Value. */ - private static void fill(byte[] table, int from, int to, int value) { + private static void fill(byte[] table, int from, int to, byte value) { while (from < to) { - // Primitive type conversion will extract lower 8 bits - table[from++] = (byte) value; + table[from++] = value; } } + /** {@inheritDoc} */ @Override - public int getTable1(int index) { return table1[index] & MASK; } - @Override - public int getTable2(int index) { return table2[index] & MASK; } - @Override - public int getTable3(int index) { return table3[index] & MASK; } - @Override - public int getTable4(int index) { return table4[index] & MASK; } - @Override - public int getTable5(int index) { return table5[index] & MASK; } + public int sample() { + final int j = rng.nextInt() >>> 2; + if (j < t1) { + return table1[j >>> 24] & MASK; + } + if (j < t2) { + return table2[(j - t1) >>> 18] & MASK; + } + if (j < t3) { + return table3[(j - t2) >>> 12] & MASK; + } + if (j < t4) { + return table4[(j - t3) >>> 6] & MASK; + } + // Note the tables are filled on the assumption that the sum of the probabilities. + // is >=2^30. If this is not true then the final table table5 will be smaller by the + // difference. So the tables *must* be constructed correctly. + return table5[j - t4] & MASK; + } } /** - * Index table for a 16-bit index. + * An implementation for the sample algorithm based on the decomposition of the + * index in the range {@code [0,2^30)} into 5 base-64 digits with 16-bit backing storage. */ - private static class IndexTable16 implements IndexTable { - /** The mask to convert a {@code short} to an unsigned 16-bit integer. */ + private static class MarsagliaTsangWangBase64Int16DiscreteSampler + extends MarsagliaTsangWangDiscreteSampler { + /** The mask to convert a {@code byte} to an unsigned 16-bit integer. */ private static final int MASK = 0xffff; + /** Limit for look-up table 1. */ + private final int t1; + /** Limit for look-up table 2. */ + private final int t2; + /** Limit for look-up table 3. */ + private final int t3; + /** Limit for look-up table 4. */ + private final int t4; + /** Look-up table table1. */ - private final short[] table1; + private short[] table1; /** Look-up table table2. */ - private final short[] table2; + private short[] table2; /** Look-up table table3. */ - private final short[] table3; + private short[] table3; /** Look-up table table4. */ - private final short[] table4; + private short[] table4; /** Look-up table table5. */ - private final short[] table5; + private short[] table5; /** - * @param n1 Size of table 1. - * @param n2 Size of table 2. - * @param n3 Size of table 3. - * @param n4 Size of table 4. - * @param n5 Size of table 5. + * @param rng Generator of uniformly distributed random numbers. + * @param distributionName Distribution name. + * @param prob The probabilities. + * @param offset The offset (must be positive). */ - IndexTable16(int n1, int n2, int n3, int n4, int n5) { + MarsagliaTsangWangBase64Int16DiscreteSampler(UniformRandomProvider rng, + String distributionName, + int[] prob, + int offset) { + super(rng, distributionName); + + // Get table sizes for each base-64 digit + int n1 = 0; + int n2 = 0; + int n3 = 0; + int n4 = 0; + int n5 = 0; + for (final int m : prob) { + n1 += getBase64Digit(m, 1); + n2 += getBase64Digit(m, 2); + n3 += getBase64Digit(m, 3); + n4 += getBase64Digit(m, 4); + n5 += getBase64Digit(m, 5); + } + table1 = new short[n1]; table2 = new short[n2]; table3 = new short[n3]; table4 = new short[n4]; table5 = new short[n5]; - } - @Override - public void fillTable1(int from, int to, int value) { fill(table1, from, to, value); } - @Override - public void fillTable2(int from, int to, int value) { fill(table2, from, to, value); } - @Override - public void fillTable3(int from, int to, int value) { fill(table3, from, to, value); } - @Override - public void fillTable4(int from, int to, int value) { fill(table4, from, to, value); } - @Override - public void fillTable5(int from, int to, int value) { fill(table5, from, to, value); } + // Compute offsets + t1 = n1 << 24; + t2 = t1 + (n2 << 18); + t3 = t2 + (n3 << 12); + t4 = t3 + (n4 << 6); + n1 = n2 = n3 = n4 = n5 = 0; + + // Fill tables + for (int i = 0; i < prob.length; i++) { + final int m = prob[i]; + // Primitive type conversion will extract lower 16 bits + final short k = (short) (i + offset); + fill(table1, n1, n1 += getBase64Digit(m, 1), k); + fill(table2, n2, n2 += getBase64Digit(m, 2), k); + fill(table3, n3, n3 += getBase64Digit(m, 3), k); + fill(table4, n4, n4 += getBase64Digit(m, 4), k); + fill(table5, n5, n5 += getBase64Digit(m, 5), k); + } + } /** * Fill the table with the value. @@ -253,65 +312,112 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { * @param to Upper bound index (exclusive) * @param value Value. */ - private static void fill(short[] table, int from, int to, int value) { + private static void fill(short[] table, int from, int to, short value) { while (from < to) { - // Primitive type conversion will extract lower 16 bits - table[from++] = (short) value; + table[from++] = value; } } + /** {@inheritDoc} */ @Override - public int getTable1(int index) { return table1[index] & MASK; } - @Override - public int getTable2(int index) { return table2[index] & MASK; } - @Override - public int getTable3(int index) { return table3[index] & MASK; } - @Override - public int getTable4(int index) { return table4[index] & MASK; } - @Override - public int getTable5(int index) { return table5[index] & MASK; } + public int sample() { + final int j = rng.nextInt() >>> 2; + if (j < t1) { + return table1[j >>> 24] & MASK; + } + if (j < t2) { + return table2[(j - t1) >>> 18] & MASK; + } + if (j < t3) { + return table3[(j - t2) >>> 12] & MASK; + } + if (j < t4) { + return table4[(j - t3) >>> 6] & MASK; + } + // Note the tables are filled on the assumption that the sum of the probabilities. + // is >=2^30. If this is not true then the final table table5 will be smaller by the + // difference. So the tables *must* be constructed correctly. + return table5[j - t4] & MASK; + } } /** - * Index table for a 32-bit index. + * An implementation for the sample algorithm based on the decomposition of the + * index in the range {@code [0,2^30)} into 5 base-64 digits with 32-bit backing storage. */ - private static class IndexTable32 implements IndexTable { + private static class MarsagliaTsangWangBase64Int32DiscreteSampler + extends MarsagliaTsangWangDiscreteSampler { + + /** Limit for look-up table 1. */ + private final int t1; + /** Limit for look-up table 2. */ + private final int t2; + /** Limit for look-up table 3. */ + private final int t3; + /** Limit for look-up table 4. */ + private final int t4; + /** Look-up table table1. */ - private final int[] table1; + private int[] table1; /** Look-up table table2. */ - private final int[] table2; + private int[] table2; /** Look-up table table3. */ - private final int[] table3; + private int[] table3; /** Look-up table table4. */ - private final int[] table4; + private int[] table4; /** Look-up table table5. */ - private final int[] table5; + private int[] table5; /** - * @param n1 Size of table 1. - * @param n2 Size of table 2. - * @param n3 Size of table 3. - * @param n4 Size of table 4. - * @param n5 Size of table 5. + * @param rng Generator of uniformly distributed random numbers. + * @param distributionName Distribution name. + * @param prob The probabilities. + * @param offset The offset (must be positive). */ - IndexTable32(int n1, int n2, int n3, int n4, int n5) { + MarsagliaTsangWangBase64Int32DiscreteSampler(UniformRandomProvider rng, + String distributionName, + int[] prob, + int offset) { + super(rng, distributionName); + + // Get table sizes for each base-64 digit + int n1 = 0; + int n2 = 0; + int n3 = 0; + int n4 = 0; + int n5 = 0; + for (final int m : prob) { + n1 += getBase64Digit(m, 1); + n2 += getBase64Digit(m, 2); + n3 += getBase64Digit(m, 3); + n4 += getBase64Digit(m, 4); + n5 += getBase64Digit(m, 5); + } + table1 = new int[n1]; table2 = new int[n2]; table3 = new int[n3]; table4 = new int[n4]; table5 = new int[n5]; - } - @Override - public void fillTable1(int from, int to, int value) { fill(table1, from, to, value); } - @Override - public void fillTable2(int from, int to, int value) { fill(table2, from, to, value); } - @Override - public void fillTable3(int from, int to, int value) { fill(table3, from, to, value); } - @Override - public void fillTable4(int from, int to, int value) { fill(table4, from, to, value); } - @Override - public void fillTable5(int from, int to, int value) { fill(table5, from, to, value); } + // Compute offsets + t1 = n1 << 24; + t2 = t1 + (n2 << 18); + t3 = t2 + (n3 << 12); + t4 = t3 + (n4 << 6); + n1 = n2 = n3 = n4 = n5 = 0; + + // Fill tables + for (int i = 0; i < prob.length; i++) { + final int m = prob[i]; + final int k = i + offset; + fill(table1, n1, n1 += getBase64Digit(m, 1), k); + fill(table2, n2, n2 += getBase64Digit(m, 2), k); + fill(table3, n3, n3 += getBase64Digit(m, 3), k); + fill(table4, n4, n4 += getBase64Digit(m, 4), k); + fill(table5, n5, n5 += getBase64Digit(m, 5), k); + } + } /** * Fill the table with the value. @@ -327,16 +433,121 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { } } + /** {@inheritDoc} */ @Override - public int getTable1(int index) { return table1[index]; } + public int sample() { + final int j = rng.nextInt() >>> 2; + if (j < t1) { + return table1[j >>> 24]; + } + if (j < t2) { + return table2[(j - t1) >>> 18]; + } + if (j < t3) { + return table3[(j - t2) >>> 12]; + } + if (j < t4) { + return table4[(j - t3) >>> 6]; + } + // Note the tables are filled on the assumption that the sum of the probabilities. + // is >=2^30. If this is not true then the final table table5 will be smaller by the + // difference. So the tables *must* be constructed correctly. + return table5[j - t4]; + } + } + + /** + * Return a fixed result for the Binomial distribution. This is a special class to handle + * an edge case of probability of success equal to 0 or 1. + */ + private static class MarsagliaTsangWangFixedResultBinomialSampler + extends MarsagliaTsangWangDiscreteSampler { + /** The result. */ + private final int result; + + /** + * @param result Result. + */ + MarsagliaTsangWangFixedResultBinomialSampler(int result) { + super(null, BINOMIAL_NAME); + this.result = result; + } + @Override - public int getTable2(int index) { return table2[index]; } + public int sample() { + return result; + } + + /** {@inheritDoc} */ @Override - public int getTable3(int index) { return table3[index]; } + public String toString() { + return BINOMIAL_NAME + " deviate"; + } + } + + /** + * Return an inversion result for the Binomial distribution. This assumes the + * following: + * + * <pre> + * Binomial(n, p) = 1 - Binomial(n, 1 - p) + * </pre> + */ + private static class MarsagliaTsangWangInversionBinomialSampler + extends MarsagliaTsangWangDiscreteSampler { + /** The number of trials. */ + private final int trials; + /** The Binomial distribution sampler. */ + private final MarsagliaTsangWangDiscreteSampler sampler; + + /** + * @param trials Number of trials. + * @param sampler Binomial distribution sampler. + */ + MarsagliaTsangWangInversionBinomialSampler(int trials, + MarsagliaTsangWangDiscreteSampler sampler) { + super(null, BINOMIAL_NAME); + this.trials = trials; + this.sampler = sampler; + } + @Override - public int getTable4(int index) { return table4[index]; } + public int sample() { + return trials - sampler.sample(); + } + + /** {@inheritDoc} */ @Override - public int getTable5(int index) { return table5[index]; } + public String toString() { + return sampler.toString(); + } + } + + /** + * @param rng Generator of uniformly distributed random numbers. + * @param distributionName Distribution name. + */ + protected MarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng, + String distributionName) { + this.rng = rng; + this.distributionName = distributionName; + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return "Marsaglia Tsang Wang " + distributionName + " deviate [" + rng.toString() + "]"; + } + + /** + * Gets the k<sup>th</sup> base 64 digit of {@code m}. + * + * @param m the value m. + * @param k the digit. + * @return the base 64 digit + */ + private static int getBase64Digit(int m, int k) { + return (m >>> (30 - 6 * k)) & 63; } /** @@ -346,89 +557,57 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { * <p>The sum of the probabilities must be >= 2<sup>30</sup>. Only the * values for cumulative probability up to 2<sup>30</sup> will be sampled.</p> * - * <p>Note: This is package-private for use by discrete distribution samplers that can - * compute their probability distribution.</p> - * * @param rng Generator of uniformly distributed random numbers. + * @param distributionName Distribution name. * @param prob The probabilities. * @param offset The offset (must be positive). - * @throws IllegalArgumentException if the offset is negative or the maximum sample index - * exceeds the maximum positive {@code int} value (2<sup>31</sup> - 1). + * @return Sampler. */ - MarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng, - int[] prob, - int offset) { - if (offset < 0) { - throw new IllegalArgumentException("Unsupported offset: " + offset); - } - if ((long) prob.length + offset > Integer.MAX_VALUE) { - throw new IllegalArgumentException("Unsupported sample index: " + (prob.length + offset)); - } - - this.rng = rng; + private static MarsagliaTsangWangDiscreteSampler createSampler(UniformRandomProvider rng, + String distributionName, + int[] prob, + int offset) { + // Note: No argument checks for private method. - // Get table sizes for each base-64 digit - int n1 = 0; - int n2 = 0; - int n3 = 0; - int n4 = 0; - int n5 = 0; - for (final int m : prob) { - n1 += getBase64Digit(m, 1); - n2 += getBase64Digit(m, 2); - n3 += getBase64Digit(m, 3); - n4 += getBase64Digit(m, 4); - n5 += getBase64Digit(m, 5); - } - - // Allocate tables based on the maximum index + // Choose implementation based on the maximum index final int maxIndex = prob.length + offset - 1; - if (maxIndex < UNSIGNED_INT_8) { - indexTable = new IndexTable8(n1, n2, n3, n4, n5); - } else if (maxIndex < UNSIGNED_INT_16) { - indexTable = new IndexTable16(n1, n2, n3, n4, n5); - } else { - indexTable = new IndexTable32(n1, n2, n3, n4, n5); + if (maxIndex < INT_8) { + return new MarsagliaTsangWangBase64Int8DiscreteSampler(rng, distributionName, prob, offset); } - - // Compute offsets - t1 = n1 << 24; - t2 = t1 + (n2 << 18); - t3 = t2 + (n3 << 12); - t4 = t3 + (n4 << 6); - n1 = n2 = n3 = n4 = n5 = 0; - - // Fill tables - for (int i = 0; i < prob.length; i++) { - final int m = prob[i]; - final int k = i + offset; - indexTable.fillTable1(n1, n1 += getBase64Digit(m, 1), k); - indexTable.fillTable2(n2, n2 += getBase64Digit(m, 2), k); - indexTable.fillTable3(n3, n3 += getBase64Digit(m, 3), k); - indexTable.fillTable4(n4, n4 += getBase64Digit(m, 4), k); - indexTable.fillTable5(n5, n5 += getBase64Digit(m, 5), k); + if (maxIndex < INT_16) { + return new MarsagliaTsangWangBase64Int16DiscreteSampler(rng, distributionName, prob, offset); } + return new MarsagliaTsangWangBase64Int32DiscreteSampler(rng, distributionName, prob, offset); } + // ========================================================================= + // The following factory methods are the public API to construct a sampler for: + // - Any discrete probability distribution (from provided double[] probabilities) + // - Poisson distribution for mean <= 1024 + // - Binomial distribution for trials <= 65535 + // ========================================================================= + /** - * Creates a sampler. + * Creates a sampler for a given probability distribution. * - * <p>The probabilities will be normalised using their sum. The only requirement is the sum - * is positive.</p> + * <p>The probabilities will be normalised using their sum. The only requirement + * is the sum is positive.</p> * - * <p>The sum of the probabilities is normalised to 2<sup>30</sup>. Any probability less - * than 2<sup>-30</sup> will not be observed in samples. An adjustment is made to the maximum - * probability to compensate for round-off during conversion.</p> + * <p>The sum of the probabilities is normalised to 2<sup>30</sup>. Note that + * probabilities are adjusted to the nearest 1<sup>-30</sup> due to round-off during + * the normalisation conversion. Consequently any probability less than 2<sup>-31</sup> + * will not be observed in samples.</p> * * @param rng Generator of uniformly distributed random numbers. * @param probabilities The list of probabilities. + * @return Sampler. * @throws IllegalArgumentException if {@code probabilities} is null or empty, a * probability is negative, infinite or {@code NaN}, or the sum of all * probabilities is not strictly positive. */ - public MarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng, - double[] probabilities) { - this(rng, normaliseProbabilities(probabilities), 0); + public static MarsagliaTsangWangDiscreteSampler createDiscreteDistribution(UniformRandomProvider rng, + double[] probabilities) { + return createSampler(rng, DISCRETE_NAME, normaliseProbabilities(probabilities), 0); } /** @@ -444,7 +623,7 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { final double sumProb = validateProbabilities(probabilities); // Compute the normalisation: 2^30 / sum - final double normalisation = (1 << 30) / sumProb; + final double normalisation = INT_30 / sumProb; final int[] prob = new int[probabilities.length]; int sum = 0; int max = 0; @@ -463,7 +642,7 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { // The sum must be >= 2^30. // Here just compensate the difference onto the highest probability. - prob[mode] += (1 << 30) - sum; + prob[mode] += INT_30 - sum; return prob; } @@ -500,41 +679,272 @@ public class MarsagliaTsangWangDiscreteSampler implements DiscreteSampler { } /** - * Gets the k<sup>th</sup> base 64 digit of {@code m}. + * Creates a sampler for the Poisson distribution. * - * @param m the value m. - * @param k the digit. - * @return the base 64 digit + * <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p> + * + * <p>Storage requirements depend on the tabulated probability values. Example storage + * requirements are listed below.</p> + * + * <pre> + * mean table size kB + * 0.25 882 0.88 + * 0.5 1135 1.14 + * 1 1200 1.20 + * 2 1451 1.45 + * 4 1955 1.96 + * 8 2961 2.96 + * 16 4410 4.41 + * 32 6115 6.11 + * 64 8499 8.50 + * 128 11528 11.53 + * 256 15935 31.87 + * 512 20912 41.82 + * 1024 30614 61.23 + * </pre> + * + * <p>Note: Storage changes to 2 bytes per index between {@code mean=128} and {@code mean=256}.</p> + * + * @param rng Generator of uniformly distributed random numbers. + * @param mean Mean. + * @return Sampler. + * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}. */ - private static int getBase64Digit(int m, int k) { - return (m >>> (30 - 6 * k)) & 63; + public static MarsagliaTsangWangDiscreteSampler createPoissonDistribution(UniformRandomProvider rng, + double mean) { + if (mean <= 0) { + throw new IllegalArgumentException("mean is not strictly positive: " + mean); + } + // The algorithm is not valid if Math.floor(mean) is not an integer. + if (mean > MAX_POISSON_MEAN) { + throw new IllegalArgumentException("mean " + mean + " > " + MAX_POISSON_MEAN); + } + + // Probabilities are 30-bit integers, assumed denominator 2^30 + int[] prob; + // This is the minimum sample value: prob[x - offset] = p(x) + int offset; + + // Generate P's from 0 if mean < 21.4 + if (mean < 21.4) { + final double p0 = Math.exp(-mean); + + // Recursive update of Poisson probability until the value is too small + // p(x + 1) = p(x) * mean / (x + 1) + double p = p0; + int i; + for (i = 1; p * DOUBLE_31 >= 1; i++) { + p *= mean / i; + } + + // Fill P as (30-bit integers) + offset = 0; + final int size = i - 1; + prob = new int[size]; + + p = p0; + prob[0] = toUnsignedInt30(p); + // The sum must exceed 2^30. In edges cases this is false due to round-off. + int sum = prob[0]; + for (i = 1; i < prob.length; i++) { + p *= mean / i; + prob[i] = toUnsignedInt30(p); + sum += prob[i]; + } + + // If the sum is < 2^30 add the remaining sum to the mode (floor(mean)). + prob[(int) mean] += Math.max(0, INT_30 - sum); + } else { + // If mean >= 21.4, generate from largest p-value up, then largest down. + // The largest p-value will be at the mode (floor(mean)). + + // Find p(x=mode) + final int mode = (int) mean; + // This transform is stable until mean >= 1941 where p will result in Infinity + // before the divisor i is large enough to start reducing the product (i.e. i > c). + final double c = mean * Math.exp(-mean / mode); + double p = 1.0; + int i; + for (i = 1; i <= mode; i++) { + p *= c / i; + } + final double pMode = p; + // Note this will exit when i overflows to negative so no check on the range + for (i = mode + 1; p * DOUBLE_31 >= 1; i++) { + p *= mean / i; + } + final int last = i - 2; + p = pMode; + int j = -1; + for (i = mode - 1; i >= 0; i--) { + p *= (i + 1) / mean; + if (p * DOUBLE_31 < 1) { + j = i; + break; + } + } + + // Fill P as (30-bit integers) + offset = j + 1; + final int size = last - offset + 1; + prob = new int[size]; + + p = pMode; + prob[mode - offset] = toUnsignedInt30(p); + // The sum must exceed 2^30. In edges cases this is false due to round-off. + int sum = prob[mode - offset]; + for (i = mode + 1; i <= last; i++) { + p *= mean / i; + prob[i - offset] = toUnsignedInt30(p); + sum += prob[i - offset]; + } + p = pMode; + for (i = mode - 1; i >= offset; i--) { + p *= (i + 1) / mean; + prob[i - offset] = toUnsignedInt30(p); + sum += prob[i - offset]; + } + + // If the sum is < 2^30 add the remaining sum to the mode. + // If above 2^30 then the effect is truncation of the long tail of the distribution. + prob[mode - offset] += Math.max(0, INT_30 - sum); + } + + return createSampler(rng, POISSON_NAME, prob, offset); } - /** {@inheritDoc} */ - @Override - public int sample() { - final int j = rng.nextInt() >>> 2; - if (j < t1) { - return indexTable.getTable1(j >>> 24); + /** + * Creates a sampler for the Binomial distribution. + * + * <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p> + * + * <p>Storage requirements depend on the tabulated probability values. Example storage + * requirements are listed below (in kB).</p> + * + * <pre> + * p + * trials 0.5 0.1 0.01 0.001 + * 4 0.06 0.63 0.44 0.44 + * 16 0.69 1.14 0.76 0.44 + * 64 4.73 2.40 1.14 0.51 + * 256 8.63 5.17 1.89 0.82 + * 1024 31.12 9.45 3.34 0.89 + * </pre> + * + * <p>The method requires that the Binomial distribution probability at {@code x=0} can be computed. + * This will fail when {@code (1 - p)^trials == 0} which requires {@code trials} to be large + * and/or {@code p} to be small. In this case an exception is raised.</p> + * + * @param rng Generator of uniformly distributed random numbers. + * @param trials Number of trials. + * @param p Probability of success. + * @return Sampler. + * @throws IllegalArgumentException if {@code trials < 0} or {@code trials >= 2^16}, + * {@code p} is not in the range {@code [0-1]}, or the probability distribution cannot + * be computed. + */ + public static MarsagliaTsangWangDiscreteSampler createBinomialDistribution(UniformRandomProvider rng, + int trials, + double p) { + if (trials < 0) { + throw new IllegalArgumentException("Trials is not positive: " + trials); + } + if (p < 0 || p > 1) { + throw new IllegalArgumentException("Probability is not in range [0,1]: " + p); + } + + // Handle edge cases + if (p == 0) { + return new MarsagliaTsangWangFixedResultBinomialSampler(0); + } + if (p == 1) { + return new MarsagliaTsangWangFixedResultBinomialSampler(trials); } - if (j < t2) { - return indexTable.getTable2((j - t1) >>> 18); + + // A simple check using the supported index size. + if (trials >= INT_16) { + throw new IllegalArgumentException("Unsupported number of trials: " + trials); } - if (j < t3) { - return indexTable.getTable3((j - t2) >>> 12); + + // The maximum supported value for Math.exp is approximately -744. + // This occurs when trials is large and p is close to 1. + // Handle this by using an inversion: generate j=Binomial(n,1-p), return n-j + final boolean inversion = p > 0.5; + if (inversion) { + p = 1 - p; } - if (j < t4) { - return indexTable.getTable4((j - t3) >>> 6); + + // Check if the distribution can be computed + final double p0 = Math.exp(trials * Math.log(1 - p)); + if (p0 < Double.MIN_VALUE) { + throw new IllegalArgumentException("Unable to compute distribution"); } - // Note the tables are filled on the assumption that the sum of the probabilities. - // is >=2^30. If this is not true then the final table table5 will be smaller by the - // difference. So the tables *must* be constructed correctly. - return indexTable.getTable5(j - t4); + + // First find size of probability array + double t = p0; + final double h = p / (1 - p); + // Find first probability + int begin = 0; + if (t * DOUBLE_31 < 1) { + // Somewhere after p(0) + // Note: + // If this loop is entered p(0) is < 2^-31. + // This has been tested at the extreme for p(0)=Double.MIN_VALUE and either + // p=0.5 or trials=2^16-1 and does not fail to find the beginning. + for (int i = 1; i <= trials; i++) { + t *= (trials + 1 - i) * h / i; + if (t * DOUBLE_31 >= 1) { + begin = i; + break; + } + } + } + // Find last probability + int end = trials; + for (int i = begin + 1; i <= trials; i++) { + t *= (trials + 1 - i) * h / i; + if (t * DOUBLE_31 < 1) { + end = i - 1; + break; + } + } + final int size = end - begin + 1; + final int offset = begin; + + // Then assign probability values as 30-bit integers + final int[] prob = new int[size]; + t = p0; + for (int i = 1; i <= begin; i++) { + t *= (trials + 1 - i) * h / i; + } + int sum = toUnsignedInt30(t); + prob[0] = sum; + for (int i = begin + 1; i <= end; i++) { + t *= (trials + 1 - i) * h / i; + prob[i - begin] = toUnsignedInt30(t); + sum += prob[i - begin]; + } + + // If the sum is < 2^30 add the remaining sum to the mode (floor((n+1)p))). + // If above 2^30 then the effect is truncation of the long tail of the distribution. + final int mode = (int) ((trials + 1) * p) - offset; + prob[mode] += Math.max(0, INT_30 - sum); + + final MarsagliaTsangWangDiscreteSampler sampler = createSampler(rng, BINOMIAL_NAME, prob, offset); + + if (inversion) { + return new MarsagliaTsangWangInversionBinomialSampler(trials, sampler); + } + return sampler; } - /** {@inheritDoc} */ - @Override - public String toString() { - return "Marsaglia Tsang Wang discrete deviate [" + rng.toString() + "]"; + /** + * Convert the probability to an unsigned integer in the range [0,2^30]. + * + * @param p the probability + * @return the integer + */ + private static int toUnsignedInt30(double p) { + return (int) (p * INT_30 + 0.5); } } diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSampler.java deleted file mode 100644 index 4ac66e8..0000000 --- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSampler.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rng.sampling.distribution; - -import org.apache.commons.rng.UniformRandomProvider; - -/** - * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson - * distribution</a> using an optimised look-up table. - * - * <ul> - * <li> - * A Poisson process is simulated using pre-tabulated probabilities, as described - * in George Marsaglia, Wai Wan Tsang, Jingbo Wang (2004) Fast Generation of Discrete - * Random Variables. Journal of Statistical Software. Vol. 11, Issue. 3, pp. 1-11. - * </li> - * </ul> - * - * <p>This sampler is suitable for {@code mean <= 1024}. Larger means accumulate errors - * when tabulating the Poisson probability. For large means, {@link LargeMeanPoissonSampler} - * should be used instead.</p> - * - * <p>Note: The algorithm ignores any observation where for a sample size of - * 2<sup>31</sup> the expected number of occurrences is {@code < 0.5}.</p> - * - * <p>Sampling uses 1 call to {@link UniformRandomProvider#nextInt()}. Storage requirements - * depend on the tabulated probability values. Example storage requirements are listed below.</p> - * - * <pre> - * mean table size kB - * 0.25 882 0.88 - * 0.5 1135 1.14 - * 1 1200 1.20 - * 2 1451 1.45 - * 4 1955 1.96 - * 8 2961 2.96 - * 16 4410 4.41 - * 32 6115 6.11 - * 64 8499 8.50 - * 128 11528 11.53 - * 256 15935 31.87 - * 512 20912 41.82 - * 1024 30614 61.23 - * </pre> - * - * <p>Note: Storage changes to 2 bytes per index when {@code mean=256}.</p> - * - * @since 1.3 - * @see <a href="http://dx.doi.org/10.18637/jss.v011.i03">Margsglia, et al (2004) JSS Vol. - * 11, Issue 3</a> - */ -public class MarsagliaTsangWangSmallMeanPoissonSampler implements DiscreteSampler { - /** - * The value 2<sup>30</sup> as an {@code int}.</p> - */ - private static final int INT_30 = 1 << 30; - /** - * The value 2<sup>31</sup> as an {@code double}.</p> - */ - private static final double DOUBLE_31 = 1L << 31; - /** - * Upper bound to avoid exceeding the table sizes. - * - * <p>The number of possible values of the distribution should not exceed 2^16.</p> - * - * <p>The original source code provided in Marsaglia, et al (2004) has no explicit - * limit but the code fails at mean >= 1941 as the transform to compute p(x=mode) - * produces infinity. Use a conservative limit of 1024.</p> - */ - private static final double MAX_MEAN = 1024; - - /** The delegate. */ - private final DiscreteSampler delegate; - - /** - * Create a new instance. - * - * @param rng Generator of uniformly distributed random numbers. - * @param mean Mean. - * @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}. - */ - public MarsagliaTsangWangSmallMeanPoissonSampler(UniformRandomProvider rng, double mean) { - if (mean <= 0) { - throw new IllegalArgumentException("mean is not strictly positive: " + mean); - } - // The algorithm is not valid if Math.floor(mean) is not an integer. - if (mean > MAX_MEAN) { - throw new IllegalArgumentException("mean " + mean + " > " + MAX_MEAN); - } - - // Probabilities are 30-bit integers, assumed denominator 2^30 - int[] prob; - // This is the minimum sample value: prob[x - offset] = p(x) - int offset; - - // Generate P's from 0 if mean < 21.4 - if (mean < 21.4) { - final double p0 = Math.exp(-mean); - - // Recursive update of Poisson probability until the value is too small - // p(x + 1) = p(x) * mean / (x + 1) - double p = p0; - int i; - for (i = 1; p * DOUBLE_31 >= 1; i++) { - p *= mean / i; - } - - // Fill P as (30-bit integers) - offset = 0; - final int size = i - 1; - prob = new int[size]; - - p = p0; - prob[0] = toUnsignedInt30(p); - // The sum must exceed 2^30. In edges cases this is false due to round-off. - int sum = prob[0]; - for (i = 1; i < prob.length; i++) { - p *= mean / i; - prob[i] = toUnsignedInt30(p); - sum += prob[i]; - } - - // If the sum is < 2^30 add the remaining sum to the mode (floor(mean)). - prob[(int) mean] += Math.max(0, INT_30 - sum); - } else { - // If mean >= 21.4, generate from largest p-value up, then largest down. - // The largest p-value will be at the mode (floor(mean)). - - // Find p(x=mode) - final int mode = (int) mean; - // This transform is stable until mean >= 1941 where p will result in Infinity - // before the divisor i is large enough to start reducing the product (i.e. i > c). - final double c = mean * Math.exp(-mean / mode); - double p = 1.0; - int i; - for (i = 1; i <= mode; i++) { - p *= c / i; - } - final double pX = p; - // Note this will exit when i overflows to negative so no check on the range - for (i = mode + 1; p * DOUBLE_31 >= 1; i++) { - p *= mean / i; - } - final int last = i - 2; - p = pX; - int j = -1; - for (i = mode - 1; i >= 0; i--) { - p *= (i + 1) / mean; - if (p * DOUBLE_31 < 1) { - j = i; - break; - } - } - - // Fill P as (30-bit integers) - offset = j + 1; - final int size = last - offset + 1; - prob = new int[size]; - - p = pX; - prob[mode - offset] = toUnsignedInt30(p); - // The sum must exceed 2^30. In edges cases this is false due to round-off. - int sum = prob[mode - offset]; - for (i = mode + 1; i <= last; i++) { - p *= mean / i; - prob[i - offset] = toUnsignedInt30(p); - sum += prob[i - offset]; - } - p = pX; - for (i = mode - 1; i >= offset; i--) { - p *= (i + 1) / mean; - prob[i - offset] = toUnsignedInt30(p); - sum += prob[i - offset]; - } - - // If the sum is < 2^30 add the remaining sum to the mode - prob[mode - offset] += Math.max(0, INT_30 - sum); - } - - delegate = new MarsagliaTsangWangDiscreteSampler(rng, prob, offset); - } - - /** - * Convert the probability to an unsigned integer in the range [0,2^30]. - * - * @param p the probability - * @return the integer - */ - private static int toUnsignedInt30(double p) { - return (int) (p * INT_30 + 0.5); - } - - /** {@inheritDoc} */ - @Override - public int sample() { - return delegate.sample(); - } - - /** {@inheritDoc} */ - @Override - public String toString() { - return "Small Mean Poisson " + delegate.toString(); - } -} diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java index 1cb06c4..c69a853 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java @@ -53,12 +53,12 @@ public class DiscreteSamplersList { add(LIST, new org.apache.commons.math3.distribution.BinomialDistribution(unusedRng, trialsBinomial, probSuccessBinomial), // range [9,16] MathArrays.sequence(8, 9, 1), - new MarsagliaTsangWangBinomialSampler(RandomSource.create(RandomSource.WELL_19937_A), trialsBinomial, probSuccessBinomial)); + MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(RandomSource.create(RandomSource.WELL_19937_A), trialsBinomial, probSuccessBinomial)); // Inverted add(LIST, new org.apache.commons.math3.distribution.BinomialDistribution(unusedRng, trialsBinomial, 1 - probSuccessBinomial), // range [4,11] = [20-16, 20-9] MathArrays.sequence(8, 4, 1), - new MarsagliaTsangWangBinomialSampler(RandomSource.create(RandomSource.WELL_19937_C), trialsBinomial, 1 - probSuccessBinomial)); + MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(RandomSource.create(RandomSource.WELL_19937_C), trialsBinomial, 1 - probSuccessBinomial)); // Geometric ("inverse method"). final double probSuccessGeometric = 0.21; @@ -139,7 +139,7 @@ public class DiscreteSamplersList { new KempSmallMeanPoissonSampler(RandomSource.create(RandomSource.XO_SHI_RO_128_PLUS), meanPoisson)); add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(unusedRng, meanPoisson, epsilonPoisson, maxIterationsPoisson), MathArrays.sequence(10, 0, 1), - new MarsagliaTsangWangSmallMeanPoissonSampler(RandomSource.create(RandomSource.MT), meanPoisson)); + MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(RandomSource.create(RandomSource.XO_SHI_RO_128_PLUS), meanPoisson)); // Poisson (40 < mean < 80). final double largeMeanPoisson = 67.89; add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(unusedRng, largeMeanPoisson, epsilonPoisson, maxIterationsPoisson), @@ -151,7 +151,7 @@ public class DiscreteSamplersList { new LargeMeanPoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), largeMeanPoisson)); add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(unusedRng, largeMeanPoisson, epsilonPoisson, maxIterationsPoisson), MathArrays.sequence(50, (int) (largeMeanPoisson - 25), 1), - new MarsagliaTsangWangSmallMeanPoissonSampler(RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PLUS), largeMeanPoisson)); + MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(RandomSource.create(RandomSource.XO_RO_SHI_RO_128_PLUS), largeMeanPoisson)); // Poisson (mean >> 40). final double veryLargeMeanPoisson = 543.21; add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(unusedRng, veryLargeMeanPoisson, epsilonPoisson, maxIterationsPoisson), @@ -163,12 +163,12 @@ public class DiscreteSamplersList { new LargeMeanPoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), veryLargeMeanPoisson)); add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(unusedRng, veryLargeMeanPoisson, epsilonPoisson, maxIterationsPoisson), MathArrays.sequence(100, (int) (veryLargeMeanPoisson - 50), 1), - new MarsagliaTsangWangSmallMeanPoissonSampler(RandomSource.create(RandomSource.XO_RO_SHI_RO_64_SS), veryLargeMeanPoisson)); + MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(RandomSource.create(RandomSource.XO_RO_SHI_RO_64_SS), veryLargeMeanPoisson)); // Any discrete distribution - double[] discreteProbabilities = new double[] { 0.1, 0.2, 0.3, 0.4, 0.5 }; + final double[] discreteProbabilities = new double[] { 0.1, 0.2, 0.3, 0.4, 0.5 }; add(LIST, discreteProbabilities, - new MarsagliaTsangWangDiscreteSampler(RandomSource.create(RandomSource.XO_SHI_RO_512_PLUS), discreteProbabilities)); + MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(RandomSource.create(RandomSource.XO_SHI_RO_512_PLUS), discreteProbabilities)); } catch (Exception e) { System.err.println("Unexpected exception while creating the list of samplers: " + e); e.printStackTrace(System.err); diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSamplerTest.java deleted file mode 100644 index bfe5052..0000000 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangBinomialSamplerTest.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rng.sampling.distribution; - -import org.apache.commons.rng.UniformRandomProvider; -import org.junit.Test; - -import org.junit.Assert; - -/** - * Test for the {@link MarsagliaTsangWangBinomialSampler}. The tests hit edge cases for - * the sampler. - */ -public class MarsagliaTsangWangBinomialSamplerTest { - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithTrialsBelow0() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = -1; - final double p = 0.5; - @SuppressWarnings("unused") - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - } - - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithTrialsAboveMax() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = 1 << 16; // 2^16 - final double p = 0.5; - @SuppressWarnings("unused") - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - } - - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithProbabilityBelow0() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = 1; - final double p = -0.5; - @SuppressWarnings("unused") - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - } - - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithProbabilityAbove1() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = 1; - final double p = 1.5; - @SuppressWarnings("unused") - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - } - - /** - * Test the constructor with distribution parameters that create a very small p(0) - * with a high probability of success. - */ - @Test - public void testSamplerWithSmallestP0ValueAndHighestProbabilityOfSuccess() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - // p(0) = Math.exp(trials * Math.log(1-p)) - // p(0) will be smaller as Math.log(1-p) is more negative, which occurs when p is - // larger. - // Since the sampler uses inversion the largest value for p is 0.5. - // At the extreme for p = 0.5: - // trials = Math.log(p(0)) / Math.log(1-p) - // = Math.log(Double.MIN_VALUE) / Math.log(0.5) - // = 1074 - final int trials = (int) Math.floor(Math.log(Double.MIN_VALUE) / Math.log(0.5)); - final double p = 0.5; - // Validate set-up - Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getP0(trials, p), 0); - Assert.assertEquals("Invalid test set-up for p(0)", 0, getP0(trials + 1, p), 0); - - // This will throw if the table does not sum to 2^30 - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - sampler.sample(); - } - - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWhenP0IsZero() { - final UniformRandomProvider rng = new FixedRNG(0); - // As above but increase the trials so p(0) should be zero - final int trials = 1 + (int) Math.floor(Math.log(Double.MIN_VALUE) / Math.log(0.5)); - final double p = 0.5; - // Validate set-up - Assert.assertEquals("Invalid test set-up for p(0)", 0, getP0(trials, p), 0); - @SuppressWarnings("unused") - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - } - - /** - * Test the constructor with distribution parameters that create a very small p(0) - * with a high number of trials. - */ - @Test - public void testSamplerWithLargestTrialsAndSmallestProbabilityOfSuccess() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - // p(0) = Math.exp(trials * Math.log(1-p)) - // p(0) will be smaller as Math.log(1-p) is more negative, which occurs when p is - // larger. - // Since the sampler uses inversion the largest value for p is 0.5. - // At the extreme for trials = 2^16-1: - // p = 1 - Math.exp(Math.log(p(0)) / trials) - // = 1 - Math.exp(Math.log(Double.MIN_VALUE) / trials) - // = 0.011295152668039599 - final int trials = (1 << 16) - 1; - double p = 1 - Math.exp(Math.log(Double.MIN_VALUE) / trials); - - // Validate set-up - Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getP0(trials, p), 0); - - // Search for larger p until Math.nextAfter(p, 1) produces 0 - double upper = p * 2; - Assert.assertEquals("Invalid test set-up for p(0)", 0, getP0(trials, upper), 0); - - double lower = p; - while (Double.doubleToRawLongBits(lower) + 1 < Double.doubleToRawLongBits(upper)) { - final double mid = (upper + lower) / 2; - if (getP0(trials, mid) == 0) { - upper = mid; - } else { - lower = mid; - } - } - p = lower; - - // Re-validate - Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getP0(trials, p), 0); - Assert.assertEquals("Invalid test set-up for p(0)", 0, getP0(trials, Math.nextAfter(p, 1)), 0); - - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - // This will throw if the table does not sum to 2^30 - sampler.sample(); - } - - /** - * Gets the p(0) value. - * - * @param trials the trials - * @param probabilityOfSuccess the probability of success - * @return the p(0) value - */ - private static double getP0(int trials, double probabilityOfSuccess) { - return Math.exp(trials * Math.log(1 - probabilityOfSuccess)); - } - - @Test - public void testSamplerWithProbability0() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = 1000000; - final double p = 0; - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - for (int i = 0; i < 5; i++) { - Assert.assertEquals(0, sampler.sample()); - } - // Hit the toString() method - Assert.assertTrue(sampler.toString().contains("Binomial")); - } - - @Test - public void testSamplerWithProbability1() { - final UniformRandomProvider rng = new FixedRNG(0); - final int trials = 1000000; - final double p = 1; - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - for (int i = 0; i < 5; i++) { - Assert.assertEquals(trials, sampler.sample()); - } - // Hit the toString() method - Assert.assertTrue(sampler.toString().contains("Binomial")); - } - - /** - * Test the sampler with a large number of trials. This tests the sampler can create the - * Binomial distribution for a large size when a limiting distribution (e.g. the Normal distribution) - * could be used instead. - */ - @Test - public void testSamplerWithLargeNumberOfTrials() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - final int trials = 65000; - final double p = 0.01; - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - // This will throw if the table does not sum to 2^30 - sampler.sample(); - } - - /** - * Test the sampler with a probability of 0.5. This should hit the edge case in the loop to - * search for the last probability of the Binomial distribution. - */ - @Test - public void testSamplerWithProbability0_5() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - final int trials = 10; - final double p = 0.5; - final MarsagliaTsangWangBinomialSampler sampler = new MarsagliaTsangWangBinomialSampler(rng, trials, p); - // This will throw if the table does not sum to 2^30 - sampler.sample(); - } - - /** - * A RNG returning a fixed value. - */ - private static class FixedRNG implements UniformRandomProvider { - /** The value. */ - private final int value; - - /** - * @param value the value - */ - FixedRNG(int value) { - this.value = value; - } - - @Override - public int nextInt() { - return value; - } - - public void nextBytes(byte[] bytes) {} - public void nextBytes(byte[] bytes, int start, int len) {} - public int nextInt(int n) { return 0; } - public long nextLong() { return 0; } - public long nextLong(long n) { return 0; } - public boolean nextBoolean() { return false; } - public float nextFloat() { return 0; } - public double nextDouble() { return 0; } - } -} diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSamplerTest.java index d1fce42..bb17c9e 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSamplerTest.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangDiscreteSamplerTest.java @@ -26,100 +26,67 @@ import org.junit.Test; /** * Test for the {@link MarsagliaTsangWangDiscreteSampler}. The tests hit edge cases for - * the sampler. + * the sampler factory methods that build the normalised probability distribution. + * + * <p>Statistical testing of the sampler is performed using entries in {@link DiscreteSamplersList}.</p> */ public class MarsagliaTsangWangDiscreteSamplerTest { - // Tests for the package-private constructor using int[] + offset - - /** - * Test constructor throws with max index above integer max. - */ - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithMaxIndexAboveIntegerMax() { - final int[] prob = new int[1]; - final int offset = Integer.MAX_VALUE; - createSampler(prob, offset); - } - - /** - * Test constructor throws with negative offset. - */ - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithNegativeOffset() { - final int[] prob = new int[1]; - final int offset = -1; - createSampler(prob, offset); - } - - /** - * Test construction is allowed or when max index equals integer max. - */ - @Test - public void testConstructorWhenMaxIndexEqualsIntegerMax() { - final int[] prob = new int[1]; - prob[0] = 1 << 30; // So the total probability is 2^30 - final int offset = Integer.MAX_VALUE - 1; - createSampler(prob, offset); - } - - /** - * Creates the sampler. - * - * @param prob the probabilities - * @param offset the offset - * @return the sampler - */ - private static MarsagliaTsangWangDiscreteSampler createSampler(final int[] probabilities, int offset) { - final UniformRandomProvider rng = new SplitMix64(0L); - return new MarsagliaTsangWangDiscreteSampler(rng, probabilities, offset); + @Test(expected=IllegalArgumentException.class) + public void testCreateDiscreteDistributionThrowsWithNullProbabilites() { + createDiscreteDistributionSampler(null); } - // Tests for the public constructor using double[] - @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithNullProbabilites() { - createSampler(null); + public void testCreateDiscreteDistributionThrowsWithZeroLengthProbabilites() { + createDiscreteDistributionSampler(new double[0]); } @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithZeroLengthProbabilites() { - createSampler(new double[0]); + public void testCreateDiscreteDistributionThrowsWithNegativeProbabilites() { + createDiscreteDistributionSampler(new double[] { -1, 0.1, 0.2 }); } @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithNegativeProbabilites() { - createSampler(new double[] { -1, 0.1, 0.2 }); + public void testCreateDiscreteDistributionThrowsWithNaNProbabilites() { + createDiscreteDistributionSampler(new double[] { 0.1, Double.NaN, 0.2 }); } @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithNaNProbabilites() { - createSampler(new double[] { 0.1, Double.NaN, 0.2 }); + public void testCreateDiscreteDistributionThrowsWithInfiniteProbabilites() { + createDiscreteDistributionSampler(new double[] { 0.1, Double.POSITIVE_INFINITY, 0.2 }); } @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithInfiniteProbabilites() { - createSampler(new double[] { 0.1, Double.POSITIVE_INFINITY, 0.2 }); + public void testCreateDiscreteDistributionThrowsWithInfiniteSumProbabilites() { + createDiscreteDistributionSampler(new double[] { Double.MAX_VALUE, Double.MAX_VALUE }); } @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithInfiniteSumProbabilites() { - createSampler(new double[] { Double.MAX_VALUE, Double.MAX_VALUE }); + public void testCreateDiscreteDistributionThrowsWithZeroSumProbabilites() { + createDiscreteDistributionSampler(new double[4]); } - @Test(expected=IllegalArgumentException.class) - public void testConstructorThrowsWithZeroSumProbabilites() { - createSampler(new double[4]); + /** + * Test the {@link Object#toString()} method contains the algorithm author names. + */ + @Test + public void testToString() { + final DiscreteSampler sampler = createDiscreteDistributionSampler(new double[] { 0.5, 0.5 }); + String text = sampler.toString(); + for (String item : new String[] { "Marsaglia", "Tsang", "Wang" }) { + Assert.assertTrue("toString missing: " + item, text.contains(item)); + } } /** * Creates the sampler. * - * @param probabilities the probabilities + * @param probabilities Probabilities. * @return the sampler */ - private static MarsagliaTsangWangDiscreteSampler createSampler(double[] probabilities) { + private static MarsagliaTsangWangDiscreteSampler createDiscreteDistributionSampler(double[] probabilities) { final UniformRandomProvider rng = new SplitMix64(0L); - return new MarsagliaTsangWangDiscreteSampler(rng, probabilities); + return MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng, probabilities); } // Sampling tests @@ -174,9 +141,13 @@ public class MarsagliaTsangWangDiscreteSamplerTest { final int offset2 = 1 << 8; final int offset3 = 1 << 16; - final MarsagliaTsangWangDiscreteSampler sampler1 = new MarsagliaTsangWangDiscreteSampler(rng1, prob, offset1); - final MarsagliaTsangWangDiscreteSampler sampler2 = new MarsagliaTsangWangDiscreteSampler(rng2, prob, offset2); - final MarsagliaTsangWangDiscreteSampler sampler3 = new MarsagliaTsangWangDiscreteSampler(rng3, prob, offset3); + final double[] p1 = createProbabilities(offset1, prob); + final double[] p2 = createProbabilities(offset2, prob); + final double[] p3 = createProbabilities(offset3, prob); + + final MarsagliaTsangWangDiscreteSampler sampler1 = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng1, p1); + final MarsagliaTsangWangDiscreteSampler sampler2 = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng2, p2); + final MarsagliaTsangWangDiscreteSampler sampler3 = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng3, p3); for (int i = 0; i < values.length; i++) { // Remove offsets @@ -189,6 +160,21 @@ public class MarsagliaTsangWangDiscreteSamplerTest { } /** + * Creates the probabilities using zero padding below the values. + * + * @param offset the offset + * @param prob the probability values + * @return the zero-padded probabilities + */ + private static double[] createProbabilities(int offset, int[] prob) { + double[] probabilities = new double[offset + prob.length]; + for (int i = 0; i < prob.length; i++) { + probabilities[i + offset] = prob[i]; + } + return probabilities; + } + + /** * Test samples from a distribution expressed using {@code double} probabilities. */ @Test @@ -202,12 +188,12 @@ public class MarsagliaTsangWangDiscreteSamplerTest { // First test the table is completely filled to 2^30 final UniformRandomProvider dummyRng = new FixedSequenceIntProvider(new int[] { 0xffffffff}); - final MarsagliaTsangWangDiscreteSampler dummySampler = new MarsagliaTsangWangDiscreteSampler(dummyRng, probabilities); + final MarsagliaTsangWangDiscreteSampler dummySampler = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(dummyRng, probabilities); // This will throw if the table is incomplete as it hits the upper limit dummySampler.sample(); // Do a test of the actual sampler - final MarsagliaTsangWangDiscreteSampler sampler = new MarsagliaTsangWangDiscreteSampler(rng, probabilities); + final MarsagliaTsangWangDiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createDiscreteDistribution(rng, probabilities); final int numberOfSamples = 10000; final long[] samples = new long[probabilities.length]; @@ -306,9 +292,292 @@ public class MarsagliaTsangWangDiscreteSamplerTest { } /** + * Test the constructor with a bad mean. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreatePoissonDistributionThrowsWithMeanLargerThanUpperBound() { + final UniformRandomProvider rng = new FixedRNG(); + final double mean = 1025; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, mean); + } + + /** + * Test the Poisson distribution with a bad mean. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreatePoissonDistributionThrowsWithZeroMean() { + final UniformRandomProvider rng = new FixedRNG(); + final double mean = 0; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, mean); + } + + /** + * Test the Poisson distribution with the maximum mean. + */ + @Test + public void testCreatePoissonDistributionWithMaximumMean() { + final UniformRandomProvider rng = new FixedRNG(); + final double mean = 1024; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, mean); + } + + /** + * Test the Poisson distribution with a small mean that hits the edge case where the + * probability sum is not 2^30. + */ + @Test + public void testCreatePoissonDistributionWithSmallMean() { + final UniformRandomProvider rng = new FixedRNG(); + final double mean = 0.25; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, mean); + // This will throw if the table does not sum to 2^30 + sampler.sample(); + } + + /** + * Test the Poisson distribution with a medium mean that is at the switch point + * for how the probability distribution is computed. This hits the edge case + * where the loop from the mean decrements to reach zero. + */ + @Test + public void testCreatePoissonDistributionWithMediumMean() { + final UniformRandomProvider rng = new FixedRNG(); + final double mean = 21.4; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createPoissonDistribution(rng, mean); + // This will throw if the table does not sum to 2^30 + sampler.sample(); + } + + /** + * Test the Binomial distribution with a bad number of trials. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreateBinomialDistributionThrowsWithTrialsBelow0() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = -1; + final double p = 0.5; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + } + + /** + * Test the Binomial distribution with an unsupported number of trials. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreateBinomialDistributionThrowsWithTrialsAboveMax() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 1 << 16; // 2^16 + final double p = 0.5; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + } + + /** + * Test the Binomial distribution with probability {@code < 0}. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreateBinomialDistributionThrowsWithProbabilityBelow0() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 1; + final double p = -0.5; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + } + + /** + * Test the Binomial distribution with probability {@code > 1}. + */ + @Test(expected = IllegalArgumentException.class) + public void testCreateBinomialDistributionThrowsWithProbabilityAbove1() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 1; + final double p = 1.5; + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + } + + /** + * Test the Binomial distribution with distribution parameters that create a very small p(0) + * with a high probability of success. + */ + @Test + public void testCreateBinomialDistributionWithSmallestP0ValueAndHighestProbabilityOfSuccess() { + final UniformRandomProvider rng = new FixedRNG(); + // p(0) = Math.exp(trials * Math.log(1-p)) + // p(0) will be smaller as Math.log(1-p) is more negative, which occurs when p is + // larger. + // Since the sampler uses inversion the largest value for p is 0.5. + // At the extreme for p = 0.5: + // trials = Math.log(p(0)) / Math.log(1-p) + // = Math.log(Double.MIN_VALUE) / Math.log(0.5) + // = 1074 + final int trials = (int) Math.floor(Math.log(Double.MIN_VALUE) / Math.log(0.5)); + final double p = 0.5; + // Validate set-up + Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getBinomialP0(trials, p), 0); + Assert.assertEquals("Invalid test set-up for p(0)", 0, getBinomialP0(trials + 1, p), 0); + + // This will throw if the table does not sum to 2^30 + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + sampler.sample(); + } + + /** + * Test the Binomial distribution with distribution parameters that create a p(0) + * that is zero (thus the distribution cannot be computed). + */ + @Test(expected = IllegalArgumentException.class) + public void testCreateBinomialDistributionThrowsWhenP0IsZero() { + final UniformRandomProvider rng = new FixedRNG(); + // As above but increase the trials so p(0) should be zero + final int trials = 1 + (int) Math.floor(Math.log(Double.MIN_VALUE) / Math.log(0.5)); + final double p = 0.5; + // Validate set-up + Assert.assertEquals("Invalid test set-up for p(0)", 0, getBinomialP0(trials, p), 0); + @SuppressWarnings("unused") + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + } + + /** + * Test the Binomial distribution with distribution parameters that create a very small p(0) + * with a high number of trials. + */ + @Test + public void testCreateBinomialDistributionWithLargestTrialsAndSmallestProbabilityOfSuccess() { + final UniformRandomProvider rng = new FixedRNG(); + // p(0) = Math.exp(trials * Math.log(1-p)) + // p(0) will be smaller as Math.log(1-p) is more negative, which occurs when p is + // larger. + // Since the sampler uses inversion the largest value for p is 0.5. + // At the extreme for trials = 2^16-1: + // p = 1 - Math.exp(Math.log(p(0)) / trials) + // = 1 - Math.exp(Math.log(Double.MIN_VALUE) / trials) + // = 0.011295152668039599 + final int trials = (1 << 16) - 1; + double p = 1 - Math.exp(Math.log(Double.MIN_VALUE) / trials); + + // Validate set-up + Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getBinomialP0(trials, p), 0); + + // Search for larger p until Math.nextAfter(p, 1) produces 0 + double upper = p * 2; + Assert.assertEquals("Invalid test set-up for p(0)", 0, getBinomialP0(trials, upper), 0); + + double lower = p; + while (Double.doubleToRawLongBits(lower) + 1 < Double.doubleToRawLongBits(upper)) { + final double mid = (upper + lower) / 2; + if (getBinomialP0(trials, mid) == 0) { + upper = mid; + } else { + lower = mid; + } + } + p = lower; + + // Re-validate + Assert.assertEquals("Invalid test set-up for p(0)", Double.MIN_VALUE, getBinomialP0(trials, p), 0); + Assert.assertEquals("Invalid test set-up for p(0)", 0, getBinomialP0(trials, Math.nextAfter(p, 1)), 0); + + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + // This will throw if the table does not sum to 2^30 + sampler.sample(); + } + + /** + * Gets the p(0) value for the Binomial distribution. + * + * @param trials the trials + * @param probabilityOfSuccess the probability of success + * @return the p(0) value + */ + private static double getBinomialP0(int trials, double probabilityOfSuccess) { + return Math.exp(trials * Math.log(1 - probabilityOfSuccess)); + } + + /** + * Test the Binomial distribution with a probability of 0 where the sampler should equal 0. + */ + @Test + public void testCreateBinomialDistributionWithProbability0() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 1000000; + final double p = 0; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + for (int i = 0; i < 5; i++) { + Assert.assertEquals(0, sampler.sample()); + } + // Hit the toString() method + Assert.assertTrue(sampler.toString().contains("Binomial")); + } + + /** + * Test the Binomial distribution with a probability of 1 where the sampler should equal + * the number of trials. + */ + @Test + public void testCreateBinomialDistributionWithProbability1() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 1000000; + final double p = 1; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + for (int i = 0; i < 5; i++) { + Assert.assertEquals(trials, sampler.sample()); + } + // Hit the toString() method + Assert.assertTrue(sampler.toString().contains("Binomial")); + } + + /** + * Test the sampler with a large number of trials. This tests the sampler can create the + * Binomial distribution for a large size when a limiting distribution (e.g. the Normal distribution) + * could be used instead. + */ + @Test + public void testCreateBinomialDistributionWithLargeNumberOfTrials() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 65000; + final double p = 0.01; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + // This will throw if the table does not sum to 2^30 + sampler.sample(); + } + + /** + * Test the sampler with a probability of 0.5. This should hit the edge case in the loop to + * search for the last probability of the Binomial distribution. + */ + @Test + public void testCreateBinomialDistributionWithProbability0_5() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 10; + final double p = 0.5; + final DiscreteSampler sampler = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p); + // This will throw if the table does not sum to 2^30 + sampler.sample(); + } + + /** + * Test the sampler with a probability that requires inversion has the same name for + * {@link Object#toString()}. + */ + @Test + public void testBinomialSamplerToString() { + final UniformRandomProvider rng = new FixedRNG(); + final int trials = 10; + final double p1 = 0.4; + final double p2 = 1 - p1; + final DiscreteSampler sampler1 = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p1); + final DiscreteSampler sampler2 = MarsagliaTsangWangDiscreteSampler.createBinomialDistribution(rng, trials, p2); + Assert.assertEquals(sampler1.toString(), sampler2.toString()); + } + + /** * Return a fixed sequence of {@code int} output. */ - private class FixedSequenceIntProvider extends IntProvider { + private static class FixedSequenceIntProvider extends IntProvider { /** The count of values output. */ private int count; /** The values. */ @@ -329,4 +598,14 @@ public class MarsagliaTsangWangDiscreteSamplerTest { return values[count++ % values.length]; } } + + /** + * A RNG returning a fixed {@code int} value with all the bits set. + */ + private static class FixedRNG extends IntProvider { + @Override + public int next() { + return 0xffffffff; + } + } } diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSamplerTest.java deleted file mode 100644 index 207840f..0000000 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/MarsagliaTsangWangSmallMeanPoissonSamplerTest.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.rng.sampling.distribution; - -import org.apache.commons.rng.UniformRandomProvider; -import org.junit.Test; - -/** - * Test for the {@link MarsagliaTsangWangSmallMeanPoissonSampler}. The tests hit edge - * cases for the sampler. - */ -public class MarsagliaTsangWangSmallMeanPoissonSamplerTest { - /** - * Test the constructor with a bad mean. - */ - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithMeanLargerThanUpperBound() { - final UniformRandomProvider rng = new FixedRNG(0); - final double mean = 1025; - @SuppressWarnings("unused") - final MarsagliaTsangWangSmallMeanPoissonSampler sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, - mean); - } - - /** - * Test the constructor with a bad mean. - */ - @Test(expected = IllegalArgumentException.class) - public void testConstructorThrowsWithZeroMean() { - final UniformRandomProvider rng = new FixedRNG(0); - final double mean = 0; - @SuppressWarnings("unused") - final MarsagliaTsangWangSmallMeanPoissonSampler sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, - mean); - } - - /** - * Test the constructor with the maximum mean. - */ - @Test - public void testConstructorWithMaximumMean() { - final UniformRandomProvider rng = new FixedRNG(0); - final double mean = 1024; - @SuppressWarnings("unused") - final MarsagliaTsangWangSmallMeanPoissonSampler sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, - mean); - } - - /** - * Test the constructor with a small mean that hits the edge case where the - * probability sum is not 2^30. - */ - @Test - public void testConstructorWithSmallMean() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - final double mean = 0.25; - final MarsagliaTsangWangSmallMeanPoissonSampler sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, - mean); - // This will throw if the table does not sum to 2^30 - sampler.sample(); - } - - /** - * Test the constructor with a medium mean that is at the switch point for how the probability - * distribution is computed. - */ - @Test - public void testConstructorWithMediumMean() { - final UniformRandomProvider rng = new FixedRNG(0xffffffff); - final double mean = 21.4; - final MarsagliaTsangWangSmallMeanPoissonSampler sampler = new MarsagliaTsangWangSmallMeanPoissonSampler(rng, - mean); - // This will throw if the table does not sum to 2^30 - sampler.sample(); - } - - /** - * A RNG returning a fixed value. - */ - private static class FixedRNG implements UniformRandomProvider { - /** The value. */ - private final int value; - - /** - * @param value the value - */ - FixedRNG(int value) { - this.value = value; - } - - @Override - public int nextInt() { - return value; - } - - public void nextBytes(byte[] bytes) {} - public void nextBytes(byte[] bytes, int start, int len) {} - public int nextInt(int n) { return 0; } - public long nextLong() { return 0; } - public long nextLong(long n) { return 0; } - public boolean nextBoolean() { return false; } - public float nextFloat() { return 0; } - public double nextDouble() { return 0; } - } -}