This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-rng.git
commit edcdf9affa4648609120e5882f83facf2fb1ef05 Author: aherbert <a.herb...@sussex.ac.uk> AuthorDate: Mon Jan 28 16:24:54 2019 +0000 RNG-69: Added a Geometric sampler This outperforms using the InverseTransformDiscreteSampler with an appropriate Geometric inverse cumulative probability function. A JMH benchmark has been added to demonstrate the performance. --- .../distribution/GeometricSamplersPerformance.java | 190 +++++++++++++++++++++ .../jmh/distribution/SamplersPerformance.java | 19 ++- .../sampling/distribution/GeometricSampler.java | 144 ++++++++++++++++ .../distribution/DiscreteSamplersList.java | 8 +- .../distribution/GeometricSamplerTest.java | 91 ++++++++++ 5 files changed, 446 insertions(+), 6 deletions(-) diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java new file mode 100644 index 0000000..60e86ad --- /dev/null +++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.rng.examples.jmh.distribution; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.infra.Blackhole; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.simple.RandomSource; +import org.apache.commons.rng.sampling.distribution.DiscreteInverseCumulativeProbabilityFunction; +import org.apache.commons.rng.sampling.distribution.DiscreteSampler; +import org.apache.commons.rng.sampling.distribution.GeometricSampler; +import org.apache.commons.rng.sampling.distribution.InverseTransformDiscreteSampler; + +/** + * Executes a benchmark to compare the speed of generation of Geometric random numbers + * using different methods. + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@State(Scope.Benchmark) +@Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"}) +public class GeometricSamplersPerformance { + /** Number of samples per run. */ + private static final int NUM_SAMPLES = 10000000; + + /** + * The RandomSource's to use for testing. + */ + @State(Scope.Benchmark) + public static class Sources { + /** + * RNG providers. + */ + @Param({"SPLIT_MIX_64", + "MWC_256", + "JDK" }) + private String randomSourceName; + + /** RNG. */ + private UniformRandomProvider generator; + + /** + * @return the RNG. + */ + public UniformRandomProvider getGenerator() { + return generator; + } + + /** Instantiates generator. */ + @Setup + public void setup() { + final RandomSource randomSource = RandomSource.valueOf(randomSourceName); + generator = RandomSource.create(randomSource); + } + } + + /** + * The probability of success for testing. + */ + @State(Scope.Benchmark) + public static class ProbabilityOfSuccess { + /** + * The probability. + */ + @Param({ "0.1", "0.3"}) + private double probability; + + /** + * Gets the probability of success. + * + * @return the probability of success + */ + public double getProbability() { + return probability; + } + } + + /** + * Define the inverse cumulative probability function for the Geometric distribution. + * + * <p>Adapted from org.apache.commons.math3.distribution.GeometricDistribution. + */ + private static class GeometricDiscreteInverseCumulativeProbabilityFunction + implements DiscreteInverseCumulativeProbabilityFunction { + /** + * {@code log(1 - p)} where p is the probability of success. + */ + private final double log1mProbabilityOfSuccess; + + /** + * @param probabilityOfSuccess the probability of success + */ + GeometricDiscreteInverseCumulativeProbabilityFunction(double probabilityOfSuccess) { + // No validation that 0 < p <= 1 + log1mProbabilityOfSuccess = Math.log1p(-probabilityOfSuccess); + } + + @Override + public int inverseCumulativeProbability(double cumulativeProbability) { + // This is the equivalent of floor(log(u)/ln(1-p)) + // where: + // u = cumulative probability + // p = probability of success + // See: https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions + // --- + // Note: if cumulativeProbability == 0 then log1p(-0) is zero and the result + // after the range check is 0. + // Note: if cumulativeProbability == 1 then log1p(-1) is negative infinity, the result of + // the divide is positive infinity and the result after the range check is Integer.MAX_VALUE. + return Math.max(0, (int) Math.ceil(Math.log1p(-cumulativeProbability) / log1mProbabilityOfSuccess - 1)); + } + } + + /** + * Exercises a discrete sampler. + * + * @param sampler Sampler. + * @param bh Data sink. + */ + private static void runSample(DiscreteSampler sampler, + Blackhole bh) { + for (int i = 0; i < NUM_SAMPLES; i++) { + bh.consume(sampler.sample()); + } + } + + // Benchmarks methods below. + + /** + * Run geometric sampler. + * + * @param sources Source of randomness. + * @param success The probability of success. + * @param bh Data sink. + */ + @Benchmark + public void runGeometricSampler(Sources sources, + ProbabilityOfSuccess success, + Blackhole bh) { + runSample(new GeometricSampler(sources.getGenerator(), success.getProbability()), bh); + } + + /** + * Run geometric sampler. + * + * @param sources Source of randomness. + * @param success The probability of success. + * @param bh Data sink. + */ + @Benchmark + public void runGeometricInverseTranformSampler(Sources sources, + ProbabilityOfSuccess success, + Blackhole bh) { + final DiscreteInverseCumulativeProbabilityFunction geometricFunction = + new GeometricDiscreteInverseCumulativeProbabilityFunction(success.getProbability()); + final DiscreteSampler inverseMethodSampler = + new InverseTransformDiscreteSampler(sources.getGenerator(), + geometricFunction); + runSample(inverseMethodSampler, bh); + } +} diff --git a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java index 963d8bd..ad2d096 100644 --- a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java +++ b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java @@ -44,6 +44,7 @@ import org.apache.commons.rng.sampling.distribution.LogNormalSampler; import org.apache.commons.rng.sampling.distribution.ChengBetaSampler; import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler; import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler; +import org.apache.commons.rng.sampling.distribution.GeometricSampler; import org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler; import org.apache.commons.rng.sampling.distribution.PoissonSampler; @@ -110,8 +111,8 @@ public class SamplersPerformance { * @param sampler Sampler. * @param bh Data sink. */ - private void runSample(ContinuousSampler sampler, - Blackhole bh) { + private static void runSample(ContinuousSampler sampler, + Blackhole bh) { for (int i = 0; i < NUM_SAMPLES; i++) { bh.consume(sampler.sample()); } @@ -123,8 +124,8 @@ public class SamplersPerformance { * @param sampler Sampler. * @param bh Data sink. */ - private void runSample(DiscreteSampler sampler, - Blackhole bh) { + private static void runSample(DiscreteSampler sampler, + Blackhole bh) { for (int i = 0; i < NUM_SAMPLES; i++) { bh.consume(sampler.sample()); } @@ -261,4 +262,14 @@ public class SamplersPerformance { Blackhole bh) { runSample(new PoissonSampler(sources.getGenerator(), 8.9), bh); } + + /** + * @param sources Source of randomness. + * @param bh Data sink. + */ + @Benchmark + public void runGeometricSampler(Sources sources, + Blackhole bh) { + runSample(new GeometricSampler(sources.getGenerator(), 0.21), bh); + } } diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java new file mode 100644 index 0000000..ab2a6c4 --- /dev/null +++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling.distribution; + +import org.apache.commons.rng.UniformRandomProvider; + +/** + * Sampling from a <a href="https://en.wikipedia.org/wiki/Geometric_distribution">geometric + * distribution</a>. + * + * <p>This distribution samples the number of failures before the first success taking values in the + * set {@code [0, 1, 2, ...]}. + * + * <p>The sample is computed using a related an exponential distribution. If X is an exponentially + * distributed random variable with parameter λ, then Y = floor(X) is a geometrically distributed + * random variable with parameter p = 1 − e<sup>−λ</sup>, with {@code p} the probability of success. + * + * <p>Note: As the probability of success ({@code p}) tends towards zero the mean of the + * distribution ({@code (1-p)/p}) tends towards infinity and due to the use of {@code int} for the + * sample this results in truncation of the distribution. + * + * @see <a + * href="https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions">geometric + * distribution - related distributions</a> + * + * @since 1.3 + */ +public class GeometricSampler implements DiscreteSampler { + /** The appropriate geometric sampler for the parameters. */ + private final DiscreteSampler delegate; + + /** + * Sample from the geometric distribution when the probability of success is 1. + */ + private static class GeometricP1Sampler implements DiscreteSampler { + /** The single instance. */ + static final GeometricP1Sampler INSTANCE = new GeometricP1Sampler(); + + @Override + public int sample() { + // When probability of success is 1 the sample is always zero + return 0; + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return "Geometric(p=1) deviate"; + } + } + + /** + * Sample from the geometric distribution by using a related exponential distribution. + */ + private static class GeometricExponentialSampler implements DiscreteSampler { + /** Underlying source of randomness. Used only for the {@link #toString()} method. */ + private final UniformRandomProvider rng; + /** The related exponential sampler for the geometric distribution. */ + private final AhrensDieterExponentialSampler exponentialSampler; + + /** + * @param rng Generator of uniformly distributed random numbers + * @param probabilityOfSuccess The probability of success (must be {@code 0<p<1}) + */ + GeometricExponentialSampler(UniformRandomProvider rng, double probabilityOfSuccess) { + this.rng = rng; + // Use a related exponential distribution: + // λ = −ln(1 − probabilityOfSuccess) + // exponential mean = 1 / λ + // -- + // Note on validation: + // If probabilityOfSuccess == Math.nextDown(1.0) the exponential mean is >0 (valid). + // If probabilityOfSuccess == Double.MIN_VALUE the exponential mean is +Infinity + // and the sample will always be Integer.MAX_VALUE (the distribution is truncated). It + // is noted in the class Javadoc that the use of a small p leads to truncation so + // no checks are made for this case. + final double exponentialMean = 1.0 / (-Math.log1p(-probabilityOfSuccess)); + exponentialSampler = new AhrensDieterExponentialSampler(rng, exponentialMean); + } + + @Override + public int sample() { + // Return the floor of the exponential sample + return (int) Math.floor(exponentialSampler.sample()); + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return "Geometric deviate [" + rng.toString() + "]"; + } + } + + /** + * Creates a new geometric distribution sampler. The samples will be provided in the set + * {@code k=[0, 1, 2, ...]} where {@code k} indicates the number of failures before the first + * success. + * + * @param rng Generator of uniformly distributed random numbers + * @param probabilityOfSuccess The probability of success + * @throws IllegalArgumentException if {@code probabilityOfSuccess} is not in the range [0 < + * probabilityOfSuccess <= 1] + */ + public GeometricSampler(UniformRandomProvider rng, double probabilityOfSuccess) { + if (probabilityOfSuccess <= 0 || probabilityOfSuccess > 1) { + throw new IllegalArgumentException( + "Probability of success must be in the range [0 < p <= 1]: " + probabilityOfSuccess); + } + delegate = probabilityOfSuccess == 1 ? + GeometricP1Sampler.INSTANCE : + new GeometricExponentialSampler(rng, probabilityOfSuccess); + } + + /** + * Create a sample from a geometric distribution. + * + * <p>The sample will take the values in the set {@code [0, 1, 2, ...]}, equivalent to the number of + * failures before the first success. + */ + @Override + public int sample() { + return delegate.sample(); + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return delegate.toString(); + } +} diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java index 23122c7..3f9adb8 100644 --- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java @@ -46,9 +46,13 @@ public class DiscreteSamplersList { // Geometric ("inverse method"). final double probSuccessGeometric = 0.21; - add(LIST, new org.apache.commons.math3.distribution.GeometricDistribution(probSuccessGeometric), + add(LIST, new org.apache.commons.math3.distribution.GeometricDistribution(null, probSuccessGeometric), MathArrays.sequence(10, 0, 1), RandomSource.create(RandomSource.ISAAC)); + // Geometric. + add(LIST, new org.apache.commons.math3.distribution.GeometricDistribution(probSuccessGeometric), + MathArrays.sequence(10, 0, 1), + new GeometricSampler(RandomSource.create(RandomSource.XOR_SHIFT_1024_S), probSuccessGeometric)); // Hypergeometric ("inverse method"). final int popSizeHyper = 34; @@ -197,7 +201,7 @@ public class DiscreteSamplersList { /** * @param dist Distribution. * @param points Points. - * @return the probabilities of the given points according to the distribution. + * @return the probabilities of the given points according to the distribution. */ private static double[] getProbabilities(org.apache.commons.math3.distribution.IntegerDistribution dist, int[] points) { diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java new file mode 100644 index 0000000..8c5a39e --- /dev/null +++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.rng.sampling.distribution; + +import org.apache.commons.rng.UniformRandomProvider; +import org.apache.commons.rng.simple.RandomSource; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test for the {@link GeometricSampler}. The tests hit edge cases for the sampler. + */ +public class GeometricSamplerTest { + /** + * Test the edge case where the probability of success is 1. This is a valid geometric distribution + * where the sample should always be 0. + */ + @Test + public void testProbabilityOfSuccessIsOneGeneratesZeroForSamples() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64); + final GeometricSampler sampler = new GeometricSampler(rng, 1); + // All samples should be 0 + for (int i = 0; i < 10; i++) { + Assert.assertEquals("p=1 should have 0 for all samples", 0, sampler.sample()); + } + } + + /** + * Test the edge case where the probability of success is 1 since it uses a different + * {@link Object#toString()} method to the normal case tested elsewhere. + */ + @Test + public void testProbabilityOfSuccessIsOneSamplerToString() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64); + final GeometricSampler sampler = new GeometricSampler(rng, 1); + Assert.assertTrue("Missing 'Geometric' from toString", sampler.toString().contains("Geometric")); + } + + /** + * Test the edge case where the probability of success is nearly 0. This is a valid geometric + * distribution but the sample is clipped to max integer value because the underlying exponential + * has a mean of positive infinity (effectively the sample is from a truncated distribution). + * + * <p>This test can be changed in future if a lower bound limit for the probability of success is + * introduced. + */ + @Test + public void testProbabilityOfSuccessIsAlmostZeroGeneratesMaxValueForSamples() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64); + final GeometricSampler sampler = new GeometricSampler(rng, Double.MIN_VALUE); + // All samples should be max value + for (int i = 0; i < 10; i++) { + Assert.assertEquals("p=(almost 0) should have Integer.MAX_VALUE for all samples", Integer.MAX_VALUE, + sampler.sample()); + } + } + + /** + * Test probability of success {@code >1} is not allowed. + */ + @SuppressWarnings("unused") + @Test(expected = IllegalArgumentException.class) + public void testProbabilityOfSuccessAboveOneThrows() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, Long.valueOf(0)); + new GeometricSampler(rng, Math.nextUp(1.0)); + } + + /** + * Test probability of success {@code 0} is not allowed. + */ + @SuppressWarnings("unused") + @Test(expected = IllegalArgumentException.class) + public void testProbabilityOfSuccessIsZeroThrows() { + final UniformRandomProvider rng = RandomSource.create(RandomSource.SPLIT_MIX_64, Long.valueOf(0)); + new GeometricSampler(rng, 0); + } +}