This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git

commit edcdf9affa4648609120e5882f83facf2fb1ef05
Author: aherbert <a.herb...@sussex.ac.uk>
AuthorDate: Mon Jan 28 16:24:54 2019 +0000

    RNG-69: Added a Geometric sampler
    
    This outperforms using the InverseTransformDiscreteSampler with an
    appropriate Geometric inverse cumulative probability function. A JMH
    benchmark has been added to demonstrate the performance.
---
 .../distribution/GeometricSamplersPerformance.java | 190 +++++++++++++++++++++
 .../jmh/distribution/SamplersPerformance.java      |  19 ++-
 .../sampling/distribution/GeometricSampler.java    | 144 ++++++++++++++++
 .../distribution/DiscreteSamplersList.java         |   8 +-
 .../distribution/GeometricSamplerTest.java         |  91 ++++++++++
 5 files changed, 446 insertions(+), 6 deletions(-)

diff --git 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java
 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java
new file mode 100644
index 0000000..60e86ad
--- /dev/null
+++ 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/GeometricSamplersPerformance.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.distribution;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.infra.Blackhole;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.simple.RandomSource;
+import 
org.apache.commons.rng.sampling.distribution.DiscreteInverseCumulativeProbabilityFunction;
+import org.apache.commons.rng.sampling.distribution.DiscreteSampler;
+import org.apache.commons.rng.sampling.distribution.GeometricSampler;
+import 
org.apache.commons.rng.sampling.distribution.InverseTransformDiscreteSampler;
+
+/**
+ * Executes a benchmark to compare the speed of generation of Geometric random 
numbers
+ * using different methods.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = {"-server", "-Xms128M", "-Xmx128M"})
+public class GeometricSamplersPerformance {
+    /** Number of samples per run. */
+    private static final int NUM_SAMPLES = 10000000;
+
+    /**
+     * The RandomSource's to use for testing.
+     */
+    @State(Scope.Benchmark)
+    public static class Sources {
+        /**
+         * RNG providers.
+         */
+        @Param({"SPLIT_MIX_64",
+                "MWC_256",
+                "JDK" })
+        private String randomSourceName;
+
+        /** RNG. */
+        private UniformRandomProvider generator;
+
+        /**
+         * @return the RNG.
+         */
+        public UniformRandomProvider getGenerator() {
+            return generator;
+        }
+
+        /** Instantiates generator. */
+        @Setup
+        public void setup() {
+            final RandomSource randomSource = 
RandomSource.valueOf(randomSourceName);
+            generator = RandomSource.create(randomSource);
+        }
+    }
+
+    /**
+     * The probability of success for testing.
+     */
+    @State(Scope.Benchmark)
+    public static class ProbabilityOfSuccess {
+        /**
+         * The probability.
+         */
+        @Param({ "0.1", "0.3"})
+        private double probability;
+
+        /**
+         * Gets the probability of success.
+         *
+         * @return the probability of success
+         */
+        public double getProbability() {
+            return probability;
+        }
+    }
+
+    /**
+     * Define the inverse cumulative probability function for the Geometric 
distribution.
+     *
+     * <p>Adapted from 
org.apache.commons.math3.distribution.GeometricDistribution.
+     */
+    private static class GeometricDiscreteInverseCumulativeProbabilityFunction
+            implements DiscreteInverseCumulativeProbabilityFunction {
+        /**
+         * {@code log(1 - p)} where p is the probability of success.
+         */
+        private final double log1mProbabilityOfSuccess;
+
+        /**
+         * @param probabilityOfSuccess the probability of success
+         */
+        GeometricDiscreteInverseCumulativeProbabilityFunction(double 
probabilityOfSuccess) {
+            // No validation that 0 < p <= 1
+            log1mProbabilityOfSuccess = Math.log1p(-probabilityOfSuccess);
+        }
+
+        @Override
+        public int inverseCumulativeProbability(double cumulativeProbability) {
+            // This is the equivalent of floor(log(u)/ln(1-p))
+            // where:
+            // u = cumulative probability
+            // p = probability of success
+            // See: 
https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions
+            // ---
+            // Note: if cumulativeProbability == 0 then log1p(-0) is zero and 
the result
+            // after the range check is 0.
+            // Note: if cumulativeProbability == 1 then log1p(-1) is negative 
infinity, the result of
+            // the divide is positive infinity and the result after the range 
check is Integer.MAX_VALUE.
+            return Math.max(0, (int) 
Math.ceil(Math.log1p(-cumulativeProbability) / log1mProbabilityOfSuccess - 1));
+        }
+    }
+
+    /**
+     * Exercises a discrete sampler.
+     *
+     * @param sampler Sampler.
+     * @param bh Data sink.
+     */
+    private static void runSample(DiscreteSampler sampler,
+                                  Blackhole bh) {
+        for (int i = 0; i < NUM_SAMPLES; i++) {
+            bh.consume(sampler.sample());
+        }
+    }
+
+    // Benchmarks methods below.
+
+    /**
+     * Run geometric sampler.
+     *
+     * @param sources Source of randomness.
+     * @param success The probability of success.
+     * @param bh Data sink.
+     */
+    @Benchmark
+    public void runGeometricSampler(Sources sources,
+                                    ProbabilityOfSuccess success,
+                                    Blackhole bh) {
+        runSample(new GeometricSampler(sources.getGenerator(), 
success.getProbability()), bh);
+    }
+
+    /**
+     * Run geometric sampler.
+     *
+     * @param sources Source of randomness.
+     * @param success The probability of success.
+     * @param bh Data sink.
+     */
+    @Benchmark
+    public void runGeometricInverseTranformSampler(Sources sources,
+                                                   ProbabilityOfSuccess 
success,
+                                                   Blackhole bh) {
+        final DiscreteInverseCumulativeProbabilityFunction geometricFunction =
+                new 
GeometricDiscreteInverseCumulativeProbabilityFunction(success.getProbability());
+        final DiscreteSampler inverseMethodSampler =
+                new InverseTransformDiscreteSampler(sources.getGenerator(),
+                                                    geometricFunction);
+        runSample(inverseMethodSampler, bh);
+    }
+}
diff --git 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java
 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java
index 963d8bd..ad2d096 100644
--- 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java
+++ 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/distribution/SamplersPerformance.java
@@ -44,6 +44,7 @@ import 
org.apache.commons.rng.sampling.distribution.LogNormalSampler;
 import org.apache.commons.rng.sampling.distribution.ChengBetaSampler;
 import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler;
 import org.apache.commons.rng.sampling.distribution.DiscreteUniformSampler;
+import org.apache.commons.rng.sampling.distribution.GeometricSampler;
 import 
org.apache.commons.rng.sampling.distribution.RejectionInversionZipfSampler;
 import org.apache.commons.rng.sampling.distribution.PoissonSampler;
 
@@ -110,8 +111,8 @@ public class SamplersPerformance {
      * @param sampler Sampler.
      * @param bh Data sink.
      */
-    private void runSample(ContinuousSampler sampler,
-                           Blackhole bh) {
+    private static void runSample(ContinuousSampler sampler,
+                                  Blackhole bh) {
         for (int i = 0; i < NUM_SAMPLES; i++) {
             bh.consume(sampler.sample());
         }
@@ -123,8 +124,8 @@ public class SamplersPerformance {
      * @param sampler Sampler.
      * @param bh Data sink.
      */
-    private void runSample(DiscreteSampler sampler,
-                           Blackhole bh) {
+    private static void runSample(DiscreteSampler sampler,
+                                  Blackhole bh) {
         for (int i = 0; i < NUM_SAMPLES; i++) {
             bh.consume(sampler.sample());
         }
@@ -261,4 +262,14 @@ public class SamplersPerformance {
                                   Blackhole bh) {
         runSample(new PoissonSampler(sources.getGenerator(), 8.9), bh);
     }
+
+    /**
+     * @param sources Source of randomness.
+     * @param bh Data sink.
+     */
+    @Benchmark
+    public void runGeometricSampler(Sources sources,
+                                    Blackhole bh) {
+        runSample(new GeometricSampler(sources.getGenerator(), 0.21), bh);
+    }
 }
diff --git 
a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java
 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java
new file mode 100644
index 0000000..ab2a6c4
--- /dev/null
+++ 
b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/GeometricSampler.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+
+/**
+ * Sampling from a <a 
href="https://en.wikipedia.org/wiki/Geometric_distribution";>geometric
+ * distribution</a>.
+ *
+ * <p>This distribution samples the number of failures before the first 
success taking values in the
+ * set {@code [0, 1, 2, ...]}.
+ *
+ * <p>The sample is computed using a related an exponential distribution. If X 
is an exponentially
+ * distributed random variable with parameter λ, then Y = floor(X) is a 
geometrically distributed
+ * random variable with parameter p = 1 − e<sup>−λ</sup>, with {@code p} the 
probability of success.
+ *
+ * <p>Note: As the probability of success ({@code p}) tends towards zero the 
mean of the
+ * distribution ({@code (1-p)/p}) tends towards infinity and due to the use of 
{@code int} for the
+ * sample this results in truncation of the distribution.
+ *
+ * @see <a
+ * 
href="https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions";>geometric
+ * distribution - related distributions</a>
+ *
+ * @since 1.3
+ */
+public class GeometricSampler implements DiscreteSampler {
+    /** The appropriate geometric sampler for the parameters. */
+    private final DiscreteSampler delegate;
+
+    /**
+     * Sample from the geometric distribution when the probability of success 
is 1.
+     */
+    private static class GeometricP1Sampler implements DiscreteSampler {
+        /** The single instance. */
+        static final GeometricP1Sampler INSTANCE = new GeometricP1Sampler();
+
+        @Override
+        public int sample() {
+            // When probability of success is 1 the sample is always zero
+            return 0;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public String toString() {
+            return "Geometric(p=1) deviate";
+        }
+    }
+
+    /**
+     * Sample from the geometric distribution by using a related exponential 
distribution.
+     */
+    private static class GeometricExponentialSampler implements 
DiscreteSampler {
+        /** Underlying source of randomness. Used only for the {@link 
#toString()} method. */
+        private final UniformRandomProvider rng;
+        /** The related exponential sampler for the geometric distribution. */
+        private final AhrensDieterExponentialSampler exponentialSampler;
+
+        /**
+         * @param rng Generator of uniformly distributed random numbers
+         * @param probabilityOfSuccess The probability of success (must be 
{@code 0<p<1})
+         */
+        GeometricExponentialSampler(UniformRandomProvider rng, double 
probabilityOfSuccess) {
+            this.rng = rng;
+            // Use a related exponential distribution:
+            // λ = −ln(1 − probabilityOfSuccess)
+            // exponential mean = 1 / λ
+            // --
+            // Note on validation:
+            // If probabilityOfSuccess == Math.nextDown(1.0) the exponential 
mean is >0 (valid).
+            // If probabilityOfSuccess == Double.MIN_VALUE the exponential 
mean is +Infinity
+            // and the sample will always be Integer.MAX_VALUE (the 
distribution is truncated). It
+            // is noted in the class Javadoc that the use of a small p leads 
to truncation so
+            // no checks are made for this case.
+            final double exponentialMean = 1.0 / 
(-Math.log1p(-probabilityOfSuccess));
+            exponentialSampler = new AhrensDieterExponentialSampler(rng, 
exponentialMean);
+        }
+
+        @Override
+        public int sample() {
+            // Return the floor of the exponential sample
+            return (int) Math.floor(exponentialSampler.sample());
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public String toString() {
+            return "Geometric deviate [" + rng.toString() + "]";
+        }
+    }
+
+    /**
+     * Creates a new geometric distribution sampler. The samples will be 
provided in the set
+     * {@code k=[0, 1, 2, ...]} where {@code k} indicates the number of 
failures before the first
+     * success.
+     *
+     * @param rng Generator of uniformly distributed random numbers
+     * @param probabilityOfSuccess The probability of success
+     * @throws IllegalArgumentException if {@code probabilityOfSuccess} is not 
in the range [0 <
+     * probabilityOfSuccess <= 1]
+     */
+    public GeometricSampler(UniformRandomProvider rng, double 
probabilityOfSuccess) {
+        if (probabilityOfSuccess <= 0 || probabilityOfSuccess > 1) {
+            throw new IllegalArgumentException(
+                    "Probability of success must be in the range [0 < p <= 1]: 
" + probabilityOfSuccess);
+        }
+        delegate = probabilityOfSuccess == 1 ?
+            GeometricP1Sampler.INSTANCE :
+            new GeometricExponentialSampler(rng, probabilityOfSuccess);
+    }
+
+    /**
+     * Create a sample from a geometric distribution.
+     *
+     * <p>The sample will take the values in the set {@code [0, 1, 2, ...]}, 
equivalent to the number of
+     * failures before the first success.
+     */
+    @Override
+    public int sample() {
+        return delegate.sample();
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public String toString() {
+        return delegate.toString();
+    }
+}
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
index 23122c7..3f9adb8 100644
--- 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
@@ -46,9 +46,13 @@ public class DiscreteSamplersList {
 
             // Geometric ("inverse method").
             final double probSuccessGeometric = 0.21;
-            add(LIST, new 
org.apache.commons.math3.distribution.GeometricDistribution(probSuccessGeometric),
+            add(LIST, new 
org.apache.commons.math3.distribution.GeometricDistribution(null, 
probSuccessGeometric),
                 MathArrays.sequence(10, 0, 1),
                 RandomSource.create(RandomSource.ISAAC));
+            // Geometric.
+            add(LIST, new 
org.apache.commons.math3.distribution.GeometricDistribution(probSuccessGeometric),
+                MathArrays.sequence(10, 0, 1),
+                new 
GeometricSampler(RandomSource.create(RandomSource.XOR_SHIFT_1024_S), 
probSuccessGeometric));
 
             // Hypergeometric ("inverse method").
             final int popSizeHyper = 34;
@@ -197,7 +201,7 @@ public class DiscreteSamplersList {
     /**
      * @param dist Distribution.
      * @param points Points.
-     * @return the probabilities of the given points according to the 
distribution. 
+     * @return the probabilities of the given points according to the 
distribution.
      */
     private static double[] 
getProbabilities(org.apache.commons.math3.distribution.IntegerDistribution dist,
                                              int[] points) {
diff --git 
a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java
 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java
new file mode 100644
index 0000000..8c5a39e
--- /dev/null
+++ 
b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/GeometricSamplerTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.simple.RandomSource;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test for the {@link GeometricSampler}. The tests hit edge cases for the 
sampler.
+ */
+public class GeometricSamplerTest {
+    /**
+     * Test the edge case where the probability of success is 1. This is a 
valid geometric distribution
+     * where the sample should always be 0.
+     */
+    @Test
+    public void testProbabilityOfSuccessIsOneGeneratesZeroForSamples() {
+        final UniformRandomProvider rng = 
RandomSource.create(RandomSource.SPLIT_MIX_64);
+        final GeometricSampler sampler = new GeometricSampler(rng, 1);
+        // All samples should be 0
+        for (int i = 0; i < 10; i++) {
+            Assert.assertEquals("p=1 should have 0 for all samples", 0, 
sampler.sample());
+        }
+    }
+
+    /**
+     * Test the edge case where the probability of success is 1 since it uses 
a different
+     * {@link Object#toString()} method to the normal case tested elsewhere.
+     */
+    @Test
+    public void testProbabilityOfSuccessIsOneSamplerToString() {
+        final UniformRandomProvider rng = 
RandomSource.create(RandomSource.SPLIT_MIX_64);
+        final GeometricSampler sampler = new GeometricSampler(rng, 1);
+        Assert.assertTrue("Missing 'Geometric' from toString", 
sampler.toString().contains("Geometric"));
+    }
+
+    /**
+     * Test the edge case where the probability of success is nearly 0. This 
is a valid geometric
+     * distribution but the sample is clipped to max integer value because the 
underlying exponential
+     * has a mean of positive infinity (effectively the sample is from a 
truncated distribution).
+     *
+     * <p>This test can be changed in future if a lower bound limit for the 
probability of success is
+     * introduced.
+     */
+    @Test
+    public void 
testProbabilityOfSuccessIsAlmostZeroGeneratesMaxValueForSamples() {
+        final UniformRandomProvider rng = 
RandomSource.create(RandomSource.SPLIT_MIX_64);
+        final GeometricSampler sampler = new GeometricSampler(rng, 
Double.MIN_VALUE);
+        // All samples should be max value
+        for (int i = 0; i < 10; i++) {
+            Assert.assertEquals("p=(almost 0) should have Integer.MAX_VALUE 
for all samples", Integer.MAX_VALUE,
+                    sampler.sample());
+        }
+    }
+
+    /**
+     * Test probability of success {@code >1} is not allowed.
+     */
+    @SuppressWarnings("unused")
+    @Test(expected = IllegalArgumentException.class)
+    public void testProbabilityOfSuccessAboveOneThrows() {
+        final UniformRandomProvider rng = 
RandomSource.create(RandomSource.SPLIT_MIX_64, Long.valueOf(0));
+        new GeometricSampler(rng, Math.nextUp(1.0));
+    }
+
+    /**
+     * Test probability of success {@code 0} is not allowed.
+     */
+    @SuppressWarnings("unused")
+    @Test(expected = IllegalArgumentException.class)
+    public void testProbabilityOfSuccessIsZeroThrows() {
+        final UniformRandomProvider rng = 
RandomSource.create(RandomSource.SPLIT_MIX_64, Long.valueOf(0));
+        new GeometricSampler(rng, 0);
+    }
+}

Reply via email to