[commons-rng] 01/02: Update ziggurat performance benchmark with more methods

aherbert Wed, 25 Aug 2021 15:35:18 -0700

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git


commit e9a67d339fe4032101305086f89085bc1c18486d
Author: Alex Herbert <aherb...@apache.org>
AuthorDate: Wed Aug 25 21:02:41 2021 +0100

    Update ziggurat performance benchmark with more methods
    
    Use only bit shifts for random positive longs.
    
    Use a different method to sample y.
    
    Benchmark methods to interpolate X or Y
---
 .../distribution/ZigguratSamplerPerformance.java   | 271 ++++++++++++++++++++-
 1 file changed, 268 insertions(+), 3 deletions(-)

diff --git 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/distribution/ZigguratSamplerPerformance.java
 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/distribution/ZigguratSamplerPerformance.java
index f681ecc..58e1f41 100644
--- 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/distribution/ZigguratSamplerPerformance.java
+++ 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/sampling/distribution/ZigguratSamplerPerformance.java
@@ -35,6 +35,7 @@ import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.Warmup;
+import java.util.Arrays;
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -79,6 +80,8 @@ public class ZigguratSamplerPerformance {
     private static final String MOD_GAUSSIAN_SIMPLE_OVERHANGS = 
"ModGaussianSimpleOverhangs";
     /** The name for the {@link 
ModifiedZigguratNormalizedGaussianSamplerInlining}. */
     private static final String MOD_GAUSSIAN_INLINING = "ModGaussianInlining";
+    /** The name for the {@link 
ModifiedZigguratNormalizedGaussianSamplerInliningShift}. */
+    private static final String MOD_GAUSSIAN_INLINING_SHIFT = 
"ModGaussianInliningShift";
     /** The name for the {@link 
ModifiedZigguratNormalizedGaussianSamplerInliningSimpleOverhangs}. */
     private static final String MOD_GAUSSIAN_INLINING_SIMPLE_OVERHANGS = 
"ModGaussianInliningSimpleOverhangs";
     /** The name for the {@link 
ModifiedZigguratNormalizedGaussianSamplerIntMap}. */
@@ -248,6 +251,87 @@ public class ZigguratSamplerPerformance {
     }
 
     /**
+     * Defines method to use for interpolating the X or Y tables from unsigned 
{@code long} values.
+     */
+    @State(Scope.Benchmark)
+    public static class InterpolationSources {
+        /** The method to obtain the long. */
+        @Param({"U1", "1minusU2", "U_1minusU"})
+        private String method;
+
+        /** The sampler. */
+        private ContinuousSampler sampler;
+
+        /**
+         * @return the sampler.
+         */
+        public ContinuousSampler getSampler() {
+            return sampler;
+        }
+
+        /** Instantiates sampler. */
+        @Setup
+        public void setup() {
+            // Use a fast generator
+            final UniformRandomProvider rng = 
RandomSource.XO_RO_SHI_RO_128_PP.create();
+            // Get an x table. This is length 254.
+            // We will sample from this internally to avoid index 
out-of-bounds issues.
+            final int start = 42;
+            final double[] x = Arrays.copyOfRange(
+                    ModifiedZigguratNormalizedGaussianSampler.getX(), start, 
start + 129);
+            final int mask = 127;
+            if ("U1".equals(method)) {
+                sampler = () -> {
+                    final long u = rng.nextLong() >>> 1;
+                    final int j = 1 + (((int) u) & mask);
+                    // double multiply
+                    // double add
+                    // double subtract
+                    // long convert to double
+                    // double multiply
+                    // int subtract
+                    // three index loads
+                    return x[j] * TWO_POW_63 + (x[j - 1] - x[j]) * u;
+                };
+            } else if ("1minusU2".equals(method)) {
+                sampler = () -> {
+                    final long u = rng.nextLong() >>> 1;
+                    final int j = 1 + (((int) u) & mask);
+                    // Since u is in [0, 2^63) to create (1 - u) using 
Long.MIN_VALUE
+                    // as an unsigned integer of 2^63.
+                    // double multiply
+                    // double add
+                    // double subtract
+                    // long subtract
+                    // long convert to double
+                    // double multiply
+                    // int subtract * 2
+                    // three index loads
+                    return x[j - 1] * TWO_POW_63 + (x[j] - x[j - 1]) * 
(Long.MIN_VALUE - u);
+                };
+            } else if ("U_1minusU".equals(method)) {
+                sampler = () -> {
+                    final long u = rng.nextLong() >>> 1;
+                    final int j = 1 + (((int) u) & mask);
+                    // Interpolation between bounds a and b using:
+                    // a * u + b * (1 - u) == b + u * (a - b)
+                    // long convert to double
+                    // double multiply
+                    // double add
+                    // long subtract
+                    // long convert to double
+                    // double multiply
+                    // int subtract
+                    // two index loads
+                    return x[j - 1] * u + x[j] * (Long.MIN_VALUE - u);
+                };
+            } else {
+                throwIllegalStateException(method);
+            }
+        }
+    }
+
+    /**
      * The samplers to use for testing the ziggurat method.
      */
     @State(Scope.Benchmark)
@@ -260,7 +344,8 @@ public class ZigguratSamplerPerformance {
                 // Experimental Marsaglia exponential ziggurat sampler
                 EXPONENTIAL,
                 // Experimental McFarland Gaussian ziggurat samplers
-                MOD_GAUSSIAN2, MOD_GAUSSIAN_SIMPLE_OVERHANGS, 
MOD_GAUSSIAN_INLINING,
+                MOD_GAUSSIAN2, MOD_GAUSSIAN_SIMPLE_OVERHANGS,
+                MOD_GAUSSIAN_INLINING, MOD_GAUSSIAN_INLINING_SHIFT,
                 MOD_GAUSSIAN_INLINING_SIMPLE_OVERHANGS, MOD_GAUSSIAN_INT_MAP, 
MOD_GAUSSIAN_512,
                 // Experimental McFarland Gaussian ziggurat samplers
                 MOD_EXPONENTIAL2, MOD_EXPONENTIAL_SIMPLE_OVERHANGS, 
MOD_EXPONENTIAL_INLINING,
@@ -292,6 +377,8 @@ public class ZigguratSamplerPerformance {
                 return new 
ModifiedZigguratNormalizedGaussianSamplerSimpleOverhangs(rng);
             } else if (MOD_GAUSSIAN_INLINING.equals(type)) {
                 return new 
ModifiedZigguratNormalizedGaussianSamplerInlining(rng);
+            } else if (MOD_GAUSSIAN_INLINING_SHIFT.equals(type)) {
+                return new 
ModifiedZigguratNormalizedGaussianSamplerInliningShift(rng);
             } else if (MOD_GAUSSIAN_INLINING_SIMPLE_OVERHANGS.equals(type)) {
                 return new 
ModifiedZigguratNormalizedGaussianSamplerInliningSimpleOverhangs(rng);
             } else if (MOD_GAUSSIAN_INT_MAP.equals(type)) {
@@ -1101,6 +1188,17 @@ public class ZigguratSamplerPerformance {
             exponential = new ModifiedZigguratExponentialSampler(rng);
         }
 
+        /**
+         * Provide access to the precomputed ziggurat lengths.
+         *
+         * <p>This is package-private to allow usage in the interpolation test.
+         *
+         * @return x
+         */
+        static double[] getX() {
+            return X;
+        }
+
         /** {@inheritDoc} */
         @Override
         public double sample() {
@@ -1478,6 +1576,160 @@ public class ZigguratSamplerPerformance {
      *
      * <p>Uses the algorithm from McFarland, C.D. (2016).
      *
+     * <p>This implementation separates sampling of the main ziggurat and 
sampling from the edge
+     * into different methods. This allows inlining of the main sample method.
+     *
+     * <p>Positive longs are created using bit shifts (not masking). The y 
coordinate is
+     * generated with u2 not (1-u2) which avoids a subtraction.
+     */
+    static class ModifiedZigguratNormalizedGaussianSamplerInliningShift
+        extends ModifiedZigguratNormalizedGaussianSampler {
+
+        /**
+         * @param rng Generator of uniformly distributed random numbers.
+         */
+        
ModifiedZigguratNormalizedGaussianSamplerInliningShift(UniformRandomProvider 
rng) {
+            super(rng);
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public double sample() {
+            // Ideally this method byte code size should be below 
-XX:MaxInlineSize
+            // (which defaults to 35 bytes). This compiles to 33 bytes.
+
+            final long xx = nextLong();
+            // Float multiplication squashes these last 8 bits, so they can be 
used to sample i
+            final int i = ((int) xx) & 0xff;
+
+            if (i < I_MAX) {
+                // Early exit.
+                // Branch frequency: 0.988283
+                return X[i] * xx;
+            }
+
+            return edgeSample(xx);
+        }
+
+        /**
+         * Create the sample from the edge of the ziggurat.
+         *
+         * <p>This method has been extracted to fit the main sample method 
within 35 bytes (the
+         * default size for a JVM to inline a method).
+         *
+         * @param xx Initial random deviate
+         * @return a sample
+         */
+        private double edgeSample(long xx) {
+            // Recycle bits.
+            // Remove sign bit to create u1.
+            long u1 = (xx << 1) >>> 1;
+            // Extract the sign bit:
+            // Use 2 - 1 or 0 - 1
+            final double signBit = ((xx >>> 62) & 0x2) - 1.0;
+            final int j = normSampleA();
+            // Four kinds of overhangs:
+            //  j = 0                :  Sample from tail
+            //  0 < j < J_INFLECTION :  Overhang is concave; only sample from 
Lower-Left triangle
+            //  j = J_INFLECTION     :  Must sample from entire overhang 
rectangle
+            //  j > J_INFLECTION     :  Overhangs are convex; implicitly 
accept point in Lower-Left triangle
+            //
+            // Conditional statements are arranged such that the more likely 
outcomes are first.
+            double x;
+            if (j > J_INFLECTION) {
+                // Convex overhang
+                // Branch frequency: 0.00892897
+                // Loop repeat frequency: 0.389804
+                for (;;) {
+                    x = interpolateSample(X, j, u1);
+                    final long uDiff = (nextLong() >>> 1) - u1;
+                    if (uDiff >= 0) {
+                        // Lower-left triangle
+                        break;
+                    }
+                    if (uDiff >= MAX_IE &&
+                        // Within maximum distance of f(x) from the triangle 
hypotenuse.
+                        // Frequency (per upper-right triangle): 0.431497
+                        // Reject frequency: 0.489630
+                        // u2 = (u1 + uDiff)
+                        interpolateSample(Y, j, u1 + uDiff) < Math.exp(-0.5 * 
x * x)) {
+                        break;
+                    }
+                    // uDiff < MAX_IE (upper-right triangle) or rejected as 
above the curve
+                    u1 = randomInt63();
+                }
+            } else if (j < J_INFLECTION) {
+                if (j == 0) {
+                    // Tail
+                    // Branch frequency: 0.000276358
+                    // Note: Although less frequent than the next branch, j == 
0 is a subset of
+                    // j < J_INFLECTION and must be first.
+                    // Loop repeat frequency: 0.0634786
+                    do {
+                        x = ONE_OVER_X_0 * exponential.sample();
+                    } while (exponential.sample() < 0.5 * x * x);
+                    x += X_0;
+                } else {
+                    // Concave overhang
+                    // Branch frequency: 0.00249563
+                    // Loop repeat frequency: 0.0123784
+                    for (;;) {
+                        // U_x <- min(U_1, U_2)
+                        // distance <- | U_1 - U_2 |
+                        // U_y <- 1 - (U_x + distance)
+                        long uDiff = (nextLong() >>> 1) - u1;
+                        if (uDiff < 0) {
+                            uDiff = -uDiff;
+                            u1 -= uDiff;
+                        }
+                        x = interpolateSample(X, j, u1);
+                        if (uDiff > MIN_IE ||
+                            interpolateSample(Y, j, u1 + uDiff) < 
Math.exp(-0.5 * x * x)) {
+                            break;
+                        }
+                        u1 = nextLong() >>> 1;
+                    }
+                }
+            } else {
+                // Inflection point
+                // Branch frequency: 0.0000159359
+                // Loop repeat frequency: 0.500213
+                for (;;) {
+                    x = interpolateSample(X, j, u1);
+                    if (interpolateSample(Y, j, nextLong() >>> 1) < 
Math.exp(-0.5 * x * x)) {
+                        break;
+                    }
+                    u1 = nextLong() >>> 1;
+                }
+            }
+            return signBit * x;
+        }
+
+        /**
+         * Interpolate x from the uniform deviate.
+         * <pre>
+         *  value = x[j] + u * (x[j-1] - x[j])
+         * </pre>
+         * <p>If x is the precomputed lengths of the ziggurat (X) then x[j-1] 
is larger and this adds
+         * a delta to x[j].
+         * <p>If x is the precomputed pdf for the lengths of the ziggurat (Y) 
then x[j-1] is smaller
+         * larger and this subtracts a delta from x[j].
+         *
+         * @param x x
+         * @param j j
+         * @param u uniform deviate
+         * @return the sample
+         */
+        private static double interpolateSample(double[] x, int j, long u) {
+            return x[j] * TWO_POW_63 + (x[j - 1] - x[j]) * u;
+        }
+    }
+
+    /**
+     * Modified Ziggurat method for sampling from a Gaussian distribution with 
mean 0 and standard deviation 1.
+     *
+     * <p>Uses the algorithm from McFarland, C.D. (2016).
+     *
      * <p>This implementation extracts the separates the sample method for the 
main ziggurat
      * from the edge sampling. This allows inlining of the main sample method.
      *
@@ -2899,12 +3151,12 @@ public class ZigguratSamplerPerformance {
         protected double expOverhang(int j, long xx) {
             // Recycle the initial random deviate.
             // Shift right to make an unsigned long.
-            for (long ux = xx >>> 1;; ux = randomInt63()) {
+            for (long ux = xx >>> 1;; ux = nextLong() >>> 1) {
                 // To sample a unit right-triangle:
                 // U_x <- min(U_1, U_2)
                 // distance <- | U_1 - U_2 |
                 // U_y <- 1 - (U_x + distance)
-                long uDistance = randomInt63() - ux;
+                long uDistance = (nextLong() >>> 1) - ux;
                 if (uDistance < 0) {
                     uDistance = -uDistance;
                     ux -= uDistance;
@@ -3668,6 +3920,19 @@ public class ZigguratSamplerPerformance {
     }
 
     /**
+     * Benchmark methods for obtaining an interpolated value from an unsigned 
long.
+     *
+     * <p>Note: To be disabled. The speed is: U1, 1minusU2, U_1minusU.
+     *
+     * @param sources Source of randomness.
+     * @return the sample value
+     */
+    @Benchmark
+    public double interpolate(InterpolationSources sources) {
+        return sources.getSampler().sample();
+    }
+
+    /**
      * Run the sampler.
      *
      * @param sources Source of randomness.

[commons-rng] 01/02: Update ziggurat performance benchmark with more methods

Reply via email to