This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git
The following commit(s) were added to refs/heads/master by this push:
new e0259b83 RNG-191: Benchmark LXM generators with a 128-bit LCG
e0259b83 is described below
commit e0259b836a0fac0d0269f3fd033dd553e936cf03
Author: Alex Herbert <[email protected]>
AuthorDate: Mon Mar 2 12:09:06 2026 +0000
RNG-191: Benchmark LXM generators with a 128-bit LCG
Test use of Math multiply high methods to compute the upper 64-bits of
the 128-bit result of the 64-bit multiply.
---
.../rng/examples/jmh/core/LXMBenchmark.java | 31 +-
.../jmh/core/LXMGenerationPerformance.java | 595 +++++++++++++++++++++
2 files changed, 623 insertions(+), 3 deletions(-)
diff --git
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
index 14637e6e..1e9810c5 100644
---
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
+++
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
@@ -99,6 +99,11 @@ public class LXMBenchmark {
* java.lang.Math.unsignedMultiplyHigh if Java 18, otherwise
* a default implementation. */
private static final LongBinaryOperator MATH_UNSIGNED_MULTIPLY_HIGH;
+ /** Method handle to unsigned multiply using
+ * java.lang.Math.unsignedMultiplyHigh if Java 18,
+ * java.lang.Math.multiplyHigh if Java 9, otherwise
+ * a default implementation. */
+ private static final LongBinaryOperator DYNAMIC_MULTIPLY_HIGH;
static {
final MethodHandles.Lookup lookup = MethodHandles.lookup();
@@ -131,10 +136,12 @@ public class LXMBenchmark {
}
};
} catch (NoSuchMethodException | IllegalAccessException ignored) {
- op2 = UnsignedMultiplyHighSource::unsignedMultiplyHigh;
+ op2 = null;
}
// CHECKSTYLE: resume IllegalCatch
- MATH_UNSIGNED_MULTIPLY_HIGH = op2;
+ MATH_UNSIGNED_MULTIPLY_HIGH = op2 == null ?
UnsignedMultiplyHighSource::unsignedMultiplyHigh : op2;
+ // Bind to the highest available method
+ DYNAMIC_MULTIPLY_HIGH = op2 == null ? op1 : op2;
}
/**
@@ -148,7 +155,7 @@ public class LXMBenchmark {
UNSIGNED_MULTIPLY_HIGH, "unsignedMultiplyHighWithML",
"unsignedMultiplyHighML",
"unsignedMultiplyHighPlusMultiplyLow",
"unsignedMultiplyHighAndLow",
- "mhMultiplyHigh", "mhUnsignedMultiplyHigh",
+ "mhMultiplyHigh", "mhUnsignedMultiplyHigh",
"mhDynamicMultiplyHigh",
})
private String method;
@@ -251,6 +258,8 @@ public class LXMBenchmark {
gen = () -> mhMultiplyHigh(ga.getAsLong(), gb.getAsLong());
} else if ("mhUnsignedMultiplyHigh".equals(method)) {
gen = () -> mhUnsignedMultiplyHigh(ga.getAsLong(),
gb.getAsLong());
+ } else if ("mhDynamicMultiplyHigh".equals(method)) {
+ gen = () -> mhDynamicMultiplyHigh(ga.getAsLong(),
gb.getAsLong());
} else {
throw new IllegalStateException("Unknown method: " + method);
}
@@ -448,6 +457,22 @@ public class LXMBenchmark {
static long mhUnsignedMultiplyHigh(long a, long b) {
return MATH_UNSIGNED_MULTIPLY_HIGH.applyAsLong(a, b);
}
+
+ /**
+ * Compute the unsigned multiply of two values using in descending
order:
+ * <ul>
+ * <li>Method handle to Math.unsignedMultiplyHigh (JDK 18);
+ * <li>Method handle to Math.multiplyHigh (JDK 9); or
+ * <li>Default software implementation.
+ * </ul>
+ *
+ * @param a First value
+ * @param b Second value
+ * @return the upper 64-bits of the 128-bit result
+ */
+ static long mhDynamicMultiplyHigh(long a, long b) {
+ return DYNAMIC_MULTIPLY_HIGH.applyAsLong(a, b);
+ }
}
/**
diff --git
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMGenerationPerformance.java
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMGenerationPerformance.java
new file mode 100644
index 00000000..65a474aa
--- /dev/null
+++
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMGenerationPerformance.java
@@ -0,0 +1,595 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.core;
+
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.commons.rng.UniformRandomProvider;
+import
org.apache.commons.rng.examples.jmh.core.LXMBenchmark.UnsignedMultiplyHighSource;
+import org.apache.commons.rng.simple.RandomSource;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+/**
+ * Executes benchmark to compare the speed of generation of random numbers
from the
+ * various 128-bit LXM providers. This targets use of different methods to
compute the
+ * 128-bit multiplication from two 64-bit numbers.
+ */
+public class LXMGenerationPerformance extends AbstractBenchmark {
+ /** The long value. Must NOT be final to prevent JVM optimisation! */
+ private long longValue;
+
+ /**
+ * The benchmark state (retrieve the various "RandomSource"s).
+ */
+ @State(Scope.Benchmark)
+ public static class Sources {
+ /**
+ * RNG providers.
+ */
+ @Param({"L128_X128_MIX_ORIGINAL",
+ "L128_X256_MIX_ORIGINAL",
+ "L128_X1024_MIX_ORIGINAL",
+ "L128_X128_MIX_MH",
+ "L128_X128_MIX_MH",
+ "L128_X1024_MIX_MH",
+ "L128_X128_MIX",
+ "L128_X256_MIX",
+ "L128_X1024_MIX"})
+ private String randomSourceName;
+
+ /** RNG. */
+ private UniformRandomProvider provider;
+
+ /**
+ * Gets the generator.
+ *
+ * @return the RNG.
+ */
+ public UniformRandomProvider getGenerator() {
+ return provider;
+ }
+
+ /** Instantiates generator. This need only be done once per set of
iterations. */
+ @Setup(Level.Trial)
+ public void setup() {
+ if ("L128_X128_MIX_ORIGINAL".equals(randomSourceName)) {
+ provider = new L128X128MixOriginal(longSeed(6));
+ } else if ("L128_X256_MIX_ORIGINAL".equals(randomSourceName)) {
+ provider = new L128X256MixOriginal(longSeed(8));
+ } else if ("L128_X1024_MIX_ORIGINAL".equals(randomSourceName)) {
+ provider = new L128X1024MixOriginal(longSeed(20));
+ } else if ("L128_X128_MIX_MH".equals(randomSourceName)) {
+ provider = new L128X128MixMH(longSeed(6));
+ } else if ("L128_X256_MIX_MH".equals(randomSourceName)) {
+ provider = new L128X256MixMH(longSeed(8));
+ } else if ("L128_X1024_MIX_MH".equals(randomSourceName)) {
+ provider = new L128X1024MixMH(longSeed(20));
+ } else {
+ final RandomSource randomSource =
RandomSource.valueOf(randomSourceName);
+ provider = randomSource.create();
+ }
+ }
+
+ /**
+ * @param size Seed size.
+ * @return the long[] seed
+ */
+ private static long[] longSeed(int size) {
+ return ThreadLocalRandom.current().longs(size).toArray();
+ }
+ }
+
+ /**
+ * Perform a 64-bit mixing function using Doug Lea's 64-bit mix constants
and shifts.
+ *
+ * <p>This is based on the original 64-bit mix function of Austin Appleby's
+ * MurmurHash3 modified to use a single mix constant and 32-bit shifts,
which may have
+ * a performance advantage on some processors. The code is provided in
Steele and
+ * Vigna's paper.
+ *
+ * @param x the input value
+ * @return the output value
+ */
+ static long lea64(long x) {
+ x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
+ x = (x ^ (x >>> 32)) * 0xdaba0b6eb09322e3L;
+ return x ^ (x >>> 32);
+ }
+
+ /**
+ * Add the two values as if unsigned 64-bit longs to produce the high
64-bits
+ * of the 128-bit unsigned result.
+ *
+ * <h2>Warning</h2>
+ *
+ * <p>This method is computing a carry bit for a 128-bit linear
congruential
+ * generator (LCG). The method is <em>not</em> applicable to all arguments.
+ * Some computations can be dropped if the {@code right} argument is
assumed to
+ * be the LCG addition, which should be odd to ensure a full period LCG.
+ *
+ * @param left the left argument
+ * @param right the right argument (assumed to have the lowest bit set to
1)
+ * @return the carry (either 0 or 1)
+ */
+ static long unsignedAddHigh(long left, long right) {
+ // Method compiles to 13 bytes as Java byte code.
+ // This is below the default of 35 for code inlining.
+ //
+ // The unsigned add of left + right may have a 65-bit result.
+ // If both values are shifted right by 1 then the sum will be
+ // within a 64-bit long. The right is assumed to have a low
+ // bit of 1 which has been lost in the shift. The method must
+ // compute if a 1 was shifted off the left which would have
+ // triggered a carry when adding to the right's assumed 1.
+ // The intermediate 64-bit result is shifted
+ // 63 bits to obtain the most significant bit of the 65-bit result.
+ // Using -1 is the same as a shift of (64 - 1) as only the last 6 bits
+ // are used by the shift but requires 1 less byte in java byte code.
+ //
+ // 01100001 left
+ // + 10011111 right always has low bit set to 1
+ //
+ // 0110000 1 carry last bit of left
+ // + 1001111 |
+ // + 1 <-+
+ // = 10000000 carry bit generated
+ return ((left >>> 1) + (right >>> 1) + (left & 1)) >>> -1;
+ }
+
+ /**
+ * Class adapted from the original implementation of LXM generators.
+ */
+ abstract static class AbstractL128 implements UniformRandomProvider {
+ /** Low half of 128-bit LCG multiplier. The upper half is {@code 1L}.
*/
+ static final long ML = 0xd605bbb58c8abbfdL;
+
+ /** High half of the 128-bit per-instance LCG additive parameter.
+ * Cannot be final to support RestorableUniformRandomProvider. */
+ protected long lah;
+ /** Low half of the 128-bit per-instance LCG additive parameter (must
be odd).
+ * Cannot be final to support RestorableUniformRandomProvider. */
+ protected long lal;
+ /** High half of the 128-bit state of the LCG generator. */
+ protected long lsh;
+ /** Low half of the 128-bit state of the LCG generator. */
+ protected long lsl;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ AbstractL128(long[] seed) {
+ lah = seed[0];
+ // Additive parameter must be odd
+ lal = seed[1] | 1;
+ lsh = seed[2];
+ lsl = seed[3];
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 128-bit
Xor-based generator.
+ */
+ static class L128X128MixOriginal extends AbstractL128 {
+ /** State 0 of the XBG. */
+ private long x0;
+ /** State 1 of the XBG. */
+ private long x1;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X128MixOriginal(long[] seed) {
+ super(seed);
+ x0 = seed[4];
+ x1 = seed[5];
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ final long s0 = x0;
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.unsignedMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ long s1 = x1;
+
+ s1 ^= s0;
+ x0 = Long.rotateLeft(s0, 24) ^ s1 ^ (s1 << 16); // a, b
+ x1 = Long.rotateLeft(s1, 37); // c
+
+ return z;
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 256-bit
Xor-based generator.
+ */
+ static class L128X256MixOriginal extends AbstractL128 {
+ /** State 0 of the XBG. */
+ private long x0;
+ /** State 1 of the XBG. */
+ private long x1;
+ /** State 2 of the XBG. */
+ private long x2;
+ /** State 3 of the XBG. */
+ private long x3;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X256MixOriginal(long[] seed) {
+ super(seed);
+ x0 = seed[4];
+ x1 = seed[5];
+ x2 = seed[6];
+ x3 = seed[7];
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ long s0 = x0;
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.unsignedMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ long s1 = x1;
+ long s2 = x2;
+ long s3 = x3;
+
+ final long t = s1 << 17;
+
+ s2 ^= s0;
+ s3 ^= s1;
+ s1 ^= s2;
+ s0 ^= s3;
+
+ s2 ^= t;
+
+ s3 = Long.rotateLeft(s3, 45);
+
+ x0 = s0;
+ x1 = s1;
+ x2 = s2;
+ x3 = s3;
+
+ return z;
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 1024-bit
Xor-based generator.
+ */
+ static class L128X1024MixOriginal extends AbstractL128 {
+ /** State of the XBG. */
+ private final long[] x = new long[20];
+ /** Index in "state" array. */
+ private int index;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X1024MixOriginal(long[] seed) {
+ super(seed);
+ System.arraycopy(seed, 4, x, 0, 16);
+ // Initialising to 15 ensures that (index + 1) % 16 == 0 and the
+ // first state picked from the XBG generator is state[0].
+ index = 15;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ final int q = index;
+ index = (q + 1) & 15;
+ final long s0 = x[index];
+ long s15 = x[q];
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.unsignedMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ s15 ^= s0;
+ x[q] = Long.rotateLeft(s0, 25) ^ s15 ^ (s15 << 27);
+ x[index] = Long.rotateLeft(s15, 36);
+
+ return z;
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 128-bit
Xor-based generator
+ * to use a dynamic call to the best available multiply high method.
+ */
+ static class L128X128MixMH extends AbstractL128 {
+ /** State 0 of the XBG. */
+ private long x0;
+ /** State 1 of the XBG. */
+ private long x1;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X128MixMH(long[] seed) {
+ super(seed);
+ x0 = seed[4];
+ x1 = seed[5];
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ final long s0 = x0;
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.mhDynamicMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ long s1 = x1;
+
+ s1 ^= s0;
+ x0 = Long.rotateLeft(s0, 24) ^ s1 ^ (s1 << 16); // a, b
+ x1 = Long.rotateLeft(s1, 37); // c
+
+ return z;
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 256-bit
Xor-based generator
+ * to use a dynamic call to the best available multiply high method.
+ */
+ static class L128X256MixMH extends AbstractL128 {
+ /** State 0 of the XBG. */
+ private long x0;
+ /** State 1 of the XBG. */
+ private long x1;
+ /** State 2 of the XBG. */
+ private long x2;
+ /** State 3 of the XBG. */
+ private long x3;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X256MixMH(long[] seed) {
+ super(seed);
+ x0 = seed[4];
+ x1 = seed[5];
+ x2 = seed[6];
+ x3 = seed[7];
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ long s0 = x0;
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.mhDynamicMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ long s1 = x1;
+ long s2 = x2;
+ long s3 = x3;
+
+ final long t = s1 << 17;
+
+ s2 ^= s0;
+ s3 ^= s1;
+ s1 ^= s2;
+ s0 ^= s3;
+
+ s2 ^= t;
+
+ s3 = Long.rotateLeft(s3, 45);
+
+ x0 = s0;
+ x1 = s1;
+ x2 = s2;
+ x3 = s3;
+
+ return z;
+ }
+ }
+
+ /**
+ * Adapted from the original implementation of 128-bit LCG and 1024-bit
Xor-based generator
+ * to use a dynamic call to the best available multiply high method.
+ */
+ static class L128X1024MixMH extends AbstractL128 {
+ /** State of the XBG. */
+ private final long[] x = new long[20];
+ /** Index in "state" array. */
+ private int index;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param seed Initial seed.
+ */
+ L128X1024MixMH(long[] seed) {
+ super(seed);
+ System.arraycopy(seed, 4, x, 0, 16);
+ // Initialising to 15 ensures that (index + 1) % 16 == 0 and the
+ // first state picked from the XBG generator is state[0].
+ index = 15;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public long nextLong() {
+ // LXM generate.
+ // Old state is used for the output allowing parallel pipelining
+ // on processors that support multiple concurrent instructions.
+
+ final int q = index;
+ index = (q + 1) & 15;
+ final long s0 = x[index];
+ long s15 = x[q];
+ final long sh = lsh;
+
+ // Mix
+ final long z = lea64(sh + s0);
+
+ // LCG update
+ // The LCG is, in effect, "s = m * s + a" where m = ((1LL << 64) +
ML)
+ final long sl = lsl;
+ final long al = lal;
+ final long u = ML * sl;
+ // High half
+ lsh = ML * sh +
UnsignedMultiplyHighSource.mhDynamicMultiplyHigh(ML, sl) + sl + lah +
+ // Carry propagation
+ unsignedAddHigh(u, al);
+ // Low half
+ lsl = u + al;
+
+ // XBG update
+ s15 ^= s0;
+ x[q] = Long.rotateLeft(s0, 25) ^ s15 ^ (s15 << 27);
+ x[index] = Long.rotateLeft(s15, 36);
+
+ return z;
+ }
+ }
+
+ /**
+ * Baseline for a JMH method call returning a {@code long}.
+ *
+ * @return the value
+ */
+ @Benchmark
+ public long baselineLong() {
+ return longValue;
+ }
+
+ /**
+ * Exercise the {@link UniformRandomProvider#nextLong()} method.
+ *
+ * @param sources Source of randomness.
+ * @return the long
+ */
+ @Benchmark
+ public long nextLong(Sources sources) {
+ return sources.getGenerator().nextLong();
+ }
+}