Repository: spark Updated Branches: refs/heads/master 71f69d66c -> 5a3ad107c
SPARK-1129: use a predefined seed when seed is zero in XORShiftRandom If the seed is zero, XORShift generates all zeros, which would produce unexpected results. JIRA: https://spark-project.atlassian.net/browse/SPARK-1129 Author: Xiangrui Meng <[email protected]> Closes #645 from mengxr/xor and squashes the following commits: 1b086ab [Xiangrui Meng] use MurmurHash3 to set seed in XORShiftRandom 45c6f16 [Xiangrui Meng] minor style change 51f4050 [Xiangrui Meng] use a predefined seed when seed is zero in XORShiftRandom Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a3ad107 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a3ad107 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a3ad107 Branch: refs/heads/master Commit: 5a3ad107c01dbc09cd144376ca808125ee821093 Parents: 71f69d6 Author: Xiangrui Meng <[email protected]> Authored: Wed Feb 26 23:22:30 2014 -0800 Committer: Reynold Xin <[email protected]> Committed: Wed Feb 26 23:22:30 2014 -0800 ---------------------------------------------------------------------- .../apache/spark/util/random/XORShiftRandom.scala | 15 ++++++++++++--- .../spark/util/random/XORShiftRandomSuite.scala | 4 ++++ 2 files changed, 16 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5a3ad107/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala index ca611b6..8a4cdea 100644 --- a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala +++ b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala @@ -17,8 +17,11 @@ package org.apache.spark.util.random +import java.nio.ByteBuffer import 
java.util.{Random => JavaRandom} +import scala.util.hashing.MurmurHash3 + import org.apache.spark.util.Utils.timeIt /** @@ -36,8 +39,8 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) { def this() = this(System.nanoTime) - private var seed = init - + private var seed = XORShiftRandom.hashSeed(init) + // we need to just override next - this will be called by nextInt, nextDouble, // nextGaussian, nextLong, etc. override protected def next(bits: Int): Int = { @@ -49,13 +52,19 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) { } override def setSeed(s: Long) { - seed = s + seed = XORShiftRandom.hashSeed(s) } } /** Contains benchmark method and main method to run benchmark of the RNG */ private[spark] object XORShiftRandom { + /** Hash seeds to have 0/1 bits throughout. */ + private def hashSeed(seed: Long): Long = { + val bytes = ByteBuffer.allocate(java.lang.Long.SIZE).putLong(seed).array() + MurmurHash3.bytesHash(bytes) + } + /** * Main method for running benchmark * @param args takes one argument - the number of random numbers to generate http://git-wip-us.apache.org/repos/asf/spark/blob/5a3ad107/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala index c51d12b..757476e 100644 --- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala @@ -72,4 +72,8 @@ class XORShiftRandomSuite extends FunSuite with ShouldMatchers { } + test ("XORShift with zero seed") { + val random = new XORShiftRandom(0L) + assert(random.nextInt() != 0) + } }
