On Wed, May 18, 2022 at 05:08:02PM +1000, Damien Miller wrote:

> On Wed, 18 May 2022, Otto Moerbeek wrote:
> 
> > instrumenting the code to count the number of arc4random calls I see thsi:
> > 
> > openbsd; elapsed = 2.835819; calls = 12340949
> > bitmask; elapsed = 4.335576; calls = 17836216
> > bitmask+reuse; elapsed = 3.710277; calls = 15245337
> > 
> > (this is a different number of trials on a slow machine).
> > 
> > So the promise of less calls is not fulfilled. Sounds like a bug.
> 
> Well, I don't know whether the simple bitmasking approach will really
> result in fewer calls. I *think* our current approach has a higher
> probability per loop of suceeding (at least for small upper_bounds) than
> mask-and-test, but my brain is too addled with a cold to calculate it :/

I *assumed* the speedups seen in the web page were because of less rnd
calls.  That was a mistake.

> What Theo said is 100% right - the cost is dominated by that of the
> underlying RNG. If anyone wants a faster arc4random_uniform() then the
> first place to look it at arc4random().

One more reason to keep the current implementation of arc4random_uniform().

> 
> It's entirely possible that the speedups measured in that article
> are because they are using a omgoptimised (non-crypto) RNG and the
> improvements they saw were due solely to reduction the overhead inside
> the uniformity code even if it came at the cost of more RNG queries.

Right.

> Anyway, here's a tweaked up version of the test harness that fakes out
> arc4random() with a deterministic RNG that counts how often it is called
> in case anyone wants to play with it further.

        -Otto


> 
> ----
> 
> 
> #include <stdlib.h>
> #include <stdio.h>
> 
> #include <stdint.h>
> #include <stdlib.h>
> 
> static uint32_t nqueries;
> 
> /* deterministic, query-counting arc4random() replacement for benchmarking */
> static uint32_t
> fake_random(void)
> {
>       static uint32_t ready, remain, msb;
>       uint32_t ret;
> 
>       if (!ready) {
>               srand_deterministic(31337);
>               ready = 1;
>       }
>       if (remain == 0) {
>               msb = (uint32_t)rand();
>               remain = 31; /* XXX makes assumption re RAND_MAX */
>       }
>       ret = (uint32_t)rand() | (msb << 31);
>       msb >>= 1;
>       remain--;
>       nqueries++;
>       return ret;     
> }
> 
> #define arc4random() fake_random()
> 
> #ifndef __has_builtin
> #define __has_builtin(x) 0
> #endif
> #if __has_builtin(__builtin_clz)
> #define arc4random_clz(x) __builtin_clz(x)
> #else
> #warning lacks __builtin_clz()
> /* Count most-significant zero bits, like __builtin_clz() */
> static int
> arc4random_clz(unsigned int x)
> {
>       int ret = 0;
>       unsigned int n;
>       const int lut[16] = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
> 
>       while (x != 0) {
>               n = (x >> 28) & 0xf;
>               if (n != 0)
>                       return ret + lut[n];
>               x <<= 4;
>               ret += 4;
>       }
>       return 32;
> }
> #endif
> 
> /* Current OpenBSD implementation */
> static uint32_t
> arc4random_uniform1(uint32_t upper_bound)
> {
>       uint32_t r, min;
> 
>       if (upper_bound < 2)
>               return 0;
> 
>       /* 2**32 % x == (2**32 - x) % x */
>       min = -upper_bound % upper_bound;
> 
>       /*
>        * This could theoretically loop forever but each retry has
>        * p > 0.5 (worst case, usually far better) of selecting a
>        * number inside the range we need, so it should rarely need
>        * to re-roll.
>        */
>       for (;;) {
>               r = arc4random();
>               if (r >= min)
>                       break;
>       }
> 
>       return r % upper_bound;
> }
> 
> /*
>  * Like "Bitmask with Rejection" implementation from
>  * https://www.pcg-random.org/posts/bounded-rands.html
>  */
> static uint32_t
> arc4random_uniform2(uint32_t upper_bound)
> {
>       uint32_t mask, r;
> 
>       if (upper_bound < 2)
>               return 0;
> 
>       mask = 0xffffffff >> arc4random_clz((upper_bound - 1) | 1);;
>       for (;;) {
>               r = arc4random() & mask;
>               if (r < upper_bound)
>                       return r;
>       }
>       /* NOTREACHED */
> }
> 
> /*
>  * Like Apple's
>  * 
> https://opensource.apple.com/source/Libc/Libc-1158.50.2/gen/FreeBSD/arc4random.c
>  */
> static uint32_t
> arc4random_uniform3(uint32_t upper_bound)
> {
>       int zeroes, bits, remain;
>       uint32_t mask, r, rm;
> 
>       if (upper_bound < 2)
>               return 0;
> 
>       zeroes = arc4random_clz((upper_bound - 1) | 1);
>       bits = 32 - zeroes;
>       mask = (uint32_t)-1 >> zeroes;
> 
>       for (;;) {
>               r = arc4random();
>               rm = r & mask;
>               if (rm < upper_bound)
>                       return rm;
>               for (remain = zeroes; remain >= bits; remain -= bits) {
>                       r >>= bits;
>                       rm = r & mask;
>                       if (rm < upper_bound)
>                               return rm;
>               }
>       }
>       /* NOTREACHED */
> }
> 
> #include <sys/time.h>
> 
> static uint32_t
> fixture(const char *s, uint32_t (* const rnd)(uint32_t))
> {
>       const uint32_t trials = 20 * 1000 * 1000;
>       const uint32_t max = 0x80000000;
>       const uint32_t mul = 7;
>       unsigned int v, i, r;
>       struct timeval start, finish, delta;
> 
>       nqueries = 0;
>       gettimeofday(&start, NULL);
>       for (v = 3, r = 0; v < max; v *= mul) {
>               /* printf("%u\n", v); */
>               for (i = 0; i < trials; i++)
>                       r |= rnd(v);
>       }
>       gettimeofday(&finish, NULL);
>       timersub(&finish, &start, &delta);
>       printf("%s; elapsed = %lld.%06lld, queries = %u\n", s,
>           (long long)delta.tv_sec, (long long)delta.tv_usec, nqueries);
>       return r;
> }
> 
> int main(void) {
>       fixture("openbsd", arc4random_uniform1);
>       fixture("bitmask", arc4random_uniform2);
>       fixture("bitmask+reuse", arc4random_uniform3);
> }
> 

Reply via email to