------- Comment #11 from jkherciueh at gmx dot net  2007-10-20 11:07 -------
(In reply to comment #6)
> Actually, would be:
> 
> Index: random
> ===================================================================
> --- random      (revision 129456)
> +++ random      (working copy)
> @@ -1607,7 +1607,8 @@
>          {
>           typedef typename __gnu_cxx::__add_unsigned<typename
>             _UniformRandomNumberGenerator::result_type>::__type __utype;
> -         return result_type(__utype(__urng()) % (__max - __min + 1)) + __min;
> +         return result_type((__max - __min + 1.0L) * __utype(__urng())
> +                            / (__utype(__urng.max()) + 1.0L)) + __min;
>         }
> 
>        template<typename _UniformRandomNumberGenerator>
> 

I just tested this. It does a much better job at obfuscating the bias, which is
still there (after all you are rescaling linearly). If you rescale a domain of
size 16 to a range of size 12, some buckets will get 2 and some will get 1.

The following program demonstrates this:

#include <ctime>
#include <cassert>
#include <iostream>
#include <vector>
#include <tr1/random>

void test ( unsigned int num_buckets, unsigned int seed = 3141592 ) {
  std::cout << "using " << num_buckets << " buckets:\n";
  std::tr1::mt19937 eng(seed);
  // we make sure that the range size is a multiple of
  // the number of buckets:
  const unsigned long target_range = ( eng.max() + 1.0L ) * 0.75;
  const unsigned long bucket_size = target_range / num_buckets;
  const unsigned long range = bucket_size * num_buckets - 1;
  assert( bucket_size * num_buckets == range + 1 );
  std::tr1::uniform_int<unsigned long> rnd( 0, range );
  std::vector<unsigned long> bucket ( num_buckets, 0 );
  for ( unsigned int i = 0; i < 1000000; ++i ) {
    ++bucket[ rnd(eng) % num_buckets ];
    //++bucket[ rnd(eng) / bucket_size ];
  }
  for ( unsigned int i = 0; i < num_buckets; ++i ) {
    std::cout << i << ": " << bucket[i] << '\n';
  }
  std::cout << '\n';
}

int main ( void ) {
  test( 2 );
  test( 3 );
  test( 4 );
  test( 5 );
  test( 6 );
  test( 7 );
}

Output:

0: 500742
1: 499258

using 3 buckets:
0: 499275
1: 250467
2: 250258

using 4 buckets:
0: 250024
1: 249533
2: 250718
3: 249725

using 5 buckets:
0: 199340
1: 200365
2: 199559
3: 200129
4: 200607

using 6 buckets:
0: 249861
1: 124864
2: 125278
3: 249414
4: 125603
5: 124980

using 7 buckets:
0: 142866
1: 142908
2: 142945
3: 142964
4: 142671
5: 142679
6: 142967

For 3 and 6 buckets, there is a 2:1 bias.


For a truly uniform distribution, one needs to discard values every once in a
while. Something like the shrink() method below:


#include <iostream>
#include <cstdlib>

template < typename Engine >
class remap_range {
public:

  typedef unsigned long long  value_type;

private:

  typedef Engine engine_type;

  engine_type  the_engine;
  value_type   the_maximum;


  value_type (remap_range::* op) ( void );


  // shrinking the range:
  // ====================
  value_type   bin_size;
  value_type   top_legal_value;

  void set_shrink_pars ( value_type engine_max, value_type range_max ) {
    bin_size = value_type( ( engine_max + 1.0L ) / ( range_max + 1.0L ) );
    value_type num_bins =
      value_type( ( the_engine.max() + 1.0L ) / bin_size );
    top_legal_value = value_type( 1.0L * num_bins * bin_size - 1 );
  }

  value_type shrink ( void ) {
    value_type random_value = the_engine();
    while ( top_legal_value < random_value ) {
      random_value = the_engine();
    }
    return ( random_value / bin_size );
  }

  // expanding the range:
  // ====================
  value_type expand ( void ) {
    return ( remap_to( the_maximum ) );
  }


  // remapping:
  // ==========
  value_type remap_to ( value_type range_max ) {
    if ( range_max <= the_engine.max() ) {
      set_shrink_pars( the_engine.max(), range_max );
      return ( shrink() );
    }
    value_type random_value = 0;
    do {
      random_value =
        value_type
        (
         ( the_engine.max() + 1.0L ) *
         remap_to( value_type( ( range_max + 1.0 )
                               / ( the_engine.max() + 1.0L ) ) )
         +
         the_engine()
         );
    } while ( random_value > range_max );
    return ( random_value );
  }


public:

  remap_range ( Engine eng, value_type max )
    : the_engine  ( eng )
    , the_maximum ( max )
  {
    if ( the_maximum <= the_engine.max() ) {
      set_shrink_pars( the_engine.max(), the_maximum );
      op = &remap_range::shrink;
    } else {
      op = &remap_range::expand;
    }
  }


  value_type max ( void ) const {
    return ( the_maximum );
  }

  value_type operator() ( void ) {
    return ( (this->*op)() );
  }

};



class crand {
public:

  typedef unsigned int value_type;

  crand ( unsigned int seed = 0 ) {
    std::srand( seed );
  }


  value_type max ( void ) const {
    return ( RAND_MAX );
  }

  value_type operator() ( void ) {
    return ( std::rand() );
  }

};


int main ( void ) {
  crand s;
  remap_range< crand > seven ( s, 7 );
  remap_range< remap_range< crand > > thausand ( seven, 20 );
  for ( unsigned int i = 0; i < 100000; ++i ) {
    std::cout << thausand() << '\n';
  }
}


Best

Kai-Uwe Bux


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33815

Reply via email to