------- Comment #11 from jkherciueh at gmx dot net 2007-10-20 11:07 ------- (In reply to comment #6) > Actually, would be: > > Index: random > =================================================================== > --- random (revision 129456) > +++ random (working copy) > @@ -1607,7 +1607,8 @@ > { > typedef typename __gnu_cxx::__add_unsigned<typename > _UniformRandomNumberGenerator::result_type>::__type __utype; > - return result_type(__utype(__urng()) % (__max - __min + 1)) + __min; > + return result_type((__max - __min + 1.0L) * __utype(__urng()) > + / (__utype(__urng.max()) + 1.0L)) + __min; > } > > template<typename _UniformRandomNumberGenerator> >
I just tested this. It does a much better job at obfuscating the bias, which is still there (after all you are rescaling linearly). If you rescale a domain of size 16 to a range of size 12, some buckets will get 2 and some will get 1. The following program demonstrates this: #include <ctime> #include <cassert> #include <iostream> #include <vector> #include <tr1/random> void test ( unsigned int num_buckets, unsigned int seed = 3141592 ) { std::cout << "using " << num_buckets << " buckets:\n"; std::tr1::mt19937 eng(seed); // we make sure that the range size is a multiple of // the number of buckets: const unsigned long target_range = ( eng.max() + 1.0L ) * 0.75; const unsigned long bucket_size = target_range / num_buckets; const unsigned long range = bucket_size * num_buckets - 1; assert( bucket_size * num_buckets == range + 1 ); std::tr1::uniform_int<unsigned long> rnd( 0, range ); std::vector<unsigned long> bucket ( num_buckets, 0 ); for ( unsigned int i = 0; i < 1000000; ++i ) { ++bucket[ rnd(eng) % num_buckets ]; //++bucket[ rnd(eng) / bucket_size ]; } for ( unsigned int i = 0; i < num_buckets; ++i ) { std::cout << i << ": " << bucket[i] << '\n'; } std::cout << '\n'; } int main ( void ) { test( 2 ); test( 3 ); test( 4 ); test( 5 ); test( 6 ); test( 7 ); } Output: 0: 500742 1: 499258 using 3 buckets: 0: 499275 1: 250467 2: 250258 using 4 buckets: 0: 250024 1: 249533 2: 250718 3: 249725 using 5 buckets: 0: 199340 1: 200365 2: 199559 3: 200129 4: 200607 using 6 buckets: 0: 249861 1: 124864 2: 125278 3: 249414 4: 125603 5: 124980 using 7 buckets: 0: 142866 1: 142908 2: 142945 3: 142964 4: 142671 5: 142679 6: 142967 For 3 and 6 buckets, there is a 2:1 bias. For a truly uniform distribution, one needs to discard values every once in a while. Something like the shrink() method below: #include <iostream> #include <cstdlib> template < typename Engine > class remap_range { public: typedef unsigned long long value_type; private: typedef Engine engine_type; engine_type the_engine; value_type the_maximum; value_type (remap_range::* op) ( void ); // shrinking the range: // ==================== value_type bin_size; value_type top_legal_value; void set_shrink_pars ( value_type engine_max, value_type range_max ) { bin_size = value_type( ( engine_max + 1.0L ) / ( range_max + 1.0L ) ); value_type num_bins = value_type( ( the_engine.max() + 1.0L ) / bin_size ); top_legal_value = value_type( 1.0L * num_bins * bin_size - 1 ); } value_type shrink ( void ) { value_type random_value = the_engine(); while ( top_legal_value < random_value ) { random_value = the_engine(); } return ( random_value / bin_size ); } // expanding the range: // ==================== value_type expand ( void ) { return ( remap_to( the_maximum ) ); } // remapping: // ========== value_type remap_to ( value_type range_max ) { if ( range_max <= the_engine.max() ) { set_shrink_pars( the_engine.max(), range_max ); return ( shrink() ); } value_type random_value = 0; do { random_value = value_type ( ( the_engine.max() + 1.0L ) * remap_to( value_type( ( range_max + 1.0 ) / ( the_engine.max() + 1.0L ) ) ) + the_engine() ); } while ( random_value > range_max ); return ( random_value ); } public: remap_range ( Engine eng, value_type max ) : the_engine ( eng ) , the_maximum ( max ) { if ( the_maximum <= the_engine.max() ) { set_shrink_pars( the_engine.max(), the_maximum ); op = &remap_range::shrink; } else { op = &remap_range::expand; } } value_type max ( void ) const { return ( the_maximum ); } value_type operator() ( void ) { return ( (this->*op)() ); } }; class crand { public: typedef unsigned int value_type; crand ( unsigned int seed = 0 ) { std::srand( seed ); } value_type max ( void ) const { return ( RAND_MAX ); } value_type operator() ( void ) { return ( std::rand() ); } }; int main ( void ) { crand s; remap_range< crand > seven ( s, 7 ); remap_range< remap_range< crand > > thausand ( seven, 20 ); for ( unsigned int i = 0; i < 100000; ++i ) { std::cout << thausand() << '\n'; } } Best Kai-Uwe Bux -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33815