[patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax
The std::min, std::max, and std::minmax overloads that take a std::initializer_list all require that the list is not empty. The attached patch adds debug mode checks for this. Thanks, Eelis Index: libstdc++-v3/include/debug/formatter.h === --- libstdc++-v3/include/debug/formatter.h (revision 233636) +++ libstdc++-v3/include/debug/formatter.h (working copy) @@ -87,6 +87,8 @@ __msg_splice_bad, __msg_splice_other, __msg_splice_overlap, +// std::initializer_list checks +__msg_empty_init_list, // iterator checks __msg_init_singular, __msg_init_copy_singular, Index: libstdc++-v3/src/c++11/debug.cc === --- libstdc++-v3/src/c++11/debug.cc (revision 233636) +++ libstdc++-v3/src/c++11/debug.cc (working copy) @@ -139,6 +139,8 @@ "attempt to splice an iterator from a different container", "splice destination %1.name;" " occurs within source range [%2.name;, %3.name;)", +// std::initializer_list checks +"%1;(): empty initializer_list", // iterator checks "attempt to initialize an iterator that will immediately become singular", "attempt to copy-construct an iterator from a singular iterator", Index: libstdc++-v3/include/debug/macros.h === --- libstdc++-v3/include/debug/macros.h (revision 233636) +++ libstdc++-v3/include/debug/macros.h (working copy) @@ -69,6 +69,12 @@ ._M_iterator(_First, #_First) \ ._M_iterator(_Last, #_Last)) +// Verify that the initializer_list is non-empty. +#define __glibcxx_check_non_empty_init_list(_List) \ +_GLIBCXX_DEBUG_VERIFY(_List.size() != 0,\ + _M_message(__gnu_debug::__msg_empty_init_list) \ + ._M_string(__func__)) + /** Verify that we can insert into *this with the iterator _Position. * Insertion into a container at a specific position requires that * the iterator be nonsingular, either dereferenceable or past-the-end, Index: libstdc++-v3/include/debug/debug.h === --- libstdc++-v3/include/debug/debug.h (revision 233636) +++ libstdc++-v3/include/debug/debug.h (working copy) @@ -62,6 +62,7 @@ # define __glibcxx_requires_cond(_Cond,_Msg) # define __glibcxx_requires_valid_range(_First,_Last) +# define __glibcxx_requires_non_empty_init_list(_List) # define __glibcxx_requires_sorted(_First,_Last) # define __glibcxx_requires_sorted_pred(_First,_Last,_Pred) # define __glibcxx_requires_sorted_set(_First1,_Last1,_First2) @@ -99,6 +100,8 @@ # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg) # define __glibcxx_requires_valid_range(_First,_Last) \ __glibcxx_check_valid_range(_First,_Last) +# define __glibcxx_requires_non_empty_init_list(_List) \ + __glibcxx_check_non_empty_init_list(_List) # define __glibcxx_requires_non_empty_range(_First,_Last) \ __glibcxx_check_non_empty_range(_First,_Last) # define __glibcxx_requires_sorted(_First,_Last) \ Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 233636) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3452,25 +3452,37 @@ _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l) -{ return *std::min_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::min_element(__l.begin(), __l.end(), __comp); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end(), __comp); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l) -{ return *std::max_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::max_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::max_element(__l.begin(), __l.end(), __comp); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::max_element(__l.begin(), __l.end(), __comp); +} template _GLIBCXX14_CONSTEXPR @@ -3477,6 +3489,7 @@ inline pair<_Tp, _Tp> minmax(initializer_list<_Tp> __l) { + __glibcxx_requires_non_empty_init_list(__l); pair __p = std::minmax_element(__l.begin(), __l.end()); return std::make_pair(*__p.first, *__p.second); @@ -3487,6 +3500,7 @@ inline pair<_Tp, _Tp> minma
Re: [PATCH] Implement -fdiagnostics-parseable-fixits
On 2016-06-22 05:25, David Malcolm wrote: Clang has an option -fdiagnostics-parseable-fixits, which emits a machine-readable version of the fixits, for use by IDEs. (The only IDE I know of that supports this format is Xcode [1]; does anyone know of other IDEs supporting this? I'd be very happy if someone implemented Emacs support for it, or could point me at it). This patch implements the option for gcc. It seems prudent to be compatible with Clang here, so I've deliberately used the same option name and attempted to emulate the output format, Thanks a lot for this! I now use this in geordi (http://eelis.net/geordi). Geordi already supported Clang's fix-its, and thanks to your taking care to use the same format, enabling it for GCC was a matter of adding the flag. Cheers!
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-09-02 20:20, Eelis van der Weegen wrote: On 2016-08-31 14:45, Jonathan Wakely wrote: Is this significantly faster than just using uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't need to duplicate the logic? (And people maintaining the code won't reconvince themselves it's correct every time they look at it :-) [..] Could we hoist this test out of the loop somehow? If we change the loop condition to be __i+1 < __last we don't need to test it on every iteration, and then after the loop we can just do the final swap if (__urange % 2). Reusing std::uniform_int_distribution seems just as fast, so I've removed __generate_random_index_below. I've hoisted the (__i + 1 == __last) check out of the loop (in a slightly different way), and it seems to shave off a couple more cycles, yay! Updated patch attached. Please ignore that patch, I used __g()&1 but that's invalid (the new "UniformRandomBitGenerator" name is misleading). Updated patch (which uses a proper distribution even for the [0,1] case) attached. Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 239895) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3772,6 +3772,47 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = __g.max() - __g.min(); + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + _RandomAccessIterator __i = __first + 1; + + // Since we know the range isn't empty, an even number of elements + // means an uneven number of elements /to swap/, in which case we + // do the first one up front: + + if ((__urange % 2) == 0) + { + __distr_type __d{0, 1}; + std::iter_swap(__i++, __first + __d(__g)); + } + + // Now we know that __last - __i is even, so we do the rest in pairs, + // using a single distribution invocation to produce swap positions + // for two successive elements at a time: + + while (__i != __last) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + + std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; + const __uc_type __pospos = __d(__g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-05-01 16:18, Eelis wrote: The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: Gentle ping. :) Did anyone get a chance to take a look at this? Does the idea seem sound? Does the implementation seem correct? Thanks, Eelis
[patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution
Hi, The attached patch makes std::shuffle about 33% faster for the following testcase: #include #include #include int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } It achieves this by avoiding std::uniform_int_distribution when the generator's range is large enough, which is almost always the case. This helps a lot, because std::uniform_int_distribution::op() recomputes scaling factors every time. Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3738,12 +3738,40 @@ _DistanceType; typedef typename std::make_unsigned<_DistanceType>::type __ud_type; - typedef typename std::uniform_int_distribution<__ud_type> __distr_type; - typedef typename __distr_type::param_type __p_type; - __distr_type __d; + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; - for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) - std::iter_swap(__i, __first + __d(__g, __p_type(0, __i - __first))); + const __uc_type __urngrange = + _Gen::max() - _Gen::min() + 1; // +1 because the generator range is inclusive + + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange >= __urange) + { + const __uc_type __scaling = __urngrange / __urange; + const __uc_type __past = __urange * __scaling; + + for (_RandomAccessIterator __i = __first; __i != __last; ++__i) + { + __uc_type __j; + do + { + __j = __uc_type(__g()) - _Gen::min(); + } + while (__j >= __past); + + std::iter_swap(__i, __first + __j / __scaling); + } + } + else + { + typedef typename std::uniform_int_distribution<__ud_type> __distr_type; + typedef typename __distr_type::param_type __p_type; + __distr_type __d; + +for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) + std::iter_swap(__i, __first + __d(__g, __p_type(0, __i - __first))); + } } #endif
Re: [patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution
Please ignore this, I made the error described here: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#Implementation_errors :) On 2016-04-30 21:15, Eelis wrote: Hi, The attached patch makes std::shuffle about 33% faster for the following testcase: #include #include #include int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } It achieves this by avoiding std::uniform_int_distribution when the generator's range is large enough, which is almost always the case. This helps a lot, because std::uniform_int_distribution::op() recomputes scaling factors every time. Thoughts? Thanks, Eelis
[patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template +inline _IntType +__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) +{ + const _IntType __urngrange = __g.max() - __g.min() + 1; + const _IntType __scaling = __urngrange / __bound; + const _IntType __past = __bound * __scaling; + + for (;;) + { + const _IntType __r = _IntType(__g()) - __g.min(); + if (__r < __past) return __r / __scaling; + } +} + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + for (_RandomAccessIterator __i = __first + 1; __i != __last; ) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) + { + const __uc_type __pos = __generate_random_index_below(__swap_range, __g); + std::iter_swap(__i, __first + __pos); + return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
Sorry, forgot to include the libstdc++ list. On 2016-05-01 16:18, Eelis wrote: Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis
[PATCH] Forward to correct function in std::shared_mutex::unlock().
Looks like a typo. :) Index: libstdc++-v3/include/std/shared_mutex === --- libstdc++-v3/include/std/shared_mutex (revision 226840) +++ libstdc++-v3/include/std/shared_mutex (working copy) @@ -331,7 +331,7 @@ void lock() { _M_impl.lock(); } bool try_lock() { return _M_impl.try_lock(); } -void unlock() { _M_impl.try_lock(); } +void unlock() { _M_impl.unlock(); } // Shared ownership
[patch libstdc++] Fix assignability check in uninitialized_copy
std::uninitialized_copy tries to test assignability as follows: is_assignable<_ValueType1, _RefType>::value This is the wrong way around. For example, when the iterators are char*, this ends up as is_assignable::value which is false. It should be is_assignable::value which is true. The result is that uninitialized_copy needlessly picks the slow implementation. Trivial fix attached. (No tests ran because I'm getting "runtest: command not found" errors that I don't know how to fix.) Index: libstdc++-v3/include/bits/stl_uninitialized.h === --- libstdc++-v3/include/bits/stl_uninitialized.h (revision 219070) +++ libstdc++-v3/include/bits/stl_uninitialized.h (working copy) @@ -105,29 +105,29 @@ template inline _ForwardIterator uninitialized_copy(_InputIterator __first, _InputIterator __last, _ForwardIterator __result) { typedef typename iterator_traits<_InputIterator>::value_type _ValueType1; typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType2; #if __cplusplus < 201103L const bool __assignable = true; #else // trivial types can have deleted assignment typedef typename iterator_traits<_InputIterator>::reference _RefType; - const bool __assignable = is_assignable<_ValueType1, _RefType>::value; + const bool __assignable = is_assignable<_RefType, _ValueType1>::value; #endif return std::__uninitialized_copy<__is_trivial(_ValueType1) && __is_trivial(_ValueType2) && __assignable>:: __uninit_copy(__first, __last, __result); } template struct __uninitialized_fill { template static void
Re: [patch libstdc++] Fix assignability check in uninitialized_copy
On 2015-01-09 19:03, Jonathan Wakely wrote: The attached patch should be correct. Tested x86_64-linux, committed to trunk and 4.9. Awesome, thanks!
Re: [patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax
On 2016-02-23 23:39, Jonathan Wakely wrote: On 23/02/16 22:03 +0100, Eelis wrote: The std::min, std::max, and std::minmax overloads that take a std::initializer_list all require that the list is not empty. The attached patch adds debug mode checks for this. Nice, thanks for the patch. Hi Jonathan, Thanks for the review! Updated patch attached. I couldn't find precedent in the libstdc++ testsuite of tests that test __glibcxx_assert assertions, so I put these under {min,max,minmax}/assert/, analogous to {min,max,minmax}/debug/. Is that ok? Otherwise this looks good, but will have to wait until after the GCC 6 release now. No hurry at all. :) Cheers, Eelis Index: libstdc++-v3/include/debug/formatter.h === --- libstdc++-v3/include/debug/formatter.h (revision 233636) +++ libstdc++-v3/include/debug/formatter.h (working copy) @@ -127,7 +127,8 @@ // others __msg_equal_allocs, __msg_insert_range_from_self, -__msg_irreflexive_ordering +__msg_irreflexive_ordering, +__msg_empty_init_list }; class _Error_formatter Index: libstdc++-v3/src/c++11/debug.cc === --- libstdc++-v3/src/c++11/debug.cc (revision 233636) +++ libstdc++-v3/src/c++11/debug.cc (working copy) @@ -188,7 +188,8 @@ "allocators must be equal", "attempt to insert with an iterator range [%1.name;, %2.name;) from this" " container", -"comparison doesn't meet irreflexive requirements, assert(!(a < a))" +"comparison doesn't meet irreflexive requirements, assert(!(a < a))", +"%1;(): empty initializer_list" }; void Index: libstdc++-v3/include/debug/macros.h === --- libstdc++-v3/include/debug/macros.h (revision 233636) +++ libstdc++-v3/include/debug/macros.h (working copy) @@ -69,6 +69,12 @@ ._M_iterator(_First, #_First) \ ._M_iterator(_Last, #_Last)) +// Verify that the initializer_list is non-empty. +#define __glibcxx_check_non_empty_init_list(_List) \ +_GLIBCXX_DEBUG_VERIFY(_List.size() != 0,\ + _M_message(__gnu_debug::__msg_empty_init_list) \ + ._M_string(__func__)) + /** Verify that we can insert into *this with the iterator _Position. * Insertion into a container at a specific position requires that * the iterator be nonsingular, either dereferenceable or past-the-end, Index: libstdc++-v3/include/debug/debug.h === --- libstdc++-v3/include/debug/debug.h (revision 233636) +++ libstdc++-v3/include/debug/debug.h (working copy) @@ -87,9 +87,13 @@ // Verify that the container is nonempty # define __glibcxx_requires_nonempty() \ __glibcxx_assert(! this->empty()) +// Verify that the initializer_list is non-empty. +# define __glibcxx_requires_non_empty_init_list(_List) \ + __glibcxx_assert(_List.size() != 0) #else # define __glibcxx_requires_non_empty_range(_First,_Last) # define __glibcxx_requires_nonempty() +# define __glibcxx_requires_non_empty_init_list(_List) #endif #else @@ -99,6 +103,8 @@ # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg) # define __glibcxx_requires_valid_range(_First,_Last) \ __glibcxx_check_valid_range(_First,_Last) +# define __glibcxx_requires_non_empty_init_list(_List) \ + __glibcxx_check_non_empty_init_list(_List) # define __glibcxx_requires_non_empty_range(_First,_Last) \ __glibcxx_check_non_empty_range(_First,_Last) # define __glibcxx_requires_sorted(_First,_Last) \ Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 233636) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3452,25 +3452,37 @@ _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l) -{ return *std::min_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::min_element(__l.begin(), __l.end(), __comp); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end(), __comp); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l) -{ return *std::max_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::max_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::max_element(__l.
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-09-01 17:14, Jonathan Wakely wrote: On 31/08/16 13:45 +0100, Jonathan Wakely wrote: On 03/05/16 16:42 +0200, Eelis van der Weegen wrote: Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list. On 2016-05-03 14:38, Jonathan Wakely wrote: ENOPATCH On 1 May 2016 at 15:21, Eelis wrote: Sorry, forgot to include the libstdc++ list. On 2016-05-01 16:18, Eelis wrote: Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h(revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h(working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template We should avoid introducing new names based on "uniform random number generator" and use _UniformRandomBitGenerator as per https://wg21.link/p0346r1 +inline _IntType +__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) +{ + const _IntType __urngrange = __g.max() - __g.min() + 1; Similarly, let's use __urbgrange here. + const _IntType __scaling = __urngrange / __bound; I think I'd like either a comment on the function documenting the assumption about __bound and __g, or an explicit check: __glibcxx_assert( __scaling >= __bound ); + const _IntType __past = __bound * __scaling; + + for (;;) + { +const _IntType __r = _IntType(__g()) - __g.min(); +if (__r < __past) return __r / __scaling; This is basically the same algorithm as uniform_int_distribution so doesn't introduce any bias, right? Is this significantly faster than just using uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't need to duplicate the logic? (And people maintaining the code won't reconvince themselves it's correct every time they look at it :-) + } +} + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { +for (_RandomAccessIterator __i = __first + 1; __i != __last; ) +{ + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) Could we hoist this test out of the loop somehow? If we change the loop condition to be __i+1 < __last we don't need to test it on every iteration, and then after the loop we can just do the final swap if (__urange % 2). + { +const __uc_type __pos = __generate_random_index_below(__swap_range, __g); +std::iter_swap(__i, __first + __pos); +return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); I think I've convinced myself this is correct :-) Values of __pospos will be uniformly distributed in [0, __comp_range) iThis is true, but ... and so the / and % results will be too. This isn't. If __swap_range is 3, then __comp_range is 10 and __pospos is uniformly distributed in [0, 9]. (__pospos % __swap_range) is not uniformly distributed, we get P(0) = 0.4, P(1) = 0.3, P(2) = 0.3. Similarly, (__pospos / __swap_range) is not uniform, we get P(0) = 0.3, P(1) = 0.3, P(2) = 0.3, P(3) = 0.1 This means that certain permuations of the input are more likely than others, which fails to meet the requirements of the function. Or is there a flaw in my reasoning? Just that if __sw
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-08-31 14:45, Jonathan Wakely wrote: Is this significantly faster than just using uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't need to duplicate the logic? (And people maintaining the code won't reconvince themselves it's correct every time they look at it :-) [..] Could we hoist this test out of the loop somehow? If we change the loop condition to be __i+1 < __last we don't need to test it on every iteration, and then after the loop we can just do the final swap if (__urange % 2). Reusing std::uniform_int_distribution seems just as fast, so I've removed __generate_random_index_below. I've hoisted the (__i + 1 == __last) check out of the loop (in a slightly different way), and it seems to shave off a couple more cycles, yay! Updated patch attached. Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 239895) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3772,6 +3772,46 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = __g.max() - __g.min(); + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + _RandomAccessIterator __i = __first + 1; + + // Since we know the range isn't empty, an even number of elements + // means an uneven number of elements /to swap/, in which case we + // do the first one up front: + + if ((__urange % 2) == 0) + { + std::iter_swap(__i++, __first + (__g() & 1)); + } + + // Now we know that __last - __i is even, so we do the rest in pairs, + // using a single distribution invocation to produce swap positions + // for two successive elements at a time: + + while (__i != __last) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + + std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; + const __uc_type __pospos = __d(__g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
[patch, libstdc++] Optimize selection sampling by using generator to get two values at once
This is the same optimization as was recently applied to std::shuffle. It reduces the runtime of the following program by 20% on my machine: int main() { std::vector a(1), b(1000); std::mt19937 gen; uint64_t c = 0; for (int i = 0; i != 1000; ++i) { std::sample(a.begin(), a.end(), b.begin(), b.size(), gen); c += uint64_t(b[32]); } std::cout << c; } Index: bits/stl_algo.h === --- bits/stl_algo.h (revision 241299) +++ bits/stl_algo.h (working copy) @@ -3741,6 +3741,37 @@ #ifdef _GLIBCXX_USE_C99_STDINT_TR1 /** + * @brief Generate two uniformly distributed integers using a + * single distribution invocation. + * @param __b0The upper bound for the first integer. + * @param __b1The upper bound for the second integer. + * @param __g A UniformRandomBitGenerator. + * @return A pair (i, j) with i and j uniformly distributed + * over [0, __b0) and [0, __b1), respectively. + * + * Requires: __b0 * __b1 <= __g.max() - __g.min(). + * + * Using uniform_int_distribution with a range that is very + * small relative to the range of the generator ends up wasting + * potentially expensively generated randomness, since + * uniform_int_distribution does not store leftover randomness + * between invocations. + * + * If we know we want two integers in ranges that are sufficiently + * small, we can compose the ranges, use a single distribution + * invocation, and significantly reduce the waste. + */ + template +std::pair<_IntType, _IntType> +__gen_two_uniform_ints(_IntType __b0, _IntType __b1, + _UniformRandomBitGenerator&& __g) +{ + _IntType __x = uniform_int_distribution<_IntType>{0, (__b0 * __b1) - 1}(__g); + return std::make_pair(__x / __b1, __x % __b1); +} + + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. * @ingroup mutating_algorithms @@ -3801,13 +3832,12 @@ while (__i != __last) { const __uc_type __swap_range = __uc_type(__i - __first) + 1; - const __uc_type __comp_range = __swap_range * (__swap_range + 1); - std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; - const __uc_type __pospos = __d(__g); + const std::pair<__uc_type, __uc_type> __pospos = + __gen_two_uniform_ints(__swap_range, __swap_range + 1); - std::iter_swap(__i++, __first + (__pospos % __swap_range)); - std::iter_swap(__i++, __first + (__pospos / __swap_range)); + std::iter_swap(__i++, __first + __pospos.first); + std::iter_swap(__i++, __first + __pospos.second); } return; @@ -5695,9 +5725,51 @@ { using __distrib_type = uniform_int_distribution<_Size>; using __param_type = typename __distrib_type::param_type; + using _USize = typename std::make_unsigned<_Size>::type; + using _Gen = typename std::remove_reference<_UniformRandomBitGenerator>::type; + using __uc_type = typename std::common_type::type; + __distrib_type __d{}; _Size __unsampled_sz = std::distance(__first, __last); - for (__n = std::min(__n, __unsampled_sz); __n != 0; ++__first) + __n = std::min(__n, __unsampled_sz); + + // If possible, we use __gen_two_uniform_ints to efficiently produce + // two random numbers using a single distribution invocation: + + const __uc_type __urngrange = __g.max() - __g.min(); + if (__urngrange / __uc_type(__unsampled_sz) >= __uc_type(__unsampled_sz)) +// I.e. (__urngrange >= __unsampled_sz * __unsampled_sz) but without wrap issues. +{ + while (__n != 0 && __unsampled_sz >= 2) + { + const std::pair<_Size, _Size> p = + __gen_two_uniform_ints(__unsampled_sz, __unsampled_sz - 1, __g); + + --__unsampled_sz; + if (p.first < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + + if (__n == 0) break; + + --__unsampled_sz; + if (p.second < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + } +} + + // The loop above is otherwise equivalent to this one-at-a-time version: + + for (; __n != 0; ++__first) if (__d(__g, __param_type{0, --__unsampled_sz}) < __n) { *__out++ = *__first;
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list. On 2016-05-03 14:38, Jonathan Wakely wrote: ENOPATCH On 1 May 2016 at 15:21, Eelis wrote: Sorry, forgot to include the libstdc++ list. On 2016-05-01 16:18, Eelis wrote: Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template +inline _IntType +__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) +{ + const _IntType __urngrange = __g.max() - __g.min() + 1; + const _IntType __scaling = __urngrange / __bound; + const _IntType __past = __bound * __scaling; + + for (;;) + { + const _IntType __r = _IntType(__g()) - __g.min(); + if (__r < __past) return __r / __scaling; + } +} + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + for (_RandomAccessIterator __i = __first + 1; __i != __last; ) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) + { + const __uc_type __pos = __generate_random_index_below(__swap_range, __g); + std::iter_swap(__i, __first + __pos); + return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)