Re: [patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax
On 2016-02-23 23:39, Jonathan Wakely wrote: On 23/02/16 22:03 +0100, Eelis wrote: The std::min, std::max, and std::minmax overloads that take a std::initializer_list all require that the list is not empty. The attached patch adds debug mode checks for this. Nice, thanks for the patch. Hi Jonathan, Thanks for the review! Updated patch attached. I couldn't find precedent in the libstdc++ testsuite of tests that test __glibcxx_assert assertions, so I put these under {min,max,minmax}/assert/, analogous to {min,max,minmax}/debug/. Is that ok? Otherwise this looks good, but will have to wait until after the GCC 6 release now. No hurry at all. :) Cheers, Eelis Index: libstdc++-v3/include/debug/formatter.h === --- libstdc++-v3/include/debug/formatter.h (revision 233636) +++ libstdc++-v3/include/debug/formatter.h (working copy) @@ -127,7 +127,8 @@ // others __msg_equal_allocs, __msg_insert_range_from_self, -__msg_irreflexive_ordering +__msg_irreflexive_ordering, +__msg_empty_init_list }; class _Error_formatter Index: libstdc++-v3/src/c++11/debug.cc === --- libstdc++-v3/src/c++11/debug.cc (revision 233636) +++ libstdc++-v3/src/c++11/debug.cc (working copy) @@ -188,7 +188,8 @@ "allocators must be equal", "attempt to insert with an iterator range [%1.name;, %2.name;) from this" " container", -"comparison doesn't meet irreflexive requirements, assert(!(a < a))" +"comparison doesn't meet irreflexive requirements, assert(!(a < a))", +"%1;(): empty initializer_list" }; void Index: libstdc++-v3/include/debug/macros.h === --- libstdc++-v3/include/debug/macros.h (revision 233636) +++ libstdc++-v3/include/debug/macros.h (working copy) @@ -69,6 +69,12 @@ ._M_iterator(_First, #_First) \ ._M_iterator(_Last, #_Last)) +// Verify that the initializer_list is non-empty. +#define __glibcxx_check_non_empty_init_list(_List) \ +_GLIBCXX_DEBUG_VERIFY(_List.size() != 0,\ + _M_message(__gnu_debug::__msg_empty_init_list) \ + ._M_string(__func__)) + /** Verify that we can insert into *this with the iterator _Position. * Insertion into a container at a specific position requires that * the iterator be nonsingular, either dereferenceable or past-the-end, Index: libstdc++-v3/include/debug/debug.h === --- libstdc++-v3/include/debug/debug.h (revision 233636) +++ libstdc++-v3/include/debug/debug.h (working copy) @@ -87,9 +87,13 @@ // Verify that the container is nonempty # define __glibcxx_requires_nonempty() \ __glibcxx_assert(! this->empty()) +// Verify that the initializer_list is non-empty. +# define __glibcxx_requires_non_empty_init_list(_List) \ + __glibcxx_assert(_List.size() != 0) #else # define __glibcxx_requires_non_empty_range(_First,_Last) # define __glibcxx_requires_nonempty() +# define __glibcxx_requires_non_empty_init_list(_List) #endif #else @@ -99,6 +103,8 @@ # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg) # define __glibcxx_requires_valid_range(_First,_Last) \ __glibcxx_check_valid_range(_First,_Last) +# define __glibcxx_requires_non_empty_init_list(_List) \ + __glibcxx_check_non_empty_init_list(_List) # define __glibcxx_requires_non_empty_range(_First,_Last) \ __glibcxx_check_non_empty_range(_First,_Last) # define __glibcxx_requires_sorted(_First,_Last) \ Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 233636) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3452,25 +3452,37 @@ _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l) -{ return *std::min_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp min(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::min_element(__l.begin(), __l.end(), __comp); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::min_element(__l.begin(), __l.end(), __comp); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l) -{ return *std::max_element(__l.begin(), __l.end()); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::max_element(__l.begin(), __l.end()); +} template _GLIBCXX14_CONSTEXPR inline _Tp max(initializer_list<_Tp> __l, _Compare __comp) -{ return *std::max_element(__l.begin(), __l.end(), __comp); } +{ + __glibcxx_requires_non_empty_init_list(__l); + return *std::max_element
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-09-01 17:14, Jonathan Wakely wrote: On 31/08/16 13:45 +0100, Jonathan Wakely wrote: On 03/05/16 16:42 +0200, Eelis van der Weegen wrote: Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list. On 2016-05-03 14:38, Jonathan Wakely wrote: ENOPATCH On 1 May 2016 at 15:21, Eelis wrote: Sorry, forgot to include the libstdc++ list. On 2016-05-01 16:18, Eelis wrote: Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h(revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h(working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template We should avoid introducing new names based on "uniform random number generator" and use _UniformRandomBitGenerator as per https://wg21.link/p0346r1 +inline _IntType +__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) +{ + const _IntType __urngrange = __g.max() - __g.min() + 1; Similarly, let's use __urbgrange here. + const _IntType __scaling = __urngrange / __bound; I think I'd like either a comment on the function documenting the assumption about __bound and __g, or an explicit check: __glibcxx_assert( __scaling >= __bound ); + const _IntType __past = __bound * __scaling; + + for (;;) + { +const _IntType __r = _IntType(__g()) - __g.min(); +if (__r < __past) return __r / __scaling; This is basically the same algorithm as uniform_int_distribution so doesn't introduce any bias, right? Is this significantly faster than just using uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't need to duplicate the logic? (And people maintaining the code won't reconvince themselves it's correct every time they look at it :-) + } +} + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { +for (_RandomAccessIterator __i = __first + 1; __i != __last; ) +{ + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) Could we hoist this test out of the loop somehow? If we change the loop condition to be __i+1 < __last we don't need to test it on every iteration, and then after the loop we can just do the final swap if (__urange % 2). + { +const __uc_type __pos = __generate_random_index_below(__swap_range, __g); +std::iter_swap(__i, __first + __pos); +return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); I think I've convinced myself this is correct :-) Values of __pospos will be uniformly distributed in [0, __comp_range) iThis is true, but ... and so the / and % results will be too. This isn't. If __swap_range is 3, then __comp_range is 10 and __pospos is uniformly distributed in [0, 9]. (__pospos % __swap_range) is not uniformly distributed, we get P(0) = 0.4, P(1) = 0.3, P(2) = 0.3. Similarly, (__pospos / __swap_range) is not uniform, we get P(0) = 0.3, P(1) = 0.3, P(2) = 0.3, P(3) = 0.1 This means that certain permuations of the input are more likely than others, which fails to meet the requirements of the function. Or is there a flaw in my reasoning? Just that if __sw
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
On 2016-08-31 14:45, Jonathan Wakely wrote: Is this significantly faster than just using uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't need to duplicate the logic? (And people maintaining the code won't reconvince themselves it's correct every time they look at it :-) [..] Could we hoist this test out of the loop somehow? If we change the loop condition to be __i+1 < __last we don't need to test it on every iteration, and then after the loop we can just do the final swap if (__urange % 2). Reusing std::uniform_int_distribution seems just as fast, so I've removed __generate_random_index_below. I've hoisted the (__i + 1 == __last) check out of the loop (in a slightly different way), and it seems to shave off a couple more cycles, yay! Updated patch attached. Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 239895) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3772,6 +3772,46 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = __g.max() - __g.min(); + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + _RandomAccessIterator __i = __first + 1; + + // Since we know the range isn't empty, an even number of elements + // means an uneven number of elements /to swap/, in which case we + // do the first one up front: + + if ((__urange % 2) == 0) + { + std::iter_swap(__i++, __first + (__g() & 1)); + } + + // Now we know that __last - __i is even, so we do the rest in pairs, + // using a single distribution invocation to produce swap positions + // for two successive elements at a time: + + while (__i != __last) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + + std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; + const __uc_type __pospos = __d(__g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
[patch, libstdc++] Optimize selection sampling by using generator to get two values at once
This is the same optimization as was recently applied to std::shuffle. It reduces the runtime of the following program by 20% on my machine: int main() { std::vector a(1), b(1000); std::mt19937 gen; uint64_t c = 0; for (int i = 0; i != 1000; ++i) { std::sample(a.begin(), a.end(), b.begin(), b.size(), gen); c += uint64_t(b[32]); } std::cout << c; } Index: bits/stl_algo.h === --- bits/stl_algo.h (revision 241299) +++ bits/stl_algo.h (working copy) @@ -3741,6 +3741,37 @@ #ifdef _GLIBCXX_USE_C99_STDINT_TR1 /** + * @brief Generate two uniformly distributed integers using a + * single distribution invocation. + * @param __b0The upper bound for the first integer. + * @param __b1The upper bound for the second integer. + * @param __g A UniformRandomBitGenerator. + * @return A pair (i, j) with i and j uniformly distributed + * over [0, __b0) and [0, __b1), respectively. + * + * Requires: __b0 * __b1 <= __g.max() - __g.min(). + * + * Using uniform_int_distribution with a range that is very + * small relative to the range of the generator ends up wasting + * potentially expensively generated randomness, since + * uniform_int_distribution does not store leftover randomness + * between invocations. + * + * If we know we want two integers in ranges that are sufficiently + * small, we can compose the ranges, use a single distribution + * invocation, and significantly reduce the waste. + */ + template +std::pair<_IntType, _IntType> +__gen_two_uniform_ints(_IntType __b0, _IntType __b1, + _UniformRandomBitGenerator&& __g) +{ + _IntType __x = uniform_int_distribution<_IntType>{0, (__b0 * __b1) - 1}(__g); + return std::make_pair(__x / __b1, __x % __b1); +} + + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. * @ingroup mutating_algorithms @@ -3801,13 +3832,12 @@ while (__i != __last) { const __uc_type __swap_range = __uc_type(__i - __first) + 1; - const __uc_type __comp_range = __swap_range * (__swap_range + 1); - std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1}; - const __uc_type __pospos = __d(__g); + const std::pair<__uc_type, __uc_type> __pospos = + __gen_two_uniform_ints(__swap_range, __swap_range + 1); - std::iter_swap(__i++, __first + (__pospos % __swap_range)); - std::iter_swap(__i++, __first + (__pospos / __swap_range)); + std::iter_swap(__i++, __first + __pospos.first); + std::iter_swap(__i++, __first + __pospos.second); } return; @@ -5695,9 +5725,51 @@ { using __distrib_type = uniform_int_distribution<_Size>; using __param_type = typename __distrib_type::param_type; + using _USize = typename std::make_unsigned<_Size>::type; + using _Gen = typename std::remove_reference<_UniformRandomBitGenerator>::type; + using __uc_type = typename std::common_type::type; + __distrib_type __d{}; _Size __unsampled_sz = std::distance(__first, __last); - for (__n = std::min(__n, __unsampled_sz); __n != 0; ++__first) + __n = std::min(__n, __unsampled_sz); + + // If possible, we use __gen_two_uniform_ints to efficiently produce + // two random numbers using a single distribution invocation: + + const __uc_type __urngrange = __g.max() - __g.min(); + if (__urngrange / __uc_type(__unsampled_sz) >= __uc_type(__unsampled_sz)) +// I.e. (__urngrange >= __unsampled_sz * __unsampled_sz) but without wrap issues. +{ + while (__n != 0 && __unsampled_sz >= 2) + { + const std::pair<_Size, _Size> p = + __gen_two_uniform_ints(__unsampled_sz, __unsampled_sz - 1, __g); + + --__unsampled_sz; + if (p.first < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + + if (__n == 0) break; + + --__unsampled_sz; + if (p.second < __n) + { + *__out++ = *__first; + --__n; + } + + ++__first; + } +} + + // The loop above is otherwise equivalent to this one-at-a-time version: + + for (; __n != 0; ++__first) if (__d(__g, __param_type{0, --__unsampled_sz}) < __n) { *__out++ = *__first;
Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible
Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list. On 2016-05-03 14:38, Jonathan Wakely wrote: ENOPATCH On 1 May 2016 at 15:21, Eelis wrote: Sorry, forgot to include the libstdc++ list. On 2016-05-01 16:18, Eelis wrote: Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(1); for (int i = 0; i != 1; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis Index: libstdc++-v3/include/bits/stl_algo.h === --- libstdc++-v3/include/bits/stl_algo.h (revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template +inline _IntType +__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) +{ + const _IntType __urngrange = __g.max() - __g.min() + 1; + const _IntType __scaling = __urngrange / __bound; + const _IntType __past = __bound * __scaling; + + for (;;) + { + const _IntType __r = _IntType(__g()) - __g.min(); + if (__r < __past) return __r / __scaling; + } +} + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) +// I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + for (_RandomAccessIterator __i = __first + 1; __i != __last; ) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) + { + const __uc_type __pos = __generate_random_index_below(__swap_range, __g); + std::iter_swap(__i, __first + __pos); + return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)