from:"Eelis"

[patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax

2016-02-23 Thread Eelis


The std::min, std::max, and std::minmax overloads that take a 
std::initializer_list all require that the list is not empty. The attached 
patch adds debug mode checks for this.

Thanks,

Eelis
Index: libstdc++-v3/include/debug/formatter.h
===
--- libstdc++-v3/include/debug/formatter.h	(revision 233636)
+++ libstdc++-v3/include/debug/formatter.h	(working copy)
@@ -87,6 +87,8 @@
 __msg_splice_bad,
 __msg_splice_other,
 __msg_splice_overlap,
+// std::initializer_list checks
+__msg_empty_init_list,
 // iterator checks
 __msg_init_singular,
 __msg_init_copy_singular,
Index: libstdc++-v3/src/c++11/debug.cc
===
--- libstdc++-v3/src/c++11/debug.cc	(revision 233636)
+++ libstdc++-v3/src/c++11/debug.cc	(working copy)
@@ -139,6 +139,8 @@
 "attempt to splice an iterator from a different container",
 "splice destination %1.name;"
 " occurs within source range [%2.name;, %3.name;)",
+// std::initializer_list checks
+"%1;(): empty initializer_list",
 // iterator checks
 "attempt to initialize an iterator that will immediately become singular",
 "attempt to copy-construct an iterator from a singular iterator",
Index: libstdc++-v3/include/debug/macros.h
===
--- libstdc++-v3/include/debug/macros.h	(revision 233636)
+++ libstdc++-v3/include/debug/macros.h	(working copy)
@@ -69,6 +69,12 @@
 		  ._M_iterator(_First, #_First)			\
 		  ._M_iterator(_Last, #_Last))
 
+// Verify that the initializer_list is non-empty.
+#define __glibcxx_check_non_empty_init_list(_List)			\
+_GLIBCXX_DEBUG_VERIFY(_List.size() != 0,\
+		  _M_message(__gnu_debug::__msg_empty_init_list)	\
+		  ._M_string(__func__))
+
 /** Verify that we can insert into *this with the iterator _Position.
  *  Insertion into a container at a specific position requires that
  *  the iterator be nonsingular, either dereferenceable or past-the-end,
Index: libstdc++-v3/include/debug/debug.h
===
--- libstdc++-v3/include/debug/debug.h	(revision 233636)
+++ libstdc++-v3/include/debug/debug.h	(working copy)
@@ -62,6 +62,7 @@
 
 # define __glibcxx_requires_cond(_Cond,_Msg)
 # define __glibcxx_requires_valid_range(_First,_Last)
+# define __glibcxx_requires_non_empty_init_list(_List)
 # define __glibcxx_requires_sorted(_First,_Last)
 # define __glibcxx_requires_sorted_pred(_First,_Last,_Pred)
 # define __glibcxx_requires_sorted_set(_First1,_Last1,_First2)
@@ -99,6 +100,8 @@
 # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg)
 # define __glibcxx_requires_valid_range(_First,_Last)	\
   __glibcxx_check_valid_range(_First,_Last)
+# define __glibcxx_requires_non_empty_init_list(_List)	\
+  __glibcxx_check_non_empty_init_list(_List)
 # define __glibcxx_requires_non_empty_range(_First,_Last)	\
   __glibcxx_check_non_empty_range(_First,_Last)
 # define __glibcxx_requires_sorted(_First,_Last)	\
Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 233636)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3452,25 +3452,37 @@
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 min(initializer_list<_Tp> __l)
-{ return *std::min_element(__l.begin(), __l.end()); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::min_element(__l.begin(), __l.end());
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 min(initializer_list<_Tp> __l, _Compare __comp)
-{ return *std::min_element(__l.begin(), __l.end(), __comp); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::min_element(__l.begin(), __l.end(), __comp);
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 max(initializer_list<_Tp> __l)
-{ return *std::max_element(__l.begin(), __l.end()); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::max_element(__l.begin(), __l.end());
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 max(initializer_list<_Tp> __l, _Compare __comp)
-{ return *std::max_element(__l.begin(), __l.end(), __comp); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::max_element(__l.begin(), __l.end(), __comp);
+}
 
   template
 _GLIBCXX14_CONSTEXPR
@@ -3477,6 +3489,7 @@
 inline pair<_Tp, _Tp>
 minmax(initializer_list<_Tp> __l)
 {
+  __glibcxx_requires_non_empty_init_list(__l);
   pair __p =
 	std::minmax_element(__l.begin(), __l.end());
   return std::make_pair(*__p.first, *__p.second);
@@ -3487,6 +3500,7 @@
 inline pair<_Tp, _Tp>
 minma

Re: [PATCH] Implement -fdiagnostics-parseable-fixits

2016-08-18 Thread Eelis


On 2016-06-22 05:25, David Malcolm wrote:

Clang has an option -fdiagnostics-parseable-fixits, which emits a
machine-readable version of the fixits, for use by IDEs.  (The only
IDE I know of that supports this format is Xcode [1]; does anyone
know of other IDEs supporting this?  I'd be very happy if someone
implemented Emacs support for it, or could point me at it).

This patch implements the option for gcc.  It seems prudent to be
compatible with Clang here, so I've deliberately used the same option
name and attempted to emulate the output format,


Thanks a lot for this! I now use this in geordi (http://eelis.net/geordi). 
Geordi already supported Clang's fix-its, and thanks to your taking care to use 
the same format, enabling it for GCC was a matter of adding the flag.

Cheers!

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-09-02 Thread Eelis


On 2016-09-02 20:20, Eelis van der Weegen wrote:

On 2016-08-31 14:45, Jonathan Wakely wrote:

Is this significantly faster than just using
uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't
need to duplicate the logic? (And people maintaining the code won't
reconvince themselves it's correct every time they look at it :-)

[..]

Could we hoist this test out of the loop somehow?

If we change the loop condition to be __i+1 < __last we don't need to
test it on every iteration, and then after the loop we can just do
the final swap if (__urange % 2).


Reusing std::uniform_int_distribution seems just as fast, so I've removed 
__generate_random_index_below.

I've hoisted the (__i + 1 == __last) check out of the loop (in a slightly 
different way), and it seems to shave off a couple more cycles, yay!

Updated patch attached.



Please ignore that patch, I used __g()&1 but that's invalid (the new 
"UniformRandomBitGenerator" name is misleading).

Updated patch (which uses a proper distribution even for the [0,1] case) 
attached.
Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 239895)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3772,6 +3772,47 @@
   typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
   typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
   typedef typename __distr_type::param_type __p_type;
+
+  typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
+
+  const __uc_type __urngrange = __g.max() - __g.min();
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange / __urange >= __urange)
+// I.e. (__urngrange >= __urange * __urange) but without wrap issues.
+  {
+	_RandomAccessIterator __i = __first + 1;
+
+	// Since we know the range isn't empty, an even number of elements
+	// means an uneven number of elements /to swap/, in which case we
+	// do the first one up front:
+
+	if ((__urange % 2) == 0)
+	{
+	  __distr_type __d{0, 1};
+	  std::iter_swap(__i++, __first + __d(__g));
+	}
+
+	// Now we know that __last - __i is even, so we do the rest in pairs,
+	// using a single distribution invocation to produce swap positions
+	// for two successive elements at a time:
+
+	while (__i != __last)
+	{
+	  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
+	  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
+
+	  std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1};
+	  const __uc_type __pospos = __d(__g);
+
+	  std::iter_swap(__i++, __first + (__pospos % __swap_range));
+	  std::iter_swap(__i++, __first + (__pospos / __swap_range));
+	}
+
+	return;
+  }
+
   __distr_type __d;
 
   for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-05-25 Thread Eelis


On 2016-05-01 16:18, Eelis wrote:

The attached patch optimizes std::shuffle for the very common case
where the generator range is large enough that a single invocation
can produce two swap positions.

This reduces the runtime of the following testcase by 37% on my machine:


Gentle ping. :)  Did anyone get a chance to take a look at this?

Does the idea seem sound? Does the implementation seem correct?

Thanks,

Eelis

[patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution

2016-04-30 Thread Eelis


Hi,

The attached patch makes std::shuffle about 33% faster for the following 
testcase:

#include 
#include 
#include 

int main()
{
std::mt19937 gen;

std::vector v;
v.reserve(1);

for (int i = 0; i != 1; ++i)
{
v.push_back(i);
std::shuffle(v.begin(), v.end(), gen);
}

std::cout << v.front() << '\n';
}

It achieves this by avoiding std::uniform_int_distribution when the generator's
range is large enough, which is almost always the case. This helps a lot, 
because
std::uniform_int_distribution::op() recomputes scaling factors every time.

Thoughts?

Thanks,

Eelis
Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 235680)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3738,12 +3738,40 @@
 	_DistanceType;
 
   typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
-  typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
-  typedef typename __distr_type::param_type __p_type;
-  __distr_type __d;
+  typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
 
-  for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
-	std::iter_swap(__i, __first + __d(__g, __p_type(0, __i - __first)));
+  const __uc_type __urngrange =
+	_Gen::max() - _Gen::min() + 1; // +1 because the generator range is inclusive
+
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange >= __urange)
+  {
+	const __uc_type __scaling = __urngrange / __urange;
+	const __uc_type __past = __urange * __scaling;
+
+	for (_RandomAccessIterator __i = __first; __i != __last; ++__i)
+	{
+	  __uc_type __j;
+	  do
+	  {
+	__j = __uc_type(__g()) - _Gen::min();
+	  }
+	  while (__j >= __past);
+
+	  std::iter_swap(__i, __first + __j / __scaling);
+	}
+  }
+  else
+  {
+	typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
+	typedef typename __distr_type::param_type __p_type;
+	__distr_type __d;
+
+for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)
+	  std::iter_swap(__i, __first + __d(__g, __p_type(0, __i - __first)));
+  }
 }
 #endif

Re: [patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution

2016-04-30 Thread Eelis


Please ignore this, I made the error described here:

  
https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#Implementation_errors

:)

On 2016-04-30 21:15, Eelis wrote:

Hi,

The attached patch makes std::shuffle about 33% faster for the following 
testcase:

 #include 
 #include 
 #include 

 int main()
 {
 std::mt19937 gen;

 std::vector v;
 v.reserve(1);

 for (int i = 0; i != 1; ++i)
 {
 v.push_back(i);
 std::shuffle(v.begin(), v.end(), gen);
 }

 std::cout << v.front() << '\n';
 }

It achieves this by avoiding std::uniform_int_distribution when the generator's
range is large enough, which is almost always the case. This helps a lot, 
because
std::uniform_int_distribution::op() recomputes scaling factors every time.

Thoughts?

Thanks,

Eelis

[patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-05-01 Thread Eelis


Hi,

The attached patch optimizes std::shuffle for the very common case
where the generator range is large enough that a single invocation
can produce two swap positions.

This reduces the runtime of the following testcase by 37% on my machine:

int main()
{
std::mt19937 gen;

std::vector v;
v.reserve(1);
for (int i = 0; i != 1; ++i)
{
v.push_back(i);
std::shuffle(v.begin(), v.end(), gen);
}

std::cout << v.front() << '\n';
}

Thoughts?

Thanks,

Eelis

Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 235680)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3708,6 +3708,22 @@
 #endif
 
 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
+
+  template
+inline _IntType
+__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g)
+{
+  const _IntType __urngrange = __g.max() - __g.min() + 1;
+  const _IntType __scaling = __urngrange / __bound;
+  const _IntType __past = __bound * __scaling;
+
+  for (;;)
+  {
+	const _IntType __r = _IntType(__g()) - __g.min();
+	if (__r < __past) return __r / __scaling;
+  }
+}
+
   /**
*  @brief Shuffle the elements of a sequence using a uniform random
* number generator.
@@ -3740,6 +3756,40 @@
   typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
   typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
   typedef typename __distr_type::param_type __p_type;
+
+  typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
+
+  const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1;
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange / __urange >= __urange)
+// I.e. (__urngrange >= __urange * __urange) but without wrap issues.
+  {
+	for (_RandomAccessIterator __i = __first + 1; __i != __last; )
+	{
+	  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
+
+	  if (__i + 1 == __last)
+	  {
+	const __uc_type __pos = __generate_random_index_below(__swap_range, __g);
+	std::iter_swap(__i, __first + __pos);
+	return;
+	  }
+
+	  // Use a single generator invocation to produce swap positions for
+	  // both of the next two elements:
+
+	  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
+	  const __uc_type __pospos = __generate_random_index_below(__comp_range, __g);
+
+	  std::iter_swap(__i++, __first + (__pospos % __swap_range));
+	  std::iter_swap(__i++, __first + (__pospos / __swap_range));
+	}
+
+	return;
+  }
+
   __distr_type __d;
 
   for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-05-01 Thread Eelis


Sorry, forgot to include the libstdc++ list.

On 2016-05-01 16:18, Eelis wrote:

Hi,

The attached patch optimizes std::shuffle for the very common case
where the generator range is large enough that a single invocation
can produce two swap positions.

This reduces the runtime of the following testcase by 37% on my machine:

 int main()
 {
 std::mt19937 gen;

 std::vector v;
 v.reserve(1);
 for (int i = 0; i != 1; ++i)
 {
 v.push_back(i);
 std::shuffle(v.begin(), v.end(), gen);
 }

 std::cout << v.front() << '\n';
 }

Thoughts?

Thanks,

Eelis

[PATCH] Forward to correct function in std::shared_mutex::unlock().

2015-08-12 Thread Eelis


Looks like a typo. :)
Index: libstdc++-v3/include/std/shared_mutex
===
--- libstdc++-v3/include/std/shared_mutex	(revision 226840)
+++ libstdc++-v3/include/std/shared_mutex	(working copy)
@@ -331,7 +331,7 @@
 
 void lock() { _M_impl.lock(); }
 bool try_lock() { return _M_impl.try_lock(); }
-void unlock() { _M_impl.try_lock(); }
+void unlock() { _M_impl.unlock(); }
 
 // Shared ownership

[patch libstdc++] Fix assignability check in uninitialized_copy

2014-12-27 Thread Eelis


std::uninitialized_copy tries to test assignability as follows:

is_assignable<_ValueType1, _RefType>::value

This is the wrong way around. For example, when the iterators are char*, this 
ends up as

is_assignable::value

which is false. It should be

is_assignable::value

which is true.

The result is that uninitialized_copy needlessly picks the slow implementation.

Trivial fix attached. (No tests ran because I'm getting "runtest: command not 
found" errors that I don't know how to fix.)
Index: libstdc++-v3/include/bits/stl_uninitialized.h
===
--- libstdc++-v3/include/bits/stl_uninitialized.h	(revision 219070)
+++ libstdc++-v3/include/bits/stl_uninitialized.h	(working copy)
@@ -105,29 +105,29 @@
   template
 inline _ForwardIterator
 uninitialized_copy(_InputIterator __first, _InputIterator __last,
 		   _ForwardIterator __result)
 {
   typedef typename iterator_traits<_InputIterator>::value_type
 	_ValueType1;
   typedef typename iterator_traits<_ForwardIterator>::value_type
 	_ValueType2;
 #if __cplusplus < 201103L
   const bool __assignable = true;
 #else
   // trivial types can have deleted assignment
   typedef typename iterator_traits<_InputIterator>::reference _RefType;
-  const bool __assignable = is_assignable<_ValueType1, _RefType>::value;
+  const bool __assignable = is_assignable<_RefType, _ValueType1>::value;
 #endif
 
   return std::__uninitialized_copy<__is_trivial(_ValueType1)
    && __is_trivial(_ValueType2)
    && __assignable>::
 	__uninit_copy(__first, __last, __result);
 }
 
 
   template
 struct __uninitialized_fill
 {
   template
 static void

Re: [patch libstdc++] Fix assignability check in uninitialized_copy

2015-01-09 Thread Eelis


On 2015-01-09 19:03, Jonathan Wakely wrote:

The attached patch should be correct.

Tested x86_64-linux, committed to trunk and 4.9.


Awesome, thanks!

Re: [patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax

2016-02-25 Thread Eelis van der Weegen


On 2016-02-23 23:39, Jonathan Wakely wrote:

On 23/02/16 22:03 +0100, Eelis wrote:

The std::min, std::max, and std::minmax overloads that take a 
std::initializer_list all require that the list is not empty. The attached 
patch adds debug mode checks for this.


Nice, thanks for the patch.


Hi Jonathan,

Thanks for the review! Updated patch attached.

I couldn't find precedent in the libstdc++ testsuite of tests that test 
__glibcxx_assert assertions, so I put these under {min,max,minmax}/assert/, 
analogous to {min,max,minmax}/debug/. Is that ok?


Otherwise this looks good, but will have to wait until after the GCC 6 release 
now.


No hurry at all. :)

Cheers,

Eelis
Index: libstdc++-v3/include/debug/formatter.h
===
--- libstdc++-v3/include/debug/formatter.h	(revision 233636)
+++ libstdc++-v3/include/debug/formatter.h	(working copy)
@@ -127,7 +127,8 @@
 // others
 __msg_equal_allocs,
 __msg_insert_range_from_self,
-__msg_irreflexive_ordering
+__msg_irreflexive_ordering,
+__msg_empty_init_list
   };
 
   class _Error_formatter
Index: libstdc++-v3/src/c++11/debug.cc
===
--- libstdc++-v3/src/c++11/debug.cc	(revision 233636)
+++ libstdc++-v3/src/c++11/debug.cc	(working copy)
@@ -188,7 +188,8 @@
 "allocators must be equal",
 "attempt to insert with an iterator range [%1.name;, %2.name;) from this"
 " container",
-"comparison doesn't meet irreflexive requirements, assert(!(a < a))"
+"comparison doesn't meet irreflexive requirements, assert(!(a < a))",
+"%1;(): empty initializer_list"
   };
 
   void
Index: libstdc++-v3/include/debug/macros.h
===
--- libstdc++-v3/include/debug/macros.h	(revision 233636)
+++ libstdc++-v3/include/debug/macros.h	(working copy)
@@ -69,6 +69,12 @@
 		  ._M_iterator(_First, #_First)			\
 		  ._M_iterator(_Last, #_Last))
 
+// Verify that the initializer_list is non-empty.
+#define __glibcxx_check_non_empty_init_list(_List)			\
+_GLIBCXX_DEBUG_VERIFY(_List.size() != 0,\
+		  _M_message(__gnu_debug::__msg_empty_init_list)	\
+		  ._M_string(__func__))
+
 /** Verify that we can insert into *this with the iterator _Position.
  *  Insertion into a container at a specific position requires that
  *  the iterator be nonsingular, either dereferenceable or past-the-end,
Index: libstdc++-v3/include/debug/debug.h
===
--- libstdc++-v3/include/debug/debug.h	(revision 233636)
+++ libstdc++-v3/include/debug/debug.h	(working copy)
@@ -87,9 +87,13 @@
 // Verify that the container is nonempty
 # define __glibcxx_requires_nonempty() \
   __glibcxx_assert(! this->empty())
+// Verify that the initializer_list is non-empty.
+# define __glibcxx_requires_non_empty_init_list(_List) \
+  __glibcxx_assert(_List.size() != 0)
 #else
 # define __glibcxx_requires_non_empty_range(_First,_Last)
 # define __glibcxx_requires_nonempty()
+# define __glibcxx_requires_non_empty_init_list(_List)
 #endif
 
 #else
@@ -99,6 +103,8 @@
 # define __glibcxx_requires_cond(_Cond,_Msg) _GLIBCXX_DEBUG_VERIFY(_Cond,_Msg)
 # define __glibcxx_requires_valid_range(_First,_Last)	\
   __glibcxx_check_valid_range(_First,_Last)
+# define __glibcxx_requires_non_empty_init_list(_List)	\
+  __glibcxx_check_non_empty_init_list(_List)
 # define __glibcxx_requires_non_empty_range(_First,_Last)	\
   __glibcxx_check_non_empty_range(_First,_Last)
 # define __glibcxx_requires_sorted(_First,_Last)	\
Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 233636)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3452,25 +3452,37 @@
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 min(initializer_list<_Tp> __l)
-{ return *std::min_element(__l.begin(), __l.end()); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::min_element(__l.begin(), __l.end());
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 min(initializer_list<_Tp> __l, _Compare __comp)
-{ return *std::min_element(__l.begin(), __l.end(), __comp); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::min_element(__l.begin(), __l.end(), __comp);
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 max(initializer_list<_Tp> __l)
-{ return *std::max_element(__l.begin(), __l.end()); }
+{
+  __glibcxx_requires_non_empty_init_list(__l);
+  return *std::max_element(__l.begin(), __l.end());
+}
 
   template
 _GLIBCXX14_CONSTEXPR
 inline _Tp
 max(initializer_list<_Tp> __l, _Compare __comp)
-{ return *std::max_element(__l.

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-09-01 Thread Eelis van der Weegen


On 2016-09-01 17:14, Jonathan Wakely wrote:

On 31/08/16 13:45 +0100, Jonathan Wakely wrote:

On 03/05/16 16:42 +0200, Eelis van der Weegen wrote:

Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list.

On 2016-05-03 14:38, Jonathan Wakely wrote:

ENOPATCH

On 1 May 2016 at 15:21, Eelis  wrote:

Sorry, forgot to include the libstdc++ list.

On 2016-05-01 16:18, Eelis wrote:


Hi,

The attached patch optimizes std::shuffle for the very common case
where the generator range is large enough that a single invocation
can produce two swap positions.

This reduces the runtime of the following testcase by 37% on my machine:

int main()
{
std::mt19937 gen;

std::vector v;
v.reserve(1);
for (int i = 0; i != 1; ++i)
{
v.push_back(i);
std::shuffle(v.begin(), v.end(), gen);
}

std::cout << v.front() << '\n';
}

Thoughts?

Thanks,

Eelis









Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h(revision 235680)
+++ libstdc++-v3/include/bits/stl_algo.h(working copy)
@@ -3708,6 +3708,22 @@
#endif

#ifdef _GLIBCXX_USE_C99_STDINT_TR1
+
+  template


We should avoid introducing new names based on "uniform random number
generator" and use _UniformRandomBitGenerator as per
https://wg21.link/p0346r1


+inline _IntType
+__generate_random_index_below(_IntType __bound, 
_UniformRandomNumberGenerator& __g)
+{
+  const _IntType __urngrange = __g.max() - __g.min() + 1;


Similarly, let's use __urbgrange here.


+  const _IntType __scaling = __urngrange / __bound;


I think I'd like either a comment on the function documenting the
assumption about __bound and __g, or an explicit check:

  __glibcxx_assert( __scaling >= __bound );


+  const _IntType __past = __bound * __scaling;
+
+  for (;;)
+  {
+const _IntType __r = _IntType(__g()) - __g.min();
+if (__r < __past) return __r / __scaling;


This is basically the same algorithm as uniform_int_distribution so
doesn't introduce any bias, right?

Is this significantly faster than just using
uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't
need to duplicate the logic? (And people maintaining the code won't
reconvince themselves it's correct every time they look at it :-)



+  }
+}
+
 /**
  *  @brief Shuffle the elements of a sequence using a uniform random
  * number generator.
@@ -3740,6 +3756,40 @@
 typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
 typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
 typedef typename __distr_type::param_type __p_type;
+
+  typedef typename 
std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
+
+  const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1;
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange / __urange >= __urange)
+// I.e. (__urngrange >= __urange * __urange) but without wrap issues.
+  {
+for (_RandomAccessIterator __i = __first + 1; __i != __last; )
+{
+  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
+
+  if (__i + 1 == __last)


Could we hoist this test out of the loop somehow?

If we change the loop condition to be __i+1 < __last we don't need to
test it on every iteration, and then after the loop we can just do
the final swap if (__urange % 2).


+  {
+const __uc_type __pos = __generate_random_index_below(__swap_range, 
__g);
+std::iter_swap(__i, __first + __pos);
+return;
+  }
+
+  // Use a single generator invocation to produce swap positions for
+  // both of the next two elements:
+
+  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
+  const __uc_type __pospos = __generate_random_index_below(__comp_range, 
__g);
+
+  std::iter_swap(__i++, __first + (__pospos % __swap_range));
+  std::iter_swap(__i++, __first + (__pospos / __swap_range));


I think I've convinced myself this is correct :-)

Values of __pospos will be uniformly distributed in [0, __comp_range)


iThis is true, but ...


and so the / and % results will be too.


This isn't.

If __swap_range is 3, then __comp_range is 10 and
__pospos is uniformly distributed in [0, 9].

(__pospos % __swap_range) is not uniformly distributed, we get
P(0) = 0.4, P(1) = 0.3, P(2) = 0.3.

Similarly, (__pospos / __swap_range) is not uniform, we get
P(0) = 0.3, P(1) = 0.3, P(2) = 0.3, P(3) = 0.1

This means that certain permuations of the input are more likely than
others, which fails to meet the requirements of the function.

Or is there a flaw in my reasoning?


Just that if __sw

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-09-02 Thread Eelis van der Weegen


On 2016-08-31 14:45, Jonathan Wakely wrote:

Is this significantly faster than just using
uniform_int_distribution<_IntType>{0, __bound - 1}(__g) so we don't
need to duplicate the logic? (And people maintaining the code won't
reconvince themselves it's correct every time they look at it :-)

[..]

Could we hoist this test out of the loop somehow?

If we change the loop condition to be __i+1 < __last we don't need to
test it on every iteration, and then after the loop we can just do
the final swap if (__urange % 2).


Reusing std::uniform_int_distribution seems just as fast, so I've removed 
__generate_random_index_below.

I've hoisted the (__i + 1 == __last) check out of the loop (in a slightly 
different way), and it seems to shave off a couple more cycles, yay!

Updated patch attached.

Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 239895)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3772,6 +3772,46 @@
   typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
   typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
   typedef typename __distr_type::param_type __p_type;
+
+  typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
+
+  const __uc_type __urngrange = __g.max() - __g.min();
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange / __urange >= __urange)
+// I.e. (__urngrange >= __urange * __urange) but without wrap issues.
+  {
+	_RandomAccessIterator __i = __first + 1;
+
+	// Since we know the range isn't empty, an even number of elements
+	// means an uneven number of elements /to swap/, in which case we
+	// do the first one up front:
+
+	if ((__urange % 2) == 0)
+	{
+	  std::iter_swap(__i++, __first + (__g() & 1));
+	}
+
+	// Now we know that __last - __i is even, so we do the rest in pairs,
+	// using a single distribution invocation to produce swap positions
+	// for two successive elements at a time:
+
+	while (__i != __last)
+	{
+	  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
+	  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
+
+	  std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1};
+	  const __uc_type __pospos = __d(__g);
+
+	  std::iter_swap(__i++, __first + (__pospos % __swap_range));
+	  std::iter_swap(__i++, __first + (__pospos / __swap_range));
+	}
+
+	return;
+  }
+
   __distr_type __d;
 
   for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)

[patch, libstdc++] Optimize selection sampling by using generator to get two values at once

2016-10-19 Thread Eelis van der Weegen


This is the same optimization as was recently applied to std::shuffle.

It reduces the runtime of the following program by 20% on my machine:

int main()
{
std::vector a(1), b(1000);
std::mt19937 gen;

uint64_t c = 0;

for (int i = 0; i != 1000; ++i)
{
std::sample(a.begin(), a.end(), b.begin(), b.size(), 
gen);
c += uint64_t(b[32]);
}

std::cout << c;
}
Index: bits/stl_algo.h
===
--- bits/stl_algo.h	(revision 241299)
+++ bits/stl_algo.h	(working copy)
@@ -3741,6 +3741,37 @@
 
 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
   /**
+   *  @brief Generate two uniformly distributed integers using a
+   * single distribution invocation.
+   *  @param  __b0The upper bound for the first integer.
+   *  @param  __b1The upper bound for the second integer.
+   *  @param  __g A UniformRandomBitGenerator.
+   *  @return  A pair (i, j) with i and j uniformly distributed
+   *   over [0, __b0) and [0, __b1), respectively.
+   *
+   *  Requires: __b0 * __b1 <= __g.max() - __g.min().
+   *
+   *  Using uniform_int_distribution with a range that is very
+   *  small relative to the range of the generator ends up wasting
+   *  potentially expensively generated randomness, since
+   *  uniform_int_distribution does not store leftover randomness
+   *  between invocations.
+   *
+   *  If we know we want two integers in ranges that are sufficiently
+   *  small, we can compose the ranges, use a single distribution
+   *  invocation, and significantly reduce the waste.
+  */
+  template
+std::pair<_IntType, _IntType>
+__gen_two_uniform_ints(_IntType __b0, _IntType __b1,
+			   _UniformRandomBitGenerator&& __g)
+{
+  _IntType __x = uniform_int_distribution<_IntType>{0, (__b0 * __b1) - 1}(__g);
+  return std::make_pair(__x / __b1, __x % __b1);
+}
+
+  /**
*  @brief Shuffle the elements of a sequence using a uniform random
* number generator.
*  @ingroup mutating_algorithms
@@ -3801,13 +3832,12 @@
 	while (__i != __last)
 	{
 	  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
-	  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
 
-	  std::uniform_int_distribution<__uc_type> __d{0, __comp_range - 1};
-	  const __uc_type __pospos = __d(__g);
+	  const std::pair<__uc_type, __uc_type> __pospos =
+	__gen_two_uniform_ints(__swap_range, __swap_range + 1);
 
-	  std::iter_swap(__i++, __first + (__pospos % __swap_range));
-	  std::iter_swap(__i++, __first + (__pospos / __swap_range));
+	  std::iter_swap(__i++, __first + __pospos.first);
+	  std::iter_swap(__i++, __first + __pospos.second);
 	}
 
 	return;
@@ -5695,9 +5725,51 @@
 {
   using __distrib_type = uniform_int_distribution<_Size>;
   using __param_type = typename __distrib_type::param_type;
+  using _USize = typename std::make_unsigned<_Size>::type;
+  using _Gen = typename std::remove_reference<_UniformRandomBitGenerator>::type;
+  using __uc_type = typename std::common_type::type;
+
   __distrib_type __d{};
   _Size __unsampled_sz = std::distance(__first, __last);
-  for (__n = std::min(__n, __unsampled_sz); __n != 0; ++__first)
+  __n = std::min(__n, __unsampled_sz);
+
+  // If possible, we use __gen_two_uniform_ints to efficiently produce
+  // two random numbers using a single distribution invocation:
+
+  const __uc_type __urngrange = __g.max() - __g.min();
+  if (__urngrange / __uc_type(__unsampled_sz) >= __uc_type(__unsampled_sz))
+// I.e. (__urngrange >= __unsampled_sz * __unsampled_sz) but without wrap issues.
+{
+	  while (__n != 0 && __unsampled_sz >= 2)
+	{
+	  const std::pair<_Size, _Size> p =
+		__gen_two_uniform_ints(__unsampled_sz, __unsampled_sz - 1, __g);
+
+	  --__unsampled_sz;
+	  if (p.first < __n)
+		{
+		  *__out++ = *__first;
+		  --__n;
+		}
+
+	  ++__first;
+
+	  if (__n == 0) break;
+
+	  --__unsampled_sz;
+	  if (p.second < __n)
+		{
+		  *__out++ = *__first;
+		  --__n;
+		}
+
+	  ++__first;
+	}
+}
+
+  // The loop above is otherwise equivalent to this one-at-a-time version:
+
+  for (; __n != 0; ++__first)
 	if (__d(__g, __param_type{0, --__unsampled_sz}) < __n)
 	  {
 	*__out++ = *__first;

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

2016-05-03 Thread Eelis van der Weegen


Ah, thanks, I forgot to re-attach when I sent to include the libstdc++ list.

On 2016-05-03 14:38, Jonathan Wakely wrote:

ENOPATCH

On 1 May 2016 at 15:21, Eelis  wrote:

Sorry, forgot to include the libstdc++ list.

On 2016-05-01 16:18, Eelis wrote:


Hi,

The attached patch optimizes std::shuffle for the very common case
where the generator range is large enough that a single invocation
can produce two swap positions.

This reduces the runtime of the following testcase by 37% on my machine:

  int main()
  {
  std::mt19937 gen;

  std::vector v;
  v.reserve(1);
  for (int i = 0; i != 1; ++i)
  {
  v.push_back(i);
  std::shuffle(v.begin(), v.end(), gen);
  }

  std::cout << v.front() << '\n';
  }

Thoughts?

Thanks,

Eelis






Index: libstdc++-v3/include/bits/stl_algo.h
===
--- libstdc++-v3/include/bits/stl_algo.h	(revision 235680)
+++ libstdc++-v3/include/bits/stl_algo.h	(working copy)
@@ -3708,6 +3708,22 @@
 #endif
 
 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
+
+  template
+inline _IntType
+__generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g)
+{
+  const _IntType __urngrange = __g.max() - __g.min() + 1;
+  const _IntType __scaling = __urngrange / __bound;
+  const _IntType __past = __bound * __scaling;
+
+  for (;;)
+  {
+	const _IntType __r = _IntType(__g()) - __g.min();
+	if (__r < __past) return __r / __scaling;
+  }
+}
+
   /**
*  @brief Shuffle the elements of a sequence using a uniform random
* number generator.
@@ -3740,6 +3756,40 @@
   typedef typename std::make_unsigned<_DistanceType>::type __ud_type;
   typedef typename std::uniform_int_distribution<__ud_type> __distr_type;
   typedef typename __distr_type::param_type __p_type;
+
+  typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen;
+  typedef typename std::common_type::type __uc_type;
+
+  const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1;
+  const __uc_type __urange = __uc_type(__last - __first);
+
+  if (__urngrange / __urange >= __urange)
+// I.e. (__urngrange >= __urange * __urange) but without wrap issues.
+  {
+	for (_RandomAccessIterator __i = __first + 1; __i != __last; )
+	{
+	  const __uc_type __swap_range = __uc_type(__i - __first) + 1;
+
+	  if (__i + 1 == __last)
+	  {
+	const __uc_type __pos = __generate_random_index_below(__swap_range, __g);
+	std::iter_swap(__i, __first + __pos);
+	return;
+	  }
+
+	  // Use a single generator invocation to produce swap positions for
+	  // both of the next two elements:
+
+	  const __uc_type __comp_range = __swap_range * (__swap_range + 1);
+	  const __uc_type __pospos = __generate_random_index_below(__comp_range, __g);
+
+	  std::iter_swap(__i++, __first + (__pospos % __swap_range));
+	  std::iter_swap(__i++, __first + (__pospos / __swap_range));
+	}
+
+	return;
+  }
+
   __distr_type __d;
 
   for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i)

[patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax

Re: [PATCH] Implement -fdiagnostics-parseable-fixits

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

[patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution

Re: [patch] libstdc++: Make std::shuffle faster by avoiding std::uniform_int_distribution

[patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

[PATCH] Forward to correct function in std::shared_mutex::unlock().

[patch libstdc++] Fix assignability check in uninitialized_copy

Re: [patch libstdc++] Fix assignability check in uninitialized_copy

Re: [patch, libstdc++] In debug mode, diagnose empty initializer_list in min/max/minmax

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

[patch, libstdc++] Optimize selection sampling by using generator to get two values at once

Re: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible

16 matches

Site Navigation

Mail list logo

Footer information