On 12/21/18 9:57 PM, Jonathan Wakely wrote:
On 29/10/18 07:06 +0100, François Dumont wrote:
Hi
Some feedback regarding this patch ?
Sorry this got missed, please resubmit during stage 1.
You haven't CC'd the original patch author (chang jc) to give them a
chance to comment on your proposed changes to the patch.
The attached PDF on PR libstdc++/83938 has extensive discussion of the
performance issue, but I don't see any for your version. Some
performance benchmarks for your version would be helpful.
Here is this patch again.
This time it is much closer to John's original one, I just kept my
change on the size of the temporary buffer which doesn't need to be as
large as it is currently allocated, especially with John's patch.
The performance tests are showing the good improvements, attached are
the before/after. Surprisingly I do not see any change regarding
allocated memory, I guess measures are not accurate enough. However
working on them also show that current implementation is fragile. If you
reduce the allowed allocated memory too much you end up with a stack
overflow because of the recursive implementation.
I already have a patch to keep on trying to allocate memory as long as
above a given threshold rather than 0 but I am also working on making
implementation non-recursive. We'll see if even a buffer of size 1 is
better than no buffer at all then.
PR libstdc++/83938
* include/bits/stl_algo.h:
(__merge_adaptive): When buffer too small consider smallest range first
and cut based on buffer size.
(__inplace_merge): Take temporary buffer length from smallest range.
(__stable_sort): Limit temporary buffer length.
* include/bits/stl_tempbuf.h (get_temporary_buffer): Change function
to reduce requested buffer length on allocation failure.
* testsuite/performance/25_algorithms/inplace_merge.cc: New.
* testsuite/performance/25_algorithms/stable_sort.cc: Rework to allow
execution with different level of memory.
François
inplace_merge.cc reverse 19r 19u
0s 48mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 18r 18u
0s 0mem 0pf
inplace_merge.cc reverse 10r 10u
0s 0mem 0pf
inplace_merge.cc forwards 8r 8u
0s 0mem 0pf
inplace_merge.cc random 22r 22u
0s 0mem 0pf
inplace_merge.cc bench 1/2 memory 1891r 1884u
7s 928mem 0pf
inplace_merge.cc reverse 19r 18u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 18r 18u
0s 0mem 0pf
inplace_merge.cc reverse 10r 10u
0s 0mem 0pf
inplace_merge.cc forwards 3r 3u
0s 0mem 0pf
inplace_merge.cc random 22r 22u
0s 0mem 0pf
inplace_merge.cc bench 1/4 memory 1867r 1861u
6s 192mem 0pf
inplace_merge.cc reverse 17r 17u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 25r 24u
0s 0mem 0pf
inplace_merge.cc reverse 28r 28u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 289r 289u
0s 0mem 0pf
inplace_merge.cc bench no memory 2155r 2149u
6s 192mem 0pf
stable_sort.cc reverse 240r 240u
1s 0mem 0pf
stable_sort.cc forwards 205r 205u
0s 0mem 0pf
stable_sort.cc random 493r 493u
0s 0mem 0pf
stable_sort.cc bench full memory 945r 939u
5s 752mem 0pf
stable_sort.cc reverse 236r 236u
0s 0mem 0pf
stable_sort.cc forwards 199r 199u
0s 0mem 0pf
stable_sort.cc random 487r 487u
0s 0mem 0pf
stable_sort.cc bench 1/2 memory 928r 925u
3s 96mem 0pf
stable_sort.cc reverse 240r 240u
0s 0mem 0pf
stable_sort.cc forwards 203r 203u
0s 0mem 0pf
stable_sort.cc random 486r 487u
0s 0mem 0pf
stable_sort.cc bench 1/4 memory 935r 932u
5s 96mem 0pf
stable_sort.cc reverse 829r 829u
0s 0mem 0pf
stable_sort.cc forwards 143r 143u
0s 0mem 0pf
stable_sort.cc random 487r 486u
0s 0mem 0pf
stable_sort.cc bench no memory 1465r 1461u
2s 96mem 0pf
inplace_merge.cc reverse 19r 19u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 19r 19u
0s 0mem 0pf
inplace_merge.cc reverse 11r 10u
0s 0mem 0pf
inplace_merge.cc forwards 8r 8u
0s 0mem 0pf
inplace_merge.cc random 22r 22u
0s 0mem 0pf
inplace_merge.cc bench 1/2 memory 1938r 1925u
12s 928mem 0pf
inplace_merge.cc reverse 19r 19u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 18r 19u
0s 0mem 0pf
inplace_merge.cc reverse 10r 11u
0s 0mem 0pf
inplace_merge.cc forwards 3r 2u
0s 0mem 0pf
inplace_merge.cc random 22r 22u
0s 0mem 0pf
inplace_merge.cc bench 1/4 memory 1922r 1915u
7s 192mem 0pf
inplace_merge.cc reverse 18r 17u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 23r 24u
0s 0mem 0pf
inplace_merge.cc reverse 28r 27u
0s 0mem 0pf
inplace_merge.cc forwards 0r 0u
0s 0mem 0pf
inplace_merge.cc random 288r 289u
0s 0mem 0pf
inplace_merge.cc bench no memory 2204r 2196u
9s 192mem 0pf
stable_sort.cc reverse 234r 234u
0s 0mem 0pf
stable_sort.cc forwards 201r 200u
1s 0mem 0pf
stable_sort.cc random 485r 484u
1s 0mem 0pf
stable_sort.cc bench full memory 927r 920u
5s 752mem 0pf
stable_sort.cc reverse 233r 231u
1s 0mem 0pf
stable_sort.cc forwards 198r 198u
0s 0mem 0pf
stable_sort.cc random 478r 478u
0s 0mem 0pf
stable_sort.cc bench 1/2 memory 915r 910u
4s 96mem 0pf
stable_sort.cc reverse 236r 236u
0s 0mem 0pf
stable_sort.cc forwards 201r 200u
0s 0mem 0pf
stable_sort.cc random 478r 478u
1s 0mem 0pf
stable_sort.cc bench 1/4 memory 921r 916u
4s 96mem 0pf
stable_sort.cc reverse 830r 830u
0s 0mem 0pf
stable_sort.cc forwards 146r 146u
0s 0mem 0pf
stable_sort.cc random 479r 478u
0s 0mem 0pf
stable_sort.cc bench no memory 1461r 1456u
5s 96mem 0pf
diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h
index 1ba447bcb6e..e36efb515e8 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -2418,7 +2418,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BidirectionalIterator __second_cut = __middle;
_Distance __len11 = 0;
_Distance __len22 = 0;
- if (__len1 > __len2)
+ if (__len1 < __len2)
{
__len11 = __len1 / 2;
std::advance(__first_cut, __len11);
@@ -2439,14 +2439,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BidirectionalIterator __new_middle
= std::__rotate_adaptive(__first_cut, __middle, __second_cut,
- __len1 - __len11, __len22, __buffer,
- __buffer_size);
- std::__merge_adaptive(__first, __first_cut, __new_middle, __len11,
- __len22, __buffer, __buffer_size, __comp);
+ __len1 - __len11, __len22,
+ __buffer, __buffer_size);
+ std::__merge_adaptive(__first, __first_cut, __new_middle,
+ __len11, __len22,
+ __buffer, __buffer_size, __comp);
std::__merge_adaptive(__new_middle, __second_cut, __last,
- __len1 - __len11,
- __len2 - __len22, __buffer,
- __buffer_size, __comp);
+ __len1 - __len11, __len2 - __len22,
+ __buffer, __buffer_size, __comp);
}
}
@@ -2520,7 +2520,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _DistanceType __len2 = std::distance(__middle, __last);
typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
- _TmpBuf __buf(__first, __len1 + __len2);
+ _TmpBuf __buf(__first, std::min(__len1, __len2));
if (__buf.begin() == 0)
std::__merge_without_buffer
@@ -2728,6 +2728,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp);
std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp);
}
+
std::__merge_adaptive(__first, __middle, __last,
_Distance(__middle - __first),
_Distance(__last - __middle),
@@ -4980,8 +4981,11 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
typedef typename iterator_traits<_RandomAccessIterator>::difference_type
_DistanceType;
+ if (__first == __last)
+ return;
+
typedef _Temporary_buffer<_RandomAccessIterator, _ValueType> _TmpBuf;
- _TmpBuf __buf(__first, std::distance(__first, __last));
+ _TmpBuf __buf(__first, (__last - __first + 1) / 2);
if (__buf.begin() == 0)
std::__inplace_stable_sort(__first, __last, __comp);
diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h b/libstdc++-v3/include/bits/stl_tempbuf.h
index fa78ee53eb4..b6ad9ee6a46 100644
--- a/libstdc++-v3/include/bits/stl_tempbuf.h
+++ b/libstdc++-v3/include/bits/stl_tempbuf.h
@@ -95,7 +95,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::nothrow));
if (__tmp != 0)
return std::pair<_Tp*, ptrdiff_t>(__tmp, __len);
- __len /= 2;
+ __len = __len == 1 ? 0 : ((__len + 1) / 2);
}
return std::pair<_Tp*, ptrdiff_t>(static_cast<_Tp*>(0), 0);
}
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
new file mode 100644
index 00000000000..8522fcc8044
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
@@ -0,0 +1,130 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <vector>
+#include <algorithm>
+
+#include <testsuite_new_operators.h>
+#include <testsuite_performance.h>
+
+const int max_size = 10000000;
+
+void bench(int mem_threshold, int pivot_index,
+ std::vector<int> revv,
+ std::vector<int> fwdv,
+ std::vector<int> rndv)
+{
+ using namespace __gnu_test;
+
+ time_counter time;
+ resource_counter resource;
+
+ std::vector<int>::iterator pivot = revv.begin() + pivot_index;
+ std::stable_sort(revv.begin(), pivot);
+ std::stable_sort(pivot, revv.end());
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(revv.begin(), pivot, revv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+
+ report_performance(__FILE__, "reverse", time, resource);
+ clear_counters(time, resource);
+
+ pivot = fwdv.begin() + pivot_index;
+ std::stable_sort(fwdv.begin(), pivot);
+ std::stable_sort(pivot, fwdv.end());
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(fwdv.begin(), pivot, fwdv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+
+ report_performance(__FILE__, "forwards", time, resource);
+ clear_counters(time, resource);
+
+ pivot = rndv.begin() + pivot_index;
+ std::stable_sort(rndv.begin(), pivot);
+ std::stable_sort(pivot, rndv.end());
+
+ set_new_limit(mem_threshold);
+
+ start_counters(time, resource);
+ std::inplace_merge(rndv.begin(), pivot, rndv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+ report_performance(__FILE__, "random", time, resource);
+}
+
+int main()
+{
+ using namespace __gnu_test;
+
+ // No constraint to build vectors.
+ set_new_limit(~size_t(0));
+
+ std::vector<int> revv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ revv[i] = -i;
+
+ std::vector<int> fwdv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ fwdv[i] = i;
+
+ // a simple pseudo-random series which does not rely on rand() and friends
+ std::vector<int> rndv(max_size);
+ rndv[0] = 0;
+ for (int i = 1; i < max_size; ++i)
+ rndv[i] = (rndv[i-1] + 110211473) * 745988807;
+
+ time_counter time;
+ resource_counter resource;
+
+ start_counters(time, resource);
+ // Limit to half the expected size of the sorted array.
+ bench(max_size * sizeof(int) / 2, 10, revv, fwdv, rndv);
+ bench(max_size * sizeof(int) / 2, max_size / 2, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1/2 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Limit to the fourth.
+ bench(max_size * sizeof(int) / 4, 10, revv, fwdv, rndv);
+ bench(max_size * sizeof(int) / 4, max_size / 2, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1/4 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Forbid any allocation.
+ bench(0, 10, revv, fwdv, rndv);
+ bench(0, max_size / 2, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench no memory", time, resource);
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
index e79e0a7f6b2..f997b2a72ce 100644
--- a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
@@ -17,49 +17,102 @@
#include <vector>
#include <algorithm>
+
+#include <testsuite_new_operators.h>
#include <testsuite_performance.h>
-int main()
+const int max_size = 10000000;
+
+void bench(size_t mem_threshold,
+ std::vector<int> revv,
+ std::vector<int> fwdv,
+ std::vector<int> rndv)
{
using namespace __gnu_test;
time_counter time;
resource_counter resource;
- const int max_size = 10000000;
-
- std::vector<int> v(max_size);
-
- for (int i = 0; i < max_size; ++i)
- v[i] = -i;
+ set_new_limit(mem_threshold);
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ std::stable_sort(revv.begin(), revv.end());
stop_counters(time, resource);
+ set_new_limit(~size_t(0));
report_performance(__FILE__, "reverse", time, resource);
clear_counters(time, resource);
- for (int i = 0; i < max_size; ++i)
- v[i] = i;
+ set_new_limit(mem_threshold);
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ std::stable_sort(fwdv.begin(), fwdv.end());
stop_counters(time, resource);
+ set_new_limit(~size_t(0));
report_performance(__FILE__, "forwards", time, resource);
clear_counters(time, resource);
- // a simple psuedo-random series which does not rely on rand() and friends
- v[0] = 0;
+ start_counters(time, resource);
+ std::stable_sort(rndv.begin(), rndv.end());
+ stop_counters(time, resource);
+
+ set_new_limit(~size_t(0));
+ report_performance(__FILE__, "random", time, resource);
+}
+
+int main()
+{
+ using namespace __gnu_test;
+
+ // No memory constraint.
+ set_new_limit(~size_t(0));
+
+ std::vector<int> revv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ revv[i] = -i;
+
+ std::vector<int> fwdv(max_size);
+ for (int i = 0; i < max_size; ++i)
+ fwdv[i] = i;
+
+ // a simple pseudo-random series which does not rely on rand() and friends
+ std::vector<int> rndv(max_size);
+ rndv[0] = 0;
for (int i = 1; i < max_size; ++i)
- v[i] = (v[i-1] + 110211473) * 745988807;
+ rndv[i] = (rndv[i-1] + 110211473) * 745988807;
+
+ time_counter time;
+ resource_counter resource;
start_counters(time, resource);
- std::stable_sort(v.begin(), v.end());
+ bench(~size_t(0), revv, fwdv, rndv);
stop_counters(time, resource);
- report_performance(__FILE__, "random", time, resource);
+ report_performance(__FILE__, "bench full memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Limit to half the expected size of the sorted array.
+ bench(max_size * sizeof(int) / 2, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1/2 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Limit to the fourth.
+ bench(max_size * sizeof(int) / 4, revv, fwdv, rndv);
+ stop_counters(time, resource);
+
+ report_performance(__FILE__, "bench 1/4 memory", time, resource);
+ clear_counters(time, resource);
+
+ start_counters(time, resource);
+ // Forbid any allocation.
+ bench(0, revv, fwdv, rndv);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "bench no memory", time, resource);
return 0;
}