From 5f6dfb46cd988debc0072491ae54bc8fc707d0b8 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith" <dexonsmith@apple.com>
Date: Wed, 29 Jul 2015 11:48:52 -0700
Subject: [PATCH 2/2] algorithm: Avoid moving initial subsequences in
 std::inplace_merge()

r227808 (and r227811) changed `std::inplace_merge()` to meet the
complexity guidelines of the standard, which strictly restricts the
number of calls to the predicate.  Unfortunately, this removed an
optimization that avoided moving any elements if the sequence was
already fully sorted.  Moreover, any initial/final subsequence that was
in the right spot would previously never be touched.

r243530, whose main purpose was to fix a latent self-move bug made more
common by r227808/r227811, already brought half of this optimization
back.  It avoids unnecessarily moving the final subsequence.

This commit closes the loop, bringing back the other half of the
optimization without re-introducing the extra predicate call that
r227808 avoided.
---
 include/algorithm                                  | 33 ++++++++++--
 .../alg.sorting/alg.merge/inplace_merge.pass.cpp   | 60 +++++++++++++++++++---
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/include/algorithm b/include/algorithm
index efd9d09..6cae566 100644
--- a/include/algorithm
+++ b/include/algorithm
@@ -4392,13 +4392,34 @@ void __half_inplace_merge(_InputIterator1 __first1, _InputIterator1 __last1,
 template <class _Compare, class _ForwardIterator>
 void
 __buffered_inplace_merge(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
-                _Compare __comp, typename iterator_traits<_ForwardIterator>::value_type* __buff,
-                __destruct_n &__d)
+                _Compare __comp, typename iterator_traits<_ForwardIterator>::difference_type __len1,
+                typename iterator_traits<_ForwardIterator>::value_type* __buff, __destruct_n &__d)
 {
+    // precondition: __first == __middle || __middle != __last
+    // precondition: __len1 == distance(__first, __middle)
+
+    // shrink [__first, __middle) as much as possible (with no moves), returning if it shrinks to 0
+    for (; true; ++__first, (void) --__len1)
+    {
+        if (__len1 == 0)
+            return;
+        if (__comp(*__middle, *__first))
+            break;
+    }
+    // __first < __middle < __last
+    // *__first > *__middle
+
+    // move [__first, __middle) to the buffer
     typedef typename iterator_traits<_ForwardIterator>::value_type value_type;
     value_type* __p = __buff;
     for (_ForwardIterator __i = __first; __i != __middle; __d.__incr((value_type*)0), (void) ++__i, ++__p)
         ::new(__p) value_type(_VSTD::move(*__i));
+
+    // merge __middle immediately to avoid an extra comparison
+    *__first = _VSTD::move(*__middle);
+    ++__middle;
+    ++__first;
+
     __half_inplace_merge(__buff, __p, __middle, __last, __first, __comp);
 }
 
@@ -4414,14 +4435,16 @@ __buffered_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator
     unique_ptr<value_type, __destruct_n&> __h2(__buff, __d);
     if (__len1 <= __len2)
     {
-        __buffered_inplace_merge(__first, __middle, __last, __comp, __buff,
-                                 __d);
+        // __len2 > 0 || __len1 == 0
+        __buffered_inplace_merge(__first, __middle, __last,
+                                 __comp, __len1, __buff, __d);
     }
     else
     {
+        // __len1 > 0
         typedef reverse_iterator<_BidirectionalIterator> _RBi;
         __buffered_inplace_merge(_RBi(__last), _RBi(__middle), _RBi(__first),
-                                 __negate<_Compare>(__comp), __buff, __d);
+                                 __negate<_Compare>(__comp), __len2, __buff, __d);
     }
 }
 
diff --git a/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp b/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
index 9065b99..2b964c0 100644
--- a/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
+++ b/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
@@ -20,30 +20,71 @@
 
 #include "test_iterators.h"
 
+template <class Value> bool is_touched(Value& v) { return false; }
+template <class Value> void untouch(Value& v) {}
+
 #if TEST_STD_VER >= 11
 struct S {
-	S() : i_(0) {}
-	S(int i) : i_(i) {}
+	S() : i_(0), touched_(false) {}
+	S(int i) : i_(i), touched_(false) {}
 	
-	S(const S&  rhs) : i_(rhs.i_) {}
-	S(      S&& rhs) : i_(rhs.i_) { rhs.i_ = -1; }
+	S(const S&  rhs) : i_(rhs.i_), touched_(true) {}
+	S(      S&& rhs) : i_(rhs.i_), touched_(true) { rhs.i_ = -1; }
 	
-	S& operator =(const S&  rhs) { i_ = rhs.i_;              return *this; }
-	S& operator =(      S&& rhs) { i_ = rhs.i_; rhs.i_ = -2; assert(this != &rhs); return *this; }
-	S& operator =(int i)         { i_ = i;                   return *this; }
+	S& operator =(const S&  rhs) { i_ = rhs.i_;              touched_ = true;  return *this; }
+	S& operator =(      S&& rhs) { i_ = rhs.i_; rhs.i_ = -2; touched_ = true;  assert(this != &rhs); return *this; }
+	S& operator =(int i)         { i_ = i;                   touched_ = false; return *this; }
 	
 	bool operator  <(const S&  rhs) const { return i_ < rhs.i_; }
 	bool operator ==(const S&  rhs) const { return i_ == rhs.i_; }
 	bool operator ==(int i)         const { return i_ == i; }
 
-	void set(int i) { i_ = i; }
+	void set(int i) { i_ = i; touched_ = false; }
 	
 	int i_;
+	bool touched_;
 	};
+template <> bool is_touched(S& v) { return v.touched_; }
+template <> void untouch(S& v) { v.touched_ = false; }
 #endif
 
 template <class Iter>
 void
+test_subsequence(unsigned N, unsigned M1, unsigned M2,
+                 typename std::iterator_traits<Iter>::value_type* ia)
+{
+    assert(M1 <= M2);
+    assert(M2 <= N);
+
+    if (M2 != N) {
+        // Check that the final subsequence isn't touched.
+        assert(std::is_sorted(Iter(ia), Iter(ia+N)));
+        std::random_shuffle(ia, ia+M2);
+        std::sort(ia, ia+M1);
+        std::sort(ia+M1, ia+M2);
+        for (unsigned i = M2; i < N; ++i)
+            untouch(ia[i]);
+        std::inplace_merge(Iter(ia), Iter(ia+M1), Iter(ia+N));
+        for (unsigned i = M2; i < N; ++i)
+            assert(!is_touched(ia[i]));
+    }
+
+    if (M1 != 0) {
+        // Check that the initial subsequence isn't touched.
+        assert(std::is_sorted(Iter(ia), Iter(ia+N)));
+        std::random_shuffle(ia + M1, ia+N);
+        std::sort(ia+M1, ia+M2);
+        std::sort(ia+M2, ia+N);
+        for (unsigned i = 0; i < M1; ++i)
+            untouch(ia[i]);
+        std::inplace_merge(Iter(ia), Iter(ia+M2), Iter(ia+N));
+        for (unsigned i = 0; i < M1; ++i)
+            assert(!is_touched(ia[i]));
+    }
+}
+
+template <class Iter>
+void
 test_one(unsigned N, unsigned M)
 {
     typedef typename std::iterator_traits<Iter>::value_type value_type;
@@ -60,6 +101,9 @@ test_one(unsigned N, unsigned M)
         assert(ia[0] == 0);
         assert(ia[N-1] == N-1);
         assert(std::is_sorted(ia, ia+N));
+
+        test_subsequence<Iter>(N, M / 2, M, ia);
+        test_subsequence<Iter>(N, M, M + (N - M) / 2, ia);
     }
     delete [] ia;
 }
-- 
2.3.8 (Apple Git-58)

