Hi,
this patch turns memmove into memcpy where we can and also avoids the extra
guard checking whether the block is non-empty.  This does not show up as a
performance improvement in my push_back micro-benchmark because vector
reallocation does not happen that often.  In general, however, we optimize
memcpy better than memmove (we can inline it in some cases).  Saving the extra
3 instructions makes push_back more likely to be inlined, though (the estimate
is now 23).

I also filed PR112653.  I think for the default allocator we should be
able to work out from PTA that the memmove can be a memcpy.

Honestly, I am not quite sure whether I need the first
__relocate_copy_a_1 template.  It handles the case where we can't use memmove,
but with my limited C++ skills I don't see how to get rid of it or make it
a wrapper for __relocate_a_1, which is identical.

Regtested on x86_64-linux.

libstdc++-v3/ChangeLog:

        * include/bits/stl_uninitialized.h (__relocate_copy_a_1): New
        functions.
        (__relocate_a_1): Do not check count to be non-zero
        before calling memmove.
        (__relocate_copy_a): New function.
        * include/bits/stl_vector.h (_S_do_relocate_copy, _S_relocate_copy):
        New member functions.
        * include/bits/vector.tcc (reserve, _M_realloc_append,
        _M_realloc_insert, _M_default_append): Use _S_relocate_copy.

diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h
index 1282af3bc43..983fa315e1b 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -1104,6 +1104,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                 std::__addressof(*__first), __alloc);
       return __cur;
     }
+  template <typename _InputIterator, typename _ForwardIterator,
+           typename _Allocator>
+    _GLIBCXX20_CONSTEXPR
+    inline _ForwardIterator
+    __relocate_copy_a_1(_InputIterator __first, _InputIterator __last,
+                       _ForwardIterator __result, _Allocator& __alloc)
+    noexcept(noexcept(std::__relocate_object_a(std::addressof(*__result),
+                                              std::addressof(*__first),
+                                              __alloc)))
+    {
+      typedef typename iterator_traits<_InputIterator>::value_type
+       _ValueType;
+      typedef typename iterator_traits<_ForwardIterator>::value_type
+       _ValueType2;
+      static_assert(std::is_same<_ValueType, _ValueType2>::value,
+         "relocation is only possible for values of the same type");
+      _ForwardIterator __cur = __result;
+      for (; __first != __last; ++__first, (void)++__cur)
+       std::__relocate_object_a(std::__addressof(*__cur),
+                                std::__addressof(*__first), __alloc);
+      return __cur;
+    }
 
 #if _GLIBCXX_HOSTED
   template <typename _Tp, typename _Up>
@@ -1114,20 +1136,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                   [[__maybe_unused__]] allocator<_Up>& __alloc) noexcept
     {
       ptrdiff_t __count = __last - __first;
-      if (__count > 0)
-       {
 #ifdef __cpp_lib_is_constant_evaluated
-         if (std::is_constant_evaluated())
+      if (std::is_constant_evaluated())
+       {
+         // Can't use memmove. Wrap the pointer so that __relocate_a_1
+         // resolves to the non-trivial overload above.
+         if (__count > 0)
            {
-             // Can't use memmove. Wrap the pointer so that __relocate_a_1
-             // resolves to the non-trivial overload above.
              __gnu_cxx::__normal_iterator<_Tp*, void> __out(__result);
              __out = std::__relocate_a_1(__first, __last, __out, __alloc);
              return __out.base();
            }
+         return __result;
+       }
 #endif
-         __builtin_memmove(__result, __first, __count * sizeof(_Tp));
+      __builtin_memmove(__result, __first, __count * sizeof(_Tp));
+      return __result + __count;
+    }
+  template <typename _Tp, typename _Up>
+    _GLIBCXX20_CONSTEXPR
+    inline __enable_if_t<std::__is_bitwise_relocatable<_Tp>::value, _Tp*>
+    __relocate_copy_a_1(_Tp* __first, _Tp* __last,
+                       _Tp* __result,
+                       [[__maybe_unused__]] allocator<_Up>& __alloc) noexcept
+    {
+      ptrdiff_t __count = __last - __first;
+#ifdef __cpp_lib_is_constant_evaluated
+      if (std::is_constant_evaluated())
+       {
+         // Can't use memcpy. Wrap the pointer so that __relocate_copy_a_1
+         // resolves to the non-trivial overload above.
+         if (__count > 0)
+           {
+             __gnu_cxx::__normal_iterator<_Tp*, void> __out(__result);
+             __out = std::__relocate_a_1(__first, __last, __out, __alloc);
+             return __out.base();
+           }
+         return __result;
        }
+#endif
+      __builtin_memcpy(__result, __first, __count * sizeof(_Tp));
       return __result + __count;
     }
 #endif
@@ -1146,6 +1194,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                 std::__niter_base(__last),
                                 std::__niter_base(__result), __alloc);
     }
+  template <typename _InputIterator, typename _ForwardIterator,
+           typename _Allocator>
+    _GLIBCXX20_CONSTEXPR
+    inline _ForwardIterator
+    __relocate_copy_a(_InputIterator __first, _InputIterator __last,
+                    _ForwardIterator __result, _Allocator& __alloc)
+    noexcept(noexcept(__relocate_copy_a_1(std::__niter_base(__first),
+                                         std::__niter_base(__last),
+                                         std::__niter_base(__result), 
__alloc)))
+    {
+      return std::__relocate_copy_a_1(std::__niter_base(__first),
+                                     std::__niter_base(__last),
+                                     std::__niter_base(__result), __alloc);
+    }
 
   /// @endcond
 #endif // C++11
diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h
index 973f4d7e2e9..4f9dba6c3fe 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -507,6 +507,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 #else
        using __do_it = __bool_constant<_S_use_relocate()>;
        return _S_do_relocate(__first, __last, __result, __alloc, __do_it{});
+#endif
+      }
+      static pointer
+      _S_do_relocate_copy(pointer __first, pointer __last, pointer __result,
+                         _Tp_alloc_type& __alloc, true_type) noexcept
+      {
+       return std::__relocate_a(__first, __last, __result, __alloc);
+      }
+
+      static pointer
+      _S_do_relocate_copy(pointer, pointer, pointer __result,
+                         _Tp_alloc_type&, false_type) noexcept
+      { return __result; }
+      // same as _S_relocate but assumes that the destination block
+      // is disjoint (as in memcpy)
+      static _GLIBCXX20_CONSTEXPR pointer
+      _S_relocate_copy(pointer __first, pointer __last, pointer __result,
+                      _Tp_alloc_type& __alloc) noexcept
+      {
+#if __cpp_if_constexpr
+       // All callers have already checked _S_use_relocate() so just do it.
+       return std::__relocate_copy_a(__first, __last, __result, __alloc);
+#else
+       using __do_it = __bool_constant<_S_use_relocate()>;
+       return _S_do_relocate_copy(__first, __last, __result, __alloc, 
__do_it{});
 #endif
       }
 #endif // C++11
diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc
index 0ccef7911b3..2468ad85f49 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -77,8 +77,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
          if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
            {
              __tmp = this->_M_allocate(__n);
-             _S_relocate(this->_M_impl._M_start, this->_M_impl._M_finish,
-                         __tmp, _M_get_Tp_allocator());
+             _S_relocate_copy(this->_M_impl._M_start, this->_M_impl._M_finish,
+                              __tmp, _M_get_Tp_allocator());
            }
          else
 #endif
@@ -515,11 +515,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
        if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
          {
            // Relocation cannot throw.
-           __new_finish = _S_relocate(__old_start, __position.base(),
-                                      __new_start, _M_get_Tp_allocator());
+           __new_finish = _S_relocate_copy(__old_start, __position.base(),
+                                           __new_start, _M_get_Tp_allocator());
            ++__new_finish;
-           __new_finish = _S_relocate(__position.base(), __old_finish,
-                                      __new_finish, _M_get_Tp_allocator());
+           __new_finish = _S_relocate_copy(__position.base(), __old_finish,
+                                           __new_finish, 
_M_get_Tp_allocator());
          }
        else
 #endif
@@ -644,8 +644,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
        if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
          {
            // Relocation cannot throw.
-           __new_finish = _S_relocate(__old_start, __old_finish,
-                                      __new_start, _M_get_Tp_allocator());
+           __new_finish = _S_relocate_copy(__old_start, __old_finish,
+                                          __new_start, _M_get_Tp_allocator());
            ++__new_finish;
          }
        else
@@ -865,8 +865,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
                if _GLIBCXX17_CONSTEXPR (_S_use_relocate())
                  {
-                   _S_relocate(__old_start, __old_finish,
-                               __new_start, _M_get_Tp_allocator());
+                   _S_relocate_copy(__old_start, __old_finish,
+                                    __new_start, _M_get_Tp_allocator());
                  }
                else
                  {

Reply via email to