Tested x86_64-linux.

-- >8 --

Currently we only optimize std::fill to memset when the fill value and
the destination elements have the same byte-sized type. This means that
we fail to optimize when the fill value has a different integer type
(e.g. a literal int value), even though assigning an int to a char would
produce the same value as memset would (after converting the fill value
to unsigned char).

This patch enables the optimized code path when the fill value is a
memcpy-able integer (using the new __memcpyable_integer trait).

libstdc++-v3/ChangeLog:

        PR libstdc++/93059
        * include/bits/stl_algobase.h (__fill_a1(T*, T*, const T&)):
        Change template parameters and enable_if condition to allow the
        fill value to be an integer or std::byte.
---
 libstdc++-v3/include/bits/stl_algobase.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h
index 9e92211c124..dacbeaf5f64 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -967,23 +967,26 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 #pragma GCC diagnostic pop
 
   // Specialization: for char types we can use memset.
-  template<typename _Tp>
+  template<typename _Up, typename _Tp>
     _GLIBCXX20_CONSTEXPR
     inline typename
-    __gnu_cxx::__enable_if<__is_byte<_Tp>::__value, void>::__type
-    __fill_a1(_Tp* __first, _Tp* __last, const _Tp& __c)
+    __gnu_cxx::__enable_if<__is_byte<_Up>::__value
+                            && __memcpyable_integer<_Tp>::__value,
+                          void>::__type
+    __fill_a1(_Up* __first, _Up* __last, const _Tp& __x)
     {
-      const _Tp __tmp = __c;
+      // This hoists the load out of the loop and also ensures that we don't
+      // use memset for cases where the assignment would be ill-formed.
+      const _Up __val = __x;
 #if __cpp_lib_is_constant_evaluated
       if (std::is_constant_evaluated())
        {
          for (; __first != __last; ++__first)
-           *__first = __tmp;
-         return;
+           *__first = __val;
        }
 #endif
       if (const size_t __len = __last - __first)
-       __builtin_memset(__first, static_cast<unsigned char>(__tmp), __len);
+       __builtin_memset(__first, static_cast<unsigned char>(__val), __len);
     }
 
   template<typename _Ite, typename _Cont, typename _Tp>
-- 
2.46.2

Reply via email to