Tested x86_64-linux.

-- >8 --

Currently we only optimize std::fill to memset when the source and
destination types are the same byte-sized type. This means that we fail
to optimize when the fill character is another integer (e.g. a literal
int value), even though assigning an int to a char would produce the
same value as memset would (after converting the fill value to unsigned
char).

This patch enables the optimized code path when the fill character is a
memcpy-able integer (using the new __memcpyable_integer trait).

libstdc++-v3/ChangeLog:

	PR libstdc++/93059
	* include/bits/stl_algobase.h (__fill_a1(T*, T*, const T&)):
	Change template parameters and enable_if condition to allow the
	fill value to be an integer or std::byte.
---
 libstdc++-v3/include/bits/stl_algobase.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h
index 9e92211c124..dacbeaf5f64 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -967,23 +967,26 @@ _GLIBCXX_END_NAMESPACE_CONTAINER
 #pragma GCC diagnostic pop
 
   // Specialization: for char types we can use memset.
-  template<typename _Tp>
+  template<typename _Up, typename _Tp>
     _GLIBCXX20_CONSTEXPR
     inline typename
-    __gnu_cxx::__enable_if<__is_byte<_Tp>::__value, void>::__type
-    __fill_a1(_Tp* __first, _Tp* __last, const _Tp& __c)
+    __gnu_cxx::__enable_if<__is_byte<_Up>::__value
+			     && __memcpyable_integer<_Tp>::__value,
+			   void>::__type
+    __fill_a1(_Up* __first, _Up* __last, const _Tp& __x)
     {
-      const _Tp __tmp = __c;
+      // This hoists the load out of the loop and also ensures that we don't
+      // use memset for cases where the assignment would be ill-formed.
+      const _Up __val = __x;
 #if __cpp_lib_is_constant_evaluated
       if (std::is_constant_evaluated())
 	{
 	  for (; __first != __last; ++__first)
-	    *__first = __tmp;
-	  return;
+	    *__first = __val;
 	}
 #endif
       if (const size_t __len = __last - __first)
-	__builtin_memset(__first, static_cast<unsigned char>(__tmp), __len);
+	__builtin_memset(__first, static_cast<unsigned char>(__val), __len);
     }
 
   template<typename _Ite, typename _Cont, typename _Tp>
-- 
2.46.2