From: Matthew Malcomson <mmalcom...@nvidia.com>

Points to question here are:

1) Whether checking for this particular internal builtin is OK (this one
   happens to be the one implementing the operation for a `double`; we would
   have to rely on the assumption that if anyone implements this operation
   for a `double` they implement it for all the floating point types that
   their C++ frontend and libstdc++ handle).  A small standalone probe of
   this check is sketched after this list.

2) Whether the `#if` bit should be somewhere else instead of in the
   `__fetch_add_flt` function.  I put it there because that's where it seemed
   natural, but I am not familiar enough with libstdc++ to be confident in
   that decision.
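For reference, a minimal probe of the gate that question 1 asks about (not
part of the patch; it only assumes that `__has_builtin` yields 0 for names
the compiler does not recognise, and `__atomic_fetch_add_fp` is the internal
builtin named above).  Compiling it with the compiler under test shows which
branch of the patch's `#if` that compiler would take:

    #include <cstdio>

    #ifndef __has_builtin
    # define __has_builtin(x) 0  /* compilers without __has_builtin at all */
    #endif

    int main()
    {
    #if __has_builtin(__atomic_fetch_add_fp)
      std::puts("builtin path: __atomic_fetch_add handles floating point");
    #else
      std::puts("fallback path: compare_exchange_weak loop");
    #endif
    }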
We still need the CAS loop fallback for any compiler that doesn't implement
this builtin, and hence will still need to make an extra choice for floating
point types.  Once all compilers we care about implement this builtin, we can
remove this special handling and merge the floating point and integral
operations into the same template.

Signed-off-by: Matthew Malcomson <mmalcom...@nvidia.com>
---
 libstdc++-v3/include/bits/atomic_base.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/libstdc++-v3/include/bits/atomic_base.h b/libstdc++-v3/include/bits/atomic_base.h
index 1c2367b39b6..d3b1a022db2 100644
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@ -1217,30 +1217,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Tp
       __fetch_add_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+        return __atomic_fetch_add(__ptr, __i, int(__m));
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval + __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
                                       memory_order_relaxed))
           __newval = __oldval + __i;
         return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __fetch_sub_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_sub)
+        return __atomic_fetch_sub(__ptr, __i, int(__m));
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval - __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
                                       memory_order_relaxed))
           __newval = __oldval - __i;
         return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __add_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_add_fetch)
+        return __atomic_add_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval + __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1248,12 +1259,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                       memory_order_relaxed))
           __newval = __oldval + __i;
         return __newval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __sub_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_sub_fetch)
+        return __atomic_sub_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval - __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1261,6 +1276,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                       memory_order_relaxed))
           __newval = __oldval - __i;
         return __newval;
+#endif
       }
 
   } // namespace __atomic_impl
-- 
2.43.0
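For readers less familiar with the pattern, here is a user-level sketch (not
part of the patch) of the CAS-loop fallback that the commit message says we
must keep.  It is written against std::atomic<double> rather than the
libstdc++ internals so that it is self-contained and runnable:

    #include <atomic>
    #include <cstdio>

    // Same shape as __fetch_add_flt's fallback: load relaxed, then retry
    // compare_exchange_weak until the addition lands; return the old value.
    double fetch_add(std::atomic<double>& a, double i,
                     std::memory_order m = std::memory_order_seq_cst)
    {
      double oldval = a.load(std::memory_order_relaxed);
      double newval = oldval + i;
      // On failure compare_exchange_weak reloads oldval, so recompute newval.
      while (!a.compare_exchange_weak(oldval, newval, m,
                                      std::memory_order_relaxed))
        newval = oldval + i;
      return oldval;
    }

    int main()
    {
      std::atomic<double> a{1.5};
      double prev = fetch_add(a, 2.0);
      std::printf("previous %g, now %g\n", prev, a.load());
    }

When the compiler does provide the floating point builtin, the whole loop
collapses to the single __atomic_fetch_add call selected by the patch's `#if`
branch.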