From: Matthew Malcomson <mmalcom...@nvidia.com>

Points to question here are:

1) Whether checking for this particular internal builtin is OK (this one
   happens to be the one implementing the operation for a `double`; we would
   have to rely on the assumption that if anyone implements this operation
   for a `double` they implement it for all the floating point types that
   their C++ frontend and libstdc++ handle).  A small standalone probe of
   this check is sketched after this list.

2) Whether the `#if` bit should be somewhere else instead of in the
   `__fetch_add_flt` function.  I put it there because that's where it seemed
   natural, but I am not familiar enough with libstdc++ to be confident in
   that decision.
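For reference, a minimal probe of the gate that question 1 asks about (not
part of the patch; it only assumes that `__has_builtin` yields 0 for names
the compiler does not recognise, and `__atomic_fetch_add_fp` is the internal
builtin named above).  Compiling it with the compiler under test shows which
branch of the patch's `#if` that compiler would take:

    #include <cstdio>

    #ifndef __has_builtin
    # define __has_builtin(x) 0  /* compilers without __has_builtin at all */
    #endif

    int main()
    {
    #if __has_builtin(__atomic_fetch_add_fp)
      std::puts("builtin path: __atomic_fetch_add handles floating point");
    #else
      std::puts("fallback path: compare_exchange_weak loop");
    #endif
    }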
We still need the CAS loop fallback for any compiler that doesn't implement
this builtin, and hence will still need to make an extra choice for floating
point types.  Once all compilers we care about implement this builtin, we can
remove this special handling and merge the floating point and integral
operations into the same template.

Signed-off-by: Matthew Malcomson <mmalcom...@nvidia.com>
---
 libstdc++-v3/include/bits/atomic_base.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/libstdc++-v3/include/bits/atomic_base.h b/libstdc++-v3/include/bits/atomic_base.h
index 1c2367b39b6..d3b1a022db2 100644
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@ -1217,30 +1217,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Tp
       __fetch_add_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+        return __atomic_fetch_add(__ptr, __i, int(__m));
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval + __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
                                       memory_order_relaxed))
           __newval = __oldval + __i;
         return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __fetch_sub_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_sub)
+        return __atomic_fetch_sub(__ptr, __i, int(__m));
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval - __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
                                       memory_order_relaxed))
           __newval = __oldval - __i;
         return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __add_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_add_fetch)
+        return __atomic_add_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval + __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1248,12 +1259,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                       memory_order_relaxed))
           __newval = __oldval + __i;
         return __newval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __sub_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_sub_fetch)
+        return __atomic_sub_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
         _Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
         _Val<_Tp> __newval = __oldval - __i;
         while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1261,6 +1276,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                       memory_order_relaxed))
           __newval = __oldval - __i;
         return __newval;
+#endif
       }
 
   } // namespace __atomic_impl
-- 
2.43.0
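For readers less familiar with the pattern, here is a user-level sketch (not
part of the patch) of the CAS-loop fallback that the commit message says we
must keep.  It is written against std::atomic<double> rather than the
libstdc++ internals so that it is self-contained and runnable:

    #include <atomic>
    #include <cstdio>

    // Same shape as __fetch_add_flt's fallback: load relaxed, then retry
    // compare_exchange_weak until the addition lands; return the old value.
    double fetch_add(std::atomic<double>& a, double i,
                     std::memory_order m = std::memory_order_seq_cst)
    {
      double oldval = a.load(std::memory_order_relaxed);
      double newval = oldval + i;
      // On failure compare_exchange_weak reloads oldval, so recompute newval.
      while (!a.compare_exchange_weak(oldval, newval, m,
                                      std::memory_order_relaxed))
        newval = oldval + i;
      return oldval;
    }

    int main()
    {
      std::atomic<double> a{1.5};
      double prev = fetch_add(a, 2.0);
      std::printf("previous %g, now %g\n", prev, a.load());
    }

When the compiler does provide the floating point builtin, the whole loop
collapses to the single __atomic_fetch_add call selected by the patch's `#if`
branch.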