// Source: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66713
// --- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
//
// Test case comparing a hand-written x86-64 `lock cmpxchg` against the
// equivalent std::atomic compare-exchange. Both variants operate on a
// file-scope pointer and return the value left in the "expected" slot.

#include <atomic>

// Plain global pointer updated by the inline-asm variant.
float *ptr;

// Atomic global pointer updated by the std::atomic variant.
std::atomic<float *> aptr;

/// Compare-and-swap on the global `ptr` via inline assembly.
///
/// `cmp_` is placed in the accumulator (the "0" constraint ties it to the
/// "=a" output) and `val_` in a general register; `ptr` is passed as both
/// the memory output and a memory input. CMPXCHG stores `val_` into `ptr`
/// when `ptr` equals the accumulator, and otherwise loads `ptr` into the
/// accumulator.
///
/// @param cmp_ value `ptr` is expected to hold.
/// @param val_ value to store into `ptr` when the comparison succeeds.
/// @return the accumulator contents after the instruction: `cmp_` on
///         success, the current value of `ptr` on failure.
///
/// NOTE(review): the instruction is hard-coded as the 64-bit `cmpxchgq`
/// regardless of T, so this presumably only makes sense for 8-byte T on
/// x86-64 -- confirm before instantiating with anything else.
/// NOTE(review): only "cc" is listed as a clobber (no "memory"); confirm
/// whether that is intentional for this test case.
template <typename T>
T cas_original(T cmp_, T val_)
{
    T previous;
    __asm volatile(
        "lock; cmpxchgq %2, %3"
        : "=a" (previous), "=m" (ptr)
        : "r" (val_), "m" (ptr), "0" (cmp_)
        : "cc"
    );
    return previous;
}

/// Compare-and-swap on the global `aptr` via std::atomic.
///
/// compare_exchange_strong writes the current value of `aptr` back into
/// `cmp_` when the comparison fails and leaves `cmp_` untouched when it
/// succeeds, so returning `cmp_` reports the value that was observed in
/// `aptr`.
///
/// @param cmp_ value `aptr` is expected to hold.
/// @param val_ value to store into `aptr` when the comparison succeeds.
/// @return `cmp_` on success, the current value of `aptr` on failure.
template <typename T>
T cas_atomic(T cmp_, T val_)
{
    aptr.compare_exchange_strong(cmp_, val_, std::memory_order_acq_rel);
    return cmp_;
}

/// Instantiates cas_original<float *>; the result is discarded.
void driver_original(float *a, float *b)
{
    cas_original(a, b);
}

/// Instantiates cas_atomic<float *>; the result is discarded.
void driver_atomic(float *a, float *b)
{
    cas_atomic(a, b);
}