The __spin_until_impl function was presumably intended to just spin for
a short time, then give up and let the caller wait on a futex or
condvar. However, __spin_until_impl will never stop spinning unless
either the value changes or the timeout is reached. This means that when
__spin_until_impl returns, the caller should immediately return (because
either the value we were waiting for has changed, or the timeout
happened). So __wait_until_impl should never block on a futex or
condvar. However, the check for the return value of __spin_until_impl
would only return if the value changed (i.e. !__res._M_timeout). So if
the timeout occurred, it would fall through and block on the
futex/condvar, even though the timeout has already been reached.

This was causing a major performance regression in the timed waiting
functions of std::counting_semaphore.

The simplest fix is to replace __spin_until_impl entirely, just calling
__spin_impl to spin a small, finite number of times, and then return
immediately if either the value changed or the timeout happened. This
ensures that we don't block on the futex/condvar unnecessarily.

Removing __spin_until_impl also has the advantage that we no longer keep
calling steady_clock::now() on every iteration to check for a timeout.
That was also adding significant overhead to the timed waiting
functions.

libstdc++-v3/ChangeLog:

        PR libstdc++/122878
        * src/c++20/atomic.cc (__spin_until_impl): Remove.
        (__wait_until_impl): Use __spin_impl instead of
        __spin_until_impl and return if timeout is reached after
        spinning.
---

Tested x86_64-linux.

 libstdc++-v3/src/c++20/atomic.cc | 47 ++------------------------------
 1 file changed, 3 insertions(+), 44 deletions(-)

diff --git a/libstdc++-v3/src/c++20/atomic.cc b/libstdc++-v3/src/c++20/atomic.cc
index fdd67d834768..16fd91b7d7ab 100644
--- a/libstdc++-v3/src/c++20/atomic.cc
+++ b/libstdc++-v3/src/c++20/atomic.cc
@@ -455,49 +455,6 @@ __cond_wait_until(__condvar& __cv, mutex& __mx,
   return __wait_clock_t::now() < __atime;
 }
 #endif // ! HAVE_PLATFORM_WAIT
-
-// Unlike __spin_impl, does not always return _M_has_val == true.
-// If the deadline has already passed then no fresh value is loaded.
-__wait_result_type
-__spin_until_impl(const __platform_wait_t* __addr,
-                 const __wait_args_base& __args,
-                 const __wait_clock_t::time_point& __deadline)
-{
-  using namespace literals::chrono_literals;
-
-  __wait_result_type __res{};
-  auto __t0 = __wait_clock_t::now();
-  auto __now = __t0;
-  for (; __now < __deadline; __now = __wait_clock_t::now())
-    {
-      auto __elapsed = __now - __t0;
-#ifndef _GLIBCXX_NO_SLEEP
-      if (__elapsed > 128ms)
-       this_thread::sleep_for(64ms);
-      else if (__elapsed > 64us)
-       this_thread::sleep_for(__elapsed / 2);
-      else
-#endif
-      if (__elapsed > 4us)
-       __thread_yield();
-      else
-       {
-         __res = __detail::__spin_impl(__addr, __args);
-         if (!__res._M_timeout)
-           return __res;
-       }
-
-      __res._M_val = __atomic_load_n(__addr, __args._M_order);
-      __res._M_has_val = true;
-      if (__res._M_val != __args._M_old)
-       {
-         __res._M_timeout = false;
-         return __res;
-       }
-    }
-  __res._M_timeout = true;
-  return __res;
-}
 } // namespace
 
 __wait_result_type
@@ -509,11 +466,13 @@ __wait_until_impl([[maybe_unused]] const void* __addr, 
__wait_args_base& __args,
 
   if (__args & __wait_flags::__do_spin)
     {
-      auto __res = __detail::__spin_until_impl(__wait_addr, __args, __atime);
+      auto __res = __detail::__spin_impl(__wait_addr, __args);
       if (!__res._M_timeout)
        return __res;
       if (__args & __wait_flags::__spin_only)
        return __res;
+      if (__wait_clock_t::now() >= __atime)
+       return __res;
     }
 
 #ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
-- 
2.51.1

Reply via email to