When the semaphore counter is __platform_wait_t we can use the simpler atomic waiting functions that just take a value and wait for it to change, instead of using an accessor function and a predicate to fetch and compare the value.
Because the simpler value-based waiting functions don't return the new value when they notice it has changed, the new paths using those functions need an explicit atomic load to get the latest value. If those functions are changed to start returning a value (as comments in <bits/atomic_wait.h> and <bits/atomic_timed_wait.h> indicate) then those additional loads could be removed from the semaphore acquire functions. I'm not sure if this makes the code any faster in real scenarios, but the generated code for std::counting_semaphore<N> is now slightly smaller for N <= INT_MAX. libstdc++-v3/ChangeLog: * include/bits/semaphore_base.h (__semaphore_base::_M_acquire): Use __atomic_wait_address_v instead of __atomic_wait_address when __count_type is __platform_wait_t. (__semaphore_base::_M_try_acquire_until): Likewise for __atomic_wait_address_until_v instead of __atomic_wait_address_until. (__semaphore_base::_M_try_acquire_for): Likewise for __atomic_wait_address_for_v instead of __atomic_wait_address_for. --- Tested x86_64-linux. libstdc++-v3/include/bits/semaphore_base.h | 109 ++++++++++++++------- 1 file changed, 72 insertions(+), 37 deletions(-) diff --git a/libstdc++-v3/include/bits/semaphore_base.h b/libstdc++-v3/include/bits/semaphore_base.h index 5446e57b0ab1..6448104494e7 100644 --- a/libstdc++-v3/include/bits/semaphore_base.h +++ b/libstdc++-v3/include/bits/semaphore_base.h @@ -85,24 +85,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION memory_order::relaxed); } + // Keep trying to acquire the semaphore in a loop until it succeeds. void _M_acquire() noexcept { - auto const __vfn = [this]{ return _M_get_current(); }; - auto __val = __vfn(); - auto const __pred = [&__val](__count_type __cur) { - if (__cur > 0) - { - __val = __cur; - return true; - } - return false; - }; + auto __val = _M_get_current(); while (!_M_do_try_acquire(__val)) if (__val == 0) - std::__atomic_wait_address(&_M_counter, __pred, __vfn, true); + { + if constexpr (_Platform_wait) + { + std::__atomic_wait_address_v(&_M_counter, __val, __ATOMIC_ACQUIRE, true); + __val = _M_get_current(); + } + else + { + auto __vfn = [this]{ return _M_get_current(); }; + auto __pred = [&__val](__count_type __cur) { + if (__cur > 0) + { + __val = __cur; + return true; + } + return false; + }; + std::__atomic_wait_address(&_M_counter, __pred, __vfn, true); + } + } } + // Try to acquire the semaphore, retrying a small number of times + // in case of contention. bool _M_try_acquire() noexcept { @@ -116,22 +129,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>& __atime) noexcept { - auto const __vfn = [this]{ return _M_get_current(); }; - auto __val = __vfn(); - auto const __pred = [&__val](__count_type __cur) { - if (__cur > 0) - { - __val = __cur; - return true; - } - return false; - }; + auto __val = _M_get_current(); while (!_M_do_try_acquire(__val)) if (__val == 0) { - if (!std::__atomic_wait_address_until(&_M_counter, __pred, - __vfn, __atime, true)) - return false; // timed out + if constexpr (_Platform_wait) + { + if (!std::__atomic_wait_address_until_v(&_M_counter, 0, + __ATOMIC_ACQUIRE, + __atime, true)) + return false; // timed out + __val = _M_get_current(); + } + else + { + auto __vfn = [this]{ return _M_get_current(); }; + auto __pred = [&__val](__count_type __cur) { + if (__cur > 0) + { + __val = __cur; + return true; + } + return false; + }; + if (!std::__atomic_wait_address_until(&_M_counter, __pred, + __vfn, __atime, true)) + return false; // timed out + } } return true; } @@ -140,22 +164,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime) noexcept { - auto const __vfn = [this]{ return _M_get_current(); }; - auto __val = __vfn(); - auto const __pred = [&__val](__count_type __cur) { - if (__cur > 0) - { - __val = __cur; - return true; - } - return false; - }; + auto __val = _M_get_current(); while (!_M_do_try_acquire(__val)) if (__val == 0) { - if (!std::__atomic_wait_address_for(&_M_counter, __pred, - __vfn, __rtime, true)) - return false; // timed out + if constexpr (_Platform_wait) + { + if (!std::__atomic_wait_address_for_v(&_M_counter, 0, + __ATOMIC_ACQUIRE, + __rtime, true)) + return false; // timed out + __val = _M_get_current(); + } + else + { + auto __vfn = [this]{ return _M_get_current(); }; + auto __pred = [&__val](__count_type __cur) { + if (__cur > 0) + { + __val = __cur; + return true; + } + return false; + }; + if (!std::__atomic_wait_address_for(&_M_counter, __pred, + __vfn, __rtime, true)) + return false; // timed out + } } return true; } -- 2.49.0