I would prefer design when we have a platform_semaphore_base<bool Binary> that is defined only if _GLIBCXX_HAVE_PLATFORM_WAIT, and semaphore_base.
For the platform_semaphore, this could you binary_semaphore code, with two modification: static constexpr ptrdiff_t _S_max = _Binary ? 1 : numeric_limits<detail::__platform_wait_t>::Max; _GLIBCXX_ALWAYS_INLINE __count_type _M_get_current() const noexcept { if constexpr(_Binary) return 1; else return __atomic_impl::load(&_M_counter, memory_order::acquire); } _GLIBCXX_ALWAYS_INLINE ptrdiff_t _M_release(ptrdiff_t __update) noexcept And the other with a semaphore base. Does this make sense? On Tue, Jun 3, 2025 at 10:32 AM Jonathan Wakely <jwak...@redhat.com> wrote: > When the semaphore counter is __platform_wait_t we can use the simpler > atomic waiting functions that just take a value and wait for it to > change, instead of using an accessor function and a predicate to fetch > and compare the value. > > Because the simpler value-based waiting functions don't return the new > value when they notice it has changed, the new paths using those > functions need an explicit atomic load to get the latest value. If those > functions are changed to start returning a value (as comments in > <bits/atomic_wait.h> and <bits/atomic_timed_wait.h> indicate) then those > additional loads could be removed from the semaphore acquire functions. > > I'm not sure if this makes the code any faster in real scenarios, but > the generated code for std::counting_semaphore<N> is now slightly > smaller for N <= INT_MAX. > > libstdc++-v3/ChangeLog: > > * include/bits/semaphore_base.h (__semaphore_base::_M_acquire): > Use __atomic_wait_address_v instead of __atomic_wait_address > when __count_type is __platform_wait_t. > (__semaphore_base::_M_try_acquire_until): Likewise for > __atomic_wait_address_until_v instead of > __atomic_wait_address_until. > (__semaphore_base::_M_try_acquire_for): Likewise for > __atomic_wait_address_for_v instead of __atomic_wait_address_for. > --- > > Tested x86_64-linux. > > libstdc++-v3/include/bits/semaphore_base.h | 109 ++++++++++++++------- > 1 file changed, 72 insertions(+), 37 deletions(-) > > diff --git a/libstdc++-v3/include/bits/semaphore_base.h > b/libstdc++-v3/include/bits/semaphore_base.h > index 5446e57b0ab1..6448104494e7 100644 > --- a/libstdc++-v3/include/bits/semaphore_base.h > +++ b/libstdc++-v3/include/bits/semaphore_base.h > @@ -85,24 +85,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > memory_order::relaxed); > } > > + // Keep trying to acquire the semaphore in a loop until it succeeds. > void > _M_acquire() noexcept > { > - auto const __vfn = [this]{ return _M_get_current(); }; > - auto __val = __vfn(); > - auto const __pred = [&__val](__count_type __cur) { > - if (__cur > 0) > - { > - __val = __cur; > - return true; > - } > - return false; > - }; > + auto __val = _M_get_current(); > while (!_M_do_try_acquire(__val)) > if (__val == 0) > - std::__atomic_wait_address(&_M_counter, __pred, __vfn, true); > + { > + if constexpr (_Platform_wait) > + { > + std::__atomic_wait_address_v(&_M_counter, __val, > __ATOMIC_ACQUIRE, true); > + __val = _M_get_current(); > + } > + else > + { > + auto __vfn = [this]{ return _M_get_current(); }; > + auto __pred = [&__val](__count_type __cur) { > + if (__cur > 0) > + { > + __val = __cur; > + return true; > + } > + return false; > + }; > + std::__atomic_wait_address(&_M_counter, __pred, __vfn, > true); > + } > + } > } > > + // Try to acquire the semaphore, retrying a small number of times > + // in case of contention. > bool > _M_try_acquire() noexcept > { > @@ -116,22 +129,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > bool > _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>& > __atime) noexcept > { > - auto const __vfn = [this]{ return _M_get_current(); }; > - auto __val = __vfn(); > - auto const __pred = [&__val](__count_type __cur) { > - if (__cur > 0) > - { > - __val = __cur; > - return true; > - } > - return false; > - }; > + auto __val = _M_get_current(); > while (!_M_do_try_acquire(__val)) > if (__val == 0) > { > - if (!std::__atomic_wait_address_until(&_M_counter, __pred, > - __vfn, __atime, true)) > - return false; // timed out > + if constexpr (_Platform_wait) > + { > + if (!std::__atomic_wait_address_until_v(&_M_counter, 0, > + __ATOMIC_ACQUIRE, > + __atime, true)) > + return false; // timed out > + __val = _M_get_current(); > + } > + else > + { > + auto __vfn = [this]{ return _M_get_current(); }; > + auto __pred = [&__val](__count_type __cur) { > + if (__cur > 0) > + { > + __val = __cur; > + return true; > + } > + return false; > + }; > + if (!std::__atomic_wait_address_until(&_M_counter, > __pred, > + __vfn, __atime, > true)) > + return false; // timed out > + } > } > return true; > } > @@ -140,22 +164,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > bool > _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime) > noexcept > { > - auto const __vfn = [this]{ return _M_get_current(); }; > - auto __val = __vfn(); > - auto const __pred = [&__val](__count_type __cur) { > - if (__cur > 0) > - { > - __val = __cur; > - return true; > - } > - return false; > - }; > + auto __val = _M_get_current(); > while (!_M_do_try_acquire(__val)) > if (__val == 0) > { > - if (!std::__atomic_wait_address_for(&_M_counter, __pred, > - __vfn, __rtime, true)) > - return false; // timed out > + if constexpr (_Platform_wait) > + { > + if (!std::__atomic_wait_address_for_v(&_M_counter, 0, > + __ATOMIC_ACQUIRE, > + __rtime, true)) > + return false; // timed out > + __val = _M_get_current(); > + } > + else > + { > + auto __vfn = [this]{ return _M_get_current(); }; > + auto __pred = [&__val](__count_type __cur) { > + if (__cur > 0) > + { > + __val = __cur; > + return true; > + } > + return false; > + }; > + if (!std::__atomic_wait_address_for(&_M_counter, __pred, > + __vfn, __rtime, > true)) > + return false; // timed out > + } > } > return true; > } > -- > 2.49.0 > >