I would prefer design when we have a platform_semaphore_base<bool Binary>
that is defined only if _GLIBCXX_HAVE_PLATFORM_WAIT,
and semaphore_base.

For the platform_semaphore, this could you binary_semaphore code, with two
modification:
   static constexpr ptrdiff_t _S_max = _Binary ? 1 :
numeric_limits<detail::__platform_wait_t>::Max;

   _GLIBCXX_ALWAYS_INLINE __count_type
    _M_get_current() const noexcept
    {
        if constexpr(_Binary)
           return 1;
        else
            return __atomic_impl::load(&_M_counter, memory_order::acquire);
    }

    _GLIBCXX_ALWAYS_INLINE ptrdiff_t
    _M_release(ptrdiff_t __update) noexcept

And the other with a semaphore base. Does this make sense?

On Tue, Jun 3, 2025 at 10:32 AM Jonathan Wakely <jwak...@redhat.com> wrote:

> When the semaphore counter is __platform_wait_t we can use the simpler
> atomic waiting functions that just take a value and wait for it to
> change, instead of using an accessor function and a predicate to fetch
> and compare the value.
>
> Because the simpler value-based waiting functions don't return the new
> value when they notice it has changed, the new paths using those
> functions need an explicit atomic load to get the latest value. If those
> functions are changed to start returning a value (as comments in
> <bits/atomic_wait.h> and <bits/atomic_timed_wait.h> indicate) then those
> additional loads could be removed from the semaphore acquire functions.
>
> I'm not sure if this makes the code any faster in real scenarios, but
> the generated code for std::counting_semaphore<N> is now slightly
> smaller for N <= INT_MAX.
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/semaphore_base.h (__semaphore_base::_M_acquire):
>         Use __atomic_wait_address_v instead of __atomic_wait_address
>         when __count_type is __platform_wait_t.
>         (__semaphore_base::_M_try_acquire_until): Likewise for
>         __atomic_wait_address_until_v instead of
> __atomic_wait_address_until.
>         (__semaphore_base::_M_try_acquire_for): Likewise for
>         __atomic_wait_address_for_v instead of __atomic_wait_address_for.
> ---
>
> Tested x86_64-linux.
>
>  libstdc++-v3/include/bits/semaphore_base.h | 109 ++++++++++++++-------
>  1 file changed, 72 insertions(+), 37 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/semaphore_base.h
> b/libstdc++-v3/include/bits/semaphore_base.h
> index 5446e57b0ab1..6448104494e7 100644
> --- a/libstdc++-v3/include/bits/semaphore_base.h
> +++ b/libstdc++-v3/include/bits/semaphore_base.h
> @@ -85,24 +85,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>                                                     memory_order::relaxed);
>      }
>
> +    // Keep trying to acquire the semaphore in a loop until it succeeds.
>      void
>      _M_acquire() noexcept
>      {
> -      auto const __vfn = [this]{ return _M_get_current(); };
> -      auto __val = __vfn();
> -      auto const __pred = [&__val](__count_type __cur) {
> -       if (__cur > 0)
> -         {
> -           __val = __cur;
> -           return true;
> -         }
> -       return false;
> -      };
> +      auto __val = _M_get_current();
>        while (!_M_do_try_acquire(__val))
>         if (__val == 0)
> -         std::__atomic_wait_address(&_M_counter, __pred, __vfn, true);
> +         {
> +           if constexpr (_Platform_wait)
> +             {
> +               std::__atomic_wait_address_v(&_M_counter, __val,
> __ATOMIC_ACQUIRE, true);
> +               __val = _M_get_current();
> +             }
> +           else
> +             {
> +               auto __vfn = [this]{ return _M_get_current(); };
> +               auto __pred = [&__val](__count_type __cur) {
> +                 if (__cur > 0)
> +                   {
> +                     __val = __cur;
> +                     return true;
> +                   }
> +                 return false;
> +               };
> +               std::__atomic_wait_address(&_M_counter, __pred, __vfn,
> true);
> +             }
> +         }
>      }
>
> +    // Try to acquire the semaphore, retrying a small number of times
> +    // in case of contention.
>      bool
>      _M_try_acquire() noexcept
>      {
> @@ -116,22 +129,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        bool
>        _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>&
> __atime) noexcept
>        {
> -       auto const __vfn = [this]{ return _M_get_current(); };
> -       auto __val = __vfn();
> -       auto const __pred = [&__val](__count_type __cur) {
> -         if (__cur > 0)
> -           {
> -             __val = __cur;
> -             return true;
> -           }
> -         return false;
> -       };
> +       auto __val = _M_get_current();
>         while (!_M_do_try_acquire(__val))
>           if (__val == 0)
>             {
> -             if (!std::__atomic_wait_address_until(&_M_counter, __pred,
> -                                                   __vfn, __atime, true))
> -               return false; // timed out
> +             if constexpr (_Platform_wait)
> +               {
> +                 if (!std::__atomic_wait_address_until_v(&_M_counter, 0,
> +                                                         __ATOMIC_ACQUIRE,
> +                                                         __atime, true))
> +                   return false; // timed out
> +                 __val = _M_get_current();
> +               }
> +             else
> +               {
> +                 auto __vfn = [this]{ return _M_get_current(); };
> +                 auto __pred = [&__val](__count_type __cur) {
> +                   if (__cur > 0)
> +                     {
> +                       __val = __cur;
> +                       return true;
> +                     }
> +                   return false;
> +                 };
> +                 if (!std::__atomic_wait_address_until(&_M_counter,
> __pred,
> +                                                       __vfn, __atime,
> true))
> +                   return false; // timed out
> +               }
>             }
>         return true;
>        }
> @@ -140,22 +164,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        bool
>        _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime)
> noexcept
>        {
> -       auto const __vfn = [this]{ return _M_get_current(); };
> -       auto __val = __vfn();
> -       auto const __pred = [&__val](__count_type __cur) {
> -         if (__cur > 0)
> -           {
> -             __val = __cur;
> -             return true;
> -           }
> -         return false;
> -       };
> +       auto __val = _M_get_current();
>         while (!_M_do_try_acquire(__val))
>           if (__val == 0)
>             {
> -             if (!std::__atomic_wait_address_for(&_M_counter, __pred,
> -                                                 __vfn, __rtime, true))
> -               return false; // timed out
> +             if constexpr (_Platform_wait)
> +               {
> +                 if (!std::__atomic_wait_address_for_v(&_M_counter, 0,
> +                                                       __ATOMIC_ACQUIRE,
> +                                                       __rtime, true))
> +                   return false; // timed out
> +                 __val = _M_get_current();
> +               }
> +             else
> +               {
> +                 auto __vfn = [this]{ return _M_get_current(); };
> +                 auto __pred = [&__val](__count_type __cur) {
> +                   if (__cur > 0)
> +                     {
> +                       __val = __cur;
> +                       return true;
> +                     }
> +                   return false;
> +                 };
> +                 if (!std::__atomic_wait_address_for(&_M_counter, __pred,
> +                                                     __vfn, __rtime,
> true))
> +                   return false; // timed out
> +               }
>             }
>         return true;
>        }
> --
> 2.49.0
>
>

Reply via email to