This adds a new implementation of std::counting_semaphore<Max> for the
case where Max == 1, i.e. the std::binary_semaphore typedef. When the
maximum counter value is 1 we don't need to load the current counter
value before doing a compare-exchange to acquire the semaphore. We can
just optimistically assume it's currently 1, and if that's true then the
compare_exchange will succeed. This simplifies _M_try_acquire so that we
don't need the separate _M_do_try_acquire function used by the general
__semaphore_base implementation for _Max > 1 cases.
We can also use the simpler forms of atomic waiting that just take a
value instead of a value accessor and predicate, because we know that
the _M_counter is always a __platform_wait_t. This change adds a
bare_wait flag to __atomic_wait_address_v because we don't need to track
waiters for semaphores, we only need to notify when a semaphore with a
count of zero is released.
I'm not sure if this makes the code any faster in real scenarios, but
the generated code for std::binary_semaphore is slightly smaller now.
libstdc++-v3/ChangeLog:
* include/bits/semaphore_base.h (__binary_semaphore_impl): New
class with optimized implementation for std::binary_semaphore.
(__semaphore_impl) <_Max == 1>: Modify alias template to use
__binary_semaphore_impl.
* include/bits/atomic_wait.h (__atomic_wait_address_v): Add
parameter for bare waits.
---
Tested x86_64-linux.
libstdc++-v3/include/bits/atomic_wait.h | 5 +-
libstdc++-v3/include/bits/semaphore_base.h | 73 +++++++++++++++++++++-
2 files changed, 73 insertions(+), 5 deletions(-)
diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
index 815726c16ccb..9ae11191d9ab 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -249,12 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// C++26 will return __val
}
+ // Wait on __addr while *__addr == __old is true.
inline void
__atomic_wait_address_v(const __detail::__platform_wait_t* __addr,
__detail::__platform_wait_t __old,
- int __order)
+ int __order, bool __bare_wait = false)
{
- __detail::__wait_args __args{ __addr, __old, __order };
+ __detail::__wait_args __args{ __addr, __old, __order, __bare_wait };
// C++26 will not ignore the return value here
__detail::__wait_impl(__addr, __args);
}
diff --git a/libstdc++-v3/include/bits/semaphore_base.h b/libstdc++-v3/include/bits/semaphore_base.h
index 3f7a33ccd51a..5446e57b0ab1 100644
--- a/libstdc++-v3/include/bits/semaphore_base.h
+++ b/libstdc++-v3/include/bits/semaphore_base.h
@@ -170,15 +170,82 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __old;
}
- private:
+ protected:
alignas(_Platform_wait ? __detail::__platform_wait_alignment
: __alignof__(__count_type))
__count_type _M_counter;
};
- template<ptrdiff_t _Max>
+ // Optimized implementation for std::binary_semaphore with max == 1.
+ struct __binary_semaphore_impl : private __semaphore_base<true>
+ {
+ using _Base = __semaphore_base<true>;
+ using _Base::__count_type;
+
+ static constexpr ptrdiff_t _S_max = 1;
+
+ constexpr explicit
+ __binary_semaphore_impl(__count_type __count) noexcept
+ : _Base(__count > 0)
+ { }
+
+ __binary_semaphore_impl(const __binary_semaphore_impl&) = delete;
+ __binary_semaphore_impl& operator=(const __binary_semaphore_impl&) =
delete;
+
+ using _Base::_M_get_current;
+
+ _GLIBCXX_ALWAYS_INLINE bool
+ _M_try_acquire() noexcept
+ {
+ __count_type __val = 1;
+ return _Base::_M_do_try_acquire(__val);
+ }
+
+ void
+ _M_acquire() noexcept
+ {
+ while (!_M_try_acquire())
+ std::__atomic_wait_address_v(&_M_counter, 0, __ATOMIC_ACQUIRE, true);
+ }
+
+ template<typename _Clock, typename _Duration>
+ bool
+ _M_try_acquire_until(const chrono::time_point<_Clock, _Duration>&
__atime) noexcept
+ {
+ while (!_M_try_acquire())
+ {
+ if (!std::__atomic_wait_address_until_v(&_M_counter, 0,
+ __ATOMIC_ACQUIRE,
+ __atime, true))
+ return false; // timed out
+ }
+ return true;
+ }
+
+ template<typename _Rep, typename _Period>
+ bool
+ _M_try_acquire_for(const chrono::duration<_Rep, _Period>& __rtime)
noexcept
+ {
+ while (!_M_try_acquire())
+ {
+ if (!std::__atomic_wait_address_for_v(&_M_counter, 0,
+ __ATOMIC_ACQUIRE,
+ __rtime, true))
+ return false; // timed out
+ }
+ return true;
+ }
+
+ _GLIBCXX_ALWAYS_INLINE ptrdiff_t
+ _M_release(ptrdiff_t __update) noexcept
+ { return _Base::_M_release(__update > 0); }
+ };
+
+ template<ptrdiff_t _Max,
+ bool _PlatformWait = (_Max <= __semaphore_base<true>::_S_max)>
using __semaphore_impl
- = __semaphore_base<(_Max <= __semaphore_base<true>::_S_max)>;
+ = __conditional_t<_Max == 1, __binary_semaphore_impl,
+ __semaphore_base<_PlatformWait>>;
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
--
2.49.0