This WORK-IN-PROGRESS patch uses an atomic unsigned and futex operations to optimize the synchronization code in std::future. The current code uses a mutex/condvar combination, which is both slower (e.g., due to mutex contention, stronger ordering requirements for condvars, using an additional condvar-internal mutex, ...) and makes std::future fairly large.
It introduces an __atomic_futex_unsigned type, which provides basic atomic operations (load and store) on an atomic<unsigned> and additionally provides load_when_[not_]equal operations that do blocking waits on the atomic -- pretty much what futexes do. Such an __atomic_futex_unsigned is then There are a few bits missing in this patch: * A fallback implementation for platforms that don't provide futexes, in the form of a different implementation of __atomic_futex_unsigned. A mutex+condvar combination is what I'm aiming at; for std::future, this would lead to similar code and sizeof(std::future) as currently. * More documentation of how the synchronization works. Jonathan has a patch in flight for that, so this should get merged. * Integration with the on_thread_exit patch that Jonathan posted, which uses the current, lock-based synchronization scheme and thus needs to get adapted. * Testing. There are ways to optimize further I suppose, for example by letting the __atomic_futex_unsigned take care of all current uses of call_once too. Let me know if I should do that. This would reduce the number of atomic ops a little in some cases, as well as reduce space required for futures a little. Comments?
commit 1543313a3b590e6422f5d547cabd6662e0a6f538 Author: Torvald Riegel <trie...@redhat.com> Date: Sun Nov 16 12:07:22 2014 +0100 [WIP] Optimize synchronization in std::future if futexes are available. diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am index e6edc73..81e6a8b 100644 --- a/libstdc++-v3/include/Makefile.am +++ b/libstdc++-v3/include/Makefile.am @@ -83,6 +83,7 @@ bits_headers = \ ${bits_srcdir}/allocated_ptr.h \ ${bits_srcdir}/allocator.h \ ${bits_srcdir}/atomic_base.h \ + ${bits_srcdir}/atomic_futex.h \ ${bits_srcdir}/basic_ios.h \ ${bits_srcdir}/basic_ios.tcc \ ${bits_srcdir}/basic_string.h \ diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in index 2ade448..2d72de3 100644 --- a/libstdc++-v3/include/Makefile.in +++ b/libstdc++-v3/include/Makefile.in @@ -350,6 +350,7 @@ bits_headers = \ ${bits_srcdir}/allocated_ptr.h \ ${bits_srcdir}/allocator.h \ ${bits_srcdir}/atomic_base.h \ + ${bits_srcdir}/atomic_futex.h \ ${bits_srcdir}/basic_ios.h \ ${bits_srcdir}/basic_ios.tcc \ ${bits_srcdir}/basic_string.h \ diff --git a/libstdc++-v3/include/bits/atomic_futex.h b/libstdc++-v3/include/bits/atomic_futex.h new file mode 100644 index 0000000..4b5664d --- /dev/null +++ b/libstdc++-v3/include/bits/atomic_futex.h @@ -0,0 +1,175 @@ +// -*- C++ -*- header. + +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file bits/atomic_futex.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. + */ + +#ifndef _GLIBCXX_ATOMIC_FUTEX_H +#define _GLIBCXX_ATOMIC_FUTEX_H 1 + +#pragma GCC system_header + +#include <bits/c++config.h> +#include <atomic> +#include <mutex> +#include <condition_variable> + +#ifndef _GLIBCXX_ALWAYS_INLINE +#define _GLIBCXX_ALWAYS_INLINE inline __attribute__((always_inline)) +#endif + +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +//#ifdef _GLIBCXX_USE_FUTEX + struct __atomic_futex_unsigned_base + { + // Returns false iff a timeout occurred. + static bool + futex_wait_for(unsigned *addr, unsigned val, bool has_timeout, + chrono::seconds s, chrono::nanoseconds ns); + + // This is static because it can be executed after the object has been + // destroyed. + static void futex_notify_all(unsigned* addr); + }; + + template <unsigned _Waiter_bit = 0x80000000> + struct __atomic_futex_unsigned : public __atomic_futex_unsigned_base + { + // XXX We expect this to be lock-free, and having the payload at offset 0 + atomic<unsigned> _M_data; + + __atomic_futex_unsigned(unsigned data) : _M_data(data) + { } + + _GLIBCXX_ALWAYS_INLINE unsigned + load(memory_order mo) + { + return _M_data.load(mo) & ~_Waiter_bit; + } + + // If a timeout occurs, returns a current value after the timeout; + // otherwise, returns the operand's value if equal is true or a different + // value if equal is false. + // The assumed value is the caller's assumption about the current value + // when making the call. + unsigned + load_and_test_for_slow(unsigned assumed, unsigned operand, bool equal, + memory_order mo, bool has_timeout, + chrono::seconds s, chrono::nanoseconds ns) + { + for (;;) + { + // Don't bother checking the value again because we expect the caller to + // have done it recently. + // memory_order_relaxed is sufficient because we can rely on just the + // modification order (store_notify uses an atomic RMW operation too), + // and the futex syscalls synchronize between themselves. + _M_data.fetch_or(_Waiter_bit, memory_order_relaxed); + bool ret = futex_wait_for((unsigned*)(void*)&_M_data, + assumed | _Waiter_bit, has_timeout, s, ns); + // Fetch the current value after waiting (clears _Waiter_bit). + assumed = load(mo); + if (!ret || ((operand == assumed) == equal)) + return assumed; + // TODO adapt wait time + } + } + + _GLIBCXX_ALWAYS_INLINE unsigned + load_when_not_equal(unsigned val, memory_order mo) + { + unsigned i = load(mo); + if ((i & ~_Waiter_bit) != val) return; + // TODO Spin-wait first. + return load_and_test_for_slow(i, val, false, mo, false, 0, 0); + } + + _GLIBCXX_ALWAYS_INLINE void + load_when_equal(unsigned val, memory_order mo) + { + unsigned i = load(mo); + if ((i & ~_Waiter_bit) == val) + return; + // TODO Spin-wait first. + load_and_test_for_slow(i, val, true, mo, false, 0, 0); + } + + template<typename _Rep, typename _Period> + _GLIBCXX_ALWAYS_INLINE bool + load_when_equal_for(unsigned val, memory_order mo, + const chrono::duration<_Rep, _Period>& rel_time) + { + unsigned i = load(mo); + if ((i & ~_Waiter_bit) == val) + return true; + // TODO Spin-wait first. Ignore effect on timeout. + auto s = chrono::duration_cast<chrono::seconds>(rel_time); + auto ns = chrono::duration_cast<chrono::nanoseconds>(rel_time - s); + i = load_and_test_for_slow(i, val, true, mo, false, 0, 0); + return (i & ~_Waiter_bit) == val; + } + + template<typename _Clock, typename _Duration> + _GLIBCXX_ALWAYS_INLINE bool + load_when_equal_until(unsigned val, memory_order mo, + const chrono::time_point<_Clock, _Duration>& abs_time) + { + unsigned i = load(mo); + if ((i & ~_Waiter_bit) == val) + return true; + // TODO Spin-wait first. Ignore effect on timeout. + auto rel_time = abs_time - chrono::steady_clock::now(); + auto s = chrono::duration_cast<chrono::seconds>(rel_time); + auto ns = chrono::duration_cast<chrono::nanoseconds>(rel_time - s); + i = load_and_test_for_slow(i, val, true, mo, false, 0, 0); + return (i & ~_Waiter_bit) == val; + } + + _GLIBCXX_ALWAYS_INLINE void + store_notify_all(unsigned val, memory_order mo) + { + void* futex = (unsigned *)(void *)&_M_data; + if (_M_data.exchange(val, mo) & _Waiter_bit) + futex_notify_all(futex); + } + }; + +//#else +// struct __atomic_futex +// { +// int _M_data; +// mutex _M_mutex; +// condition_variable _M_condvar; +// }; +//#endif + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace std + +#endif diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future index 8989474..392c13f 100644 --- a/libstdc++-v3/include/std/future +++ b/libstdc++-v3/include/std/future @@ -41,6 +41,7 @@ #include <condition_variable> #include <system_error> #include <atomic> +#include <bits/atomic_futex.h> #include <bits/functexcept.h> #include <bits/unique_ptr.h> #include <bits/shared_ptr.h> @@ -291,14 +292,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { typedef _Ptr<_Result_base> _Ptr_type; + enum _Status { + not_ready, + ready + }; + _Ptr_type _M_result; - mutex _M_mutex; - condition_variable _M_cond; + __atomic_futex_unsigned<> _M_status; atomic_flag _M_retrieved; once_flag _M_once; public: - _State_baseV2() noexcept : _M_result(), _M_retrieved(ATOMIC_FLAG_INIT) + _State_baseV2() noexcept : _M_result(), _M_retrieved(ATOMIC_FLAG_INIT), + _M_status(_Status::not_ready) { } _State_baseV2(const _State_baseV2&) = delete; _State_baseV2& operator=(const _State_baseV2&) = delete; @@ -308,8 +314,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION wait() { _M_complete_async(); - unique_lock<mutex> __lock(_M_mutex); - _M_cond.wait(__lock, [&] { return _M_ready(); }); + // Acquire MO makes sure this synchronizes with the thread that made + // the future ready. + _M_status.load_when_equal(_Status::ready, memory_order_acquire); return *_M_result; } @@ -317,15 +324,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION future_status wait_for(const chrono::duration<_Rep, _Period>& __rel) { - unique_lock<mutex> __lock(_M_mutex); - if (_M_ready()) + _Status _s = _M_status.load(memory_order_acquire); + if (_s == _Status::ready) return future_status::ready; - if (_M_has_deferred()) + if (_M_is_deferred_future()) return future_status::deferred; - if (_M_cond.wait_for(__lock, __rel, [&] { return _M_ready(); })) + if (_M_status.load_when_equal_for(_Status::ready, + memory_order_acquire, __rel)) { // _GLIBCXX_RESOLVE_LIB_DEFECTS // 2100. timed waiting functions must also join + // This call is a no-op by default except on an async future, + // in which case the async thread is joined. It's also not a + // no-op for a deferred future, but such a future will never + // reach this point because it returns future_status::deferred + // instead of waiting for the future to become ready (see + // above). Async futures synchronize in this call, so we need + // no further synchronization here. _M_complete_async(); return future_status::ready; } @@ -336,15 +351,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION future_status wait_until(const chrono::time_point<_Clock, _Duration>& __abs) { - unique_lock<mutex> __lock(_M_mutex); - if (_M_ready()) + // Use atomics + futex loop (or synchronic) + _Status _s = _M_status.load(memory_order_acquire); + if (_s == _Status::ready) return future_status::ready; - if (_M_has_deferred()) + if (_M_is_deferred_future()) return future_status::deferred; - if (_M_cond.wait_until(__lock, __abs, [&] { return _M_ready(); })) + if (_M_status.load_when_equal_until(_Status::ready, + memory_order_acquire, __abs)) { // _GLIBCXX_RESOLVE_LIB_DEFECTS // 2100. timed waiting functions must also join + // See wait_for(...). _M_complete_async(); return future_status::ready; } @@ -354,14 +372,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_set_result(function<_Ptr_type()> __res, bool __ignore_failure = false) { - unique_lock<mutex> __lock(_M_mutex, defer_lock); + bool __did_set = false; // all calls to this function are serialized, // side-effects of invoking __res only happen once call_once(_M_once, &_State_baseV2::_M_do_set, this, - std::__addressof(__res), std::__addressof(__lock)); - if (__lock.owns_lock()) - _M_cond.notify_all(); - else if (!__ignore_failure) + std::__addressof(__res), std::__addressof(__did_set)); + if (!__did_set && !__ignore_failure) __throw_future_error(int(future_errc::promise_already_satisfied)); } @@ -372,11 +388,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { error_code __ec(make_error_code(future_errc::broken_promise)); __res->_M_error = make_exception_ptr(future_error(__ec)); - { - lock_guard<mutex> __lock(_M_mutex); - _M_result.swap(__res); - } - _M_cond.notify_all(); + // Must not be called concurrently with set_result. + _M_result.swap(__res); + _M_status.store_notify_all(_Status::ready, memory_order_release); } } @@ -466,21 +480,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION private: void - _M_do_set(function<_Ptr_type()>* __f, unique_lock<mutex>* __lock) + _M_do_set(function<_Ptr_type()>* __f, bool* __did_set) { - _Ptr_type __res = (*__f)(); // do not hold lock while running setter - __lock->lock(); + _Ptr_type __res = (*__f)(); + // Notify the caller that we did try to set; if we do not throw an + // exception, the caller will be aware that it did set (e.g., see + // _M_set_result). + *__did_set = true; _M_result.swap(__res); + _M_status.store_notify_all(_Status::ready, memory_order_release); } - bool _M_ready() const noexcept { return static_cast<bool>(_M_result); } - // Wait for completion of async function. virtual void _M_complete_async() { } - // Return true if state contains a deferred function. - // Caller must own _M_mutex. - virtual bool _M_has_deferred() const { return false; } + // Return true if the future contains a deferred function. + virtual bool _M_is_deferred_future() const { return false; } }; #ifdef _GLIBCXX_ASYNC_ABI_COMPAT @@ -1458,7 +1473,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } virtual bool - _M_has_deferred() const { return static_cast<bool>(_M_result); } + _M_is_deferred_future() const { return true; } }; class __future_base::_Async_state_commonV2 diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am index 71306db..2ef4b5f 100644 --- a/libstdc++-v3/src/c++11/Makefile.am +++ b/libstdc++-v3/src/c++11/Makefile.am @@ -53,6 +53,7 @@ sources = \ debug.cc \ functexcept.cc \ functional.cc \ + futex.cc \ future.cc \ hash_c++0x.cc \ hashtable_c++0x.cc \ diff --git a/libstdc++-v3/src/c++11/Makefile.in b/libstdc++-v3/src/c++11/Makefile.in index dd9e110..6fffe06 100644 --- a/libstdc++-v3/src/c++11/Makefile.in +++ b/libstdc++-v3/src/c++11/Makefile.in @@ -70,7 +70,7 @@ libc__11convenience_la_LIBADD = @ENABLE_CXX11_ABI_TRUE@am__objects_1 = cxx11-ios_failure.lo am__objects_2 = ctype_configure_char.lo ctype_members.lo am__objects_3 = chrono.lo condition_variable.lo ctype.lo debug.lo \ - functexcept.lo functional.lo future.lo hash_c++0x.lo \ + functexcept.lo functional.lo futex.lo future.lo hash_c++0x.lo \ hashtable_c++0x.lo ios.lo limits.lo mutex.lo placeholders.lo \ random.lo regex.lo shared_ptr.lo snprintf_lite.lo \ system_error.lo thread.lo $(am__objects_1) $(am__objects_2) @@ -334,6 +334,7 @@ sources = \ debug.cc \ functexcept.cc \ functional.cc \ + futex.cc \ future.cc \ hash_c++0x.cc \ hashtable_c++0x.cc \ diff --git a/libstdc++-v3/src/c++11/futex.cc b/libstdc++-v3/src/c++11/futex.cc new file mode 100644 index 0000000..8af98af --- /dev/null +++ b/libstdc++-v3/src/c++11/futex.cc @@ -0,0 +1,72 @@ +// futex -*- C++ -*- + +// Copyright (C) 2008-2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +#include <bits/c++config.h> +#include <bits/atomic_futex.h> +#include <bits/gthr.h> +#if defined(__GTHREADS) && defined(__GTHREAD_HAS_COND) \ + && (ATOMIC_INT_LOCK_FREE > 1) && defined(_GLIBCXX_HAVE_LINUX_FUTEX) +# include <climits> +# include <syscall.h> +# include <unistd.h> +# define _GLIBCXX_USE_FUTEX +# define _GLIBCXX_FUTEX_WAIT 0 +# define _GLIBCXX_FUTEX_WAKE 1 +#endif + +namespace std //_GLIBCXX_VISIBILITY(default) +{ +//_GLIBCXX_BEGIN_NAMESPACE_VERSION + + bool + __atomic_futex_unsigned_base::futex_wait_for(unsigned *addr, unsigned val, + bool has_timeout, chrono::seconds __s, chrono::nanoseconds __ns) + { + if (!has_timeout) + { + syscall (SYS_futex, addr, _GLIBCXX_FUTEX_WAIT, val); + return true; + } + else + { + // If we already timed out, don't execute the futex operation. + if (__ns.count() < 0 || __s.count() < 0) + return ETIMEDOUT; + __gthread_time_t ts = + { + static_cast<std::time_t>(__s.count()), + static_cast<long>(__ns.count()) + }; + int err = syscall (SYS_futex, addr, _GLIBCXX_FUTEX_WAIT, val, &ts); + return err != ETIMEDOUT; + } + } + + void + __atomic_futex_unsigned_base::futex_notify_all(unsigned* addr) + { + syscall (SYS_futex, addr, _GLIBCXX_FUTEX_WAKE, INT_MAX); + } + +}