Originally I had __atomic_exchange falling back to
__sync_lock_test_and_set, and __atomic_store falling back to
__sync_lock_release, whenever the __atomic pattern wasn't provided.
On some limited targets the functionality of these two operations can
be restricted. rth had already removed the store fallback to
__sync_lock_release, but a side effect of that was that
__sync_lock_release now always falls back to just storing 0, which I
don't think is quite right either.
The test_and_set case is worse. On some targets, a 1 can be stored,
but that's it. In those cases an exchange of the value 1 would result
in a lock-free call, but values other than 1 may not be stored, or may
be changed to a 1. This would be unacceptable.
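
To illustrate (this fragment is not part of the patch), if a general
exchange were lowered onto such a restricted test_and_set pattern, the
stored value could be silently altered:

  /* Illustration only.  On a target whose test-and-set can only ever
     store the value 1, lowering this exchange to
     __sync_lock_test_and_set would store 1 rather than 42, or fail
     outright, while still claiming the exchange succeeded.  */
  int
  grab_value (int *p)
  {
    return __atomic_exchange_n (p, 42, __ATOMIC_SEQ_CST);
  }
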
This patch allows expansion of those two __sync routines to utilize
their patterns *if* the expand_builtin call originates from a __sync
builtin. This means the two __sync calls first try to utilize the new
__atomic pattern if they can, but *will* fall back to the sync pattern
next. This fallback will NOT happen if the expansion originates from
an __atomic builtin.
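
Roughly, on such a limited target the two entry points now behave like
this (an illustrative sketch, not code from the patch):

  char
  from_sync (char *f)
  {
    /* Originates from a __sync builtin, so the expander may fall back
       to the target's sync_lock_test_and_set pattern.  */
    return __sync_lock_test_and_set (f, 1);
  }

  char
  from_atomic (char *f)
  {
    /* Originates from an __atomic builtin: no test_and_set fallback,
       so a limited target gets a library call such as
       __atomic_exchange_1 instead.  */
    return __atomic_exchange_n (f, 1, __ATOMIC_ACQUIRE);
  }
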
Both C1x and C++11 require the atomic_flag object to be lock free in
order to be standard compliant. The issue shows up on limited targets
which only support test_and_set and release operations, but nothing
else. The existing code results in an external call to
__atomic_exchange_1, which is unsatisfactory. The atomic_flag
implementation needs to fall back to these two __sync builtins when
there is no other support; that's what the libstdc++-v3 changes do.
They also fix the oversight where the fence routines don't actually
call the fence builtins.
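
For context, the canonical atomic_flag spin lock shows why the standard
insists on lock freedom here; this is illustrative usage only, not part
of the patch:

  #include <atomic>

  std::atomic_flag lock_flag = ATOMIC_FLAG_INIT;

  void
  with_lock (void (*fn) (void))
  {
    /* test_and_set must not itself acquire a lock, or this spin loop
       could deadlock.  */
    while (lock_flag.test_and_set (std::memory_order_acquire))
      ;
    fn ();
    lock_flag.clear (std::memory_order_release);
  }
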
This patch bootstraps and produces no new testsuite regressions on
x86_64-unknown-linux-gnu.
Andrew
2011-11-07 Andrew MacLeod <amacl...@redhat.com>
libstdc++-v3
* include/bits/atomic_base.h (atomic_thread_fence): Call builtin.
(atomic_signal_fence): Call builtin.
(atomic_flag::test_and_set): Call __atomic_exchange when it is lock
free, otherwise fall back to __sync_lock_test_and_set.
(atomic_flag::clear): Call __atomic_store when it is lock free,
otherwise fall back to __sync_lock_release.
gcc
* doc/extend.texi: Document behaviour change for __atomic_exchange and
__atomic_store.
* optabs.c (expand_atomic_exchange): Expand to __sync_lock_test_and_set
only when originated from that builtin.
(expand_atomic_store): Expand to __sync_lock_release when originated
from that builtin.
* builtins.c (expand_builtin_sync_lock_test_and_set): Add flag that
expand_atomic_exchange call originated from here.
(expand_builtin_sync_lock_release): Add flag that expand_atomic_store
call originated from here.
(expand_builtin_atomic_exchange): Add origination flag.
(expand_builtin_atomic_store): Add origination flag.
* expr.h (expand_atomic_exchange, expand_atomic_store): Add boolean
parameters to indicate implementation fall back options.
Index: libstdc++-v3/include/bits/atomic_base.h
===================================================================
*** libstdc++-v3/include/bits/atomic_base.h (revision 181031)
--- libstdc++-v3/include/bits/atomic_base.h (working copy)
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 69,78 ****
}
void
! atomic_thread_fence(memory_order) noexcept;
void
! atomic_signal_fence(memory_order) noexcept;
/// kill_dependency
template<typename _Tp>
--- 69,84 ----
}
void
! atomic_thread_fence(memory_order __m) noexcept
! {
! __atomic_thread_fence (__m);
! }
void
! atomic_signal_fence(memory_order __m) noexcept
! {
! __atomic_signal_fence (__m);
! }
/// kill_dependency
template<typename _Tp>
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 261,273 ****
bool
test_and_set(memory_order __m = memory_order_seq_cst) noexcept
{
! return __atomic_exchange_n(&_M_i, 1, __m);
}
bool
test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
{
! return __atomic_exchange_n(&_M_i, 1, __m);
}
void
--- 267,301 ----
bool
test_and_set(memory_order __m = memory_order_seq_cst) noexcept
{
! /* The standard *requires* this to be lock free. If exchange is not
! always lock free, then resort to the old test_and_set. */
! if (__atomic_always_lock_free (sizeof (_M_i), 0))
! return __atomic_exchange_n(&_M_i, 1, __m);
! else
! {
! /* Sync test and set is only guaranteed to be acquire. */
! if (__m == memory_order_seq_cst || __m == memory_order_release
! || __m == memory_order_acq_rel)
! atomic_thread_fence (__m);
! return __sync_lock_test_and_set (&_M_i, 1);
! }
}
bool
test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
{
! /* The standard *requires* this to be lock free. If exchange is not
! always lock free, then resort to the old test_and_set. */
! if (__atomic_always_lock_free (sizeof (_M_i), 0))
! return __atomic_exchange_n(&_M_i, 1, __m);
! else
! {
! /* Sync test and set is only guaranteed to be acquire. */
! if (__m == memory_order_seq_cst || __m == memory_order_release
! || __m == memory_order_acq_rel)
! atomic_thread_fence (__m);
! return __sync_lock_test_and_set (&_M_i, 1);
! }
}
void
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 277,283 ****
__glibcxx_assert(__m != memory_order_acquire);
__glibcxx_assert(__m != memory_order_acq_rel);
! __atomic_store_n(&_M_i, 0, __m);
}
void
--- 305,321 ----
__glibcxx_assert(__m != memory_order_acquire);
__glibcxx_assert(__m != memory_order_acq_rel);
! /* The standard *requires* this to be lock free. If store is not always
! lock free, then resort to the old style __sync_lock_release. */
! if (__atomic_always_lock_free (sizeof (_M_i), 0))
! __atomic_store_n(&_M_i, 0, __m);
! else
! {
! __sync_lock_release (&_M_i, 0);
! /* __sync_lock_release is only guaranteed to be a release barrier. */
! if (__m == memory_order_seq_cst)
! atomic_thread_fence (__m);
! }
}
void
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 287,293 ****
__glibcxx_assert(__m != memory_order_acquire);
__glibcxx_assert(__m != memory_order_acq_rel);
! __atomic_store_n(&_M_i, 0, __m);
}
};
--- 325,341 ----
__glibcxx_assert(__m != memory_order_acquire);
__glibcxx_assert(__m != memory_order_acq_rel);
! /* The standard *requires* this to be lock free. If store is not always
! lock free, then resort to the old style __sync_lock_release. */
! if (__atomic_always_lock_free (sizeof (_M_i), 0))
! __atomic_store_n(&_M_i, 0, __m);
! else
! {
! __sync_lock_release (&_M_i, 0);
! /* __sync_lock_release is only guaranteed to be a release barrier. */
! if (__m == memory_order_seq_cst)
! atomic_thread_fence (__m);
! }
}
};
Index: gcc/doc/extend.texi
===================================================================
*** gcc/doc/extend.texi (revision 181031)
--- gcc/doc/extend.texi (working copy)
*************** contents of @code{*@var{ptr}} in @code{*
*** 6910,6918 ****
@deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr,
@var{type} val, int memmodel)
This built-in function implements an atomic store operation. It writes
! @code{@var{val}} into @code{*@var{ptr}}. On targets which are limited,
! 0 may be the only valid value. This mimics the behaviour of
! @code{__sync_lock_release} on such hardware.
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
--- 6910,6916 ----
@deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr,
@var{type} val, int memmodel)
This built-in function implements an atomic store operation. It writes
! @code{@var{val}} into @code{*@var{ptr}}.
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
*************** This built-in function implements an ato
*** 6930,6939 ****
@var{val} into @code{*@var{ptr}}, and returns the previous contents of
@code{*@var{ptr}}.
- On targets which are limited, a value of 1 may be the only valid value
- written. This mimics the behaviour of @code{__sync_lock_test_and_set} on
- such hardware.
-
The valid memory model variants are
@code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE},
@code{__ATOMIC_RELEASE}, and @code{__ATOMIC_ACQ_REL}.
--- 6928,6933 ----
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c (revision 181031)
--- gcc/optabs.c (working copy)
*************** expand_compare_and_swap_loop (rtx mem, r
*** 7256,7265 ****
atomically store VAL in MEM and return the previous value in MEM.
MEMMODEL is the memory model variant to use.
! TARGET is an option place to stick the return value. */
rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
--- 7256,7268 ----
atomically store VAL in MEM and return the previous value in MEM.
MEMMODEL is the memory model variant to use.
! TARGET is an optional place to stick the return value.
! USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used
! as a fall back if the atomic_exchange pattern does not exist. */
rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
! bool use_test_and_set)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
*************** expand_atomic_exchange (rtx target, rtx
*** 7284,7314 ****
acquire barrier. If the pattern exists, and the memory model is stronger
than acquire, add a release barrier before the instruction.
The barrier is not needed if sync_lock_test_and_set doesn't exist since
! it will expand into a compare-and-swap loop. */
! icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
! last_insn = get_last_insn ();
! if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! model == MEMMODEL_RELEASE ||
! model == MEMMODEL_ACQ_REL))
! expand_builtin_mem_thread_fence (model);
! if (icode != CODE_FOR_nothing)
! {
! struct expand_operand ops[3];
! create_output_operand (&ops[0], target, mode);
! create_fixed_operand (&ops[1], mem);
! /* VAL may have been promoted to a wider mode. Shrink it if so. */
! create_convert_operand_to (&ops[2], val, mode, true);
! if (maybe_expand_insn (icode, 3, ops))
! return ops[0].value;
! }
! /* Remove any fence we may have inserted since a compare and swap loop is a
! full memory barrier. */
! if (last_insn != get_last_insn ())
! delete_insns_since (last_insn);
/* Otherwise, use a compare-and-swap loop for the exchange. */
if (can_compare_and_swap_p (mode))
--- 7287,7325 ----
acquire barrier. If the pattern exists, and the memory model is stronger
than acquire, add a release barrier before the instruction.
The barrier is not needed if sync_lock_test_and_set doesn't exist since
! it will expand into a compare-and-swap loop.
! Some targets have non-compliant test_and_sets, so it would be incorrect
! to emit a test_and_set in place of an __atomic_exchange. The test_and_set
! builtin shares this expander since exchange can always replace the
! test_and_set. */
!
! if (use_test_and_set)
! {
! icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
! last_insn = get_last_insn ();
! if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! model == MEMMODEL_RELEASE ||
! model == MEMMODEL_ACQ_REL))
! expand_builtin_mem_thread_fence (model);
! if (icode != CODE_FOR_nothing)
! {
! struct expand_operand ops[3];
! create_output_operand (&ops[0], target, mode);
! create_fixed_operand (&ops[1], mem);
! /* VAL may have been promoted to a wider mode. Shrink it if so. */
! create_convert_operand_to (&ops[2], val, mode, true);
! if (maybe_expand_insn (icode, 3, ops))
! return ops[0].value;
! }
! /* Remove any fence that was inserted since a compare and swap loop is
! already a full memory barrier. */
! if (last_insn != get_last_insn ())
! delete_insns_since (last_insn);
! }
/* Otherwise, use a compare-and-swap loop for the exchange. */
if (can_compare_and_swap_p (mode))
*************** expand_atomic_load (rtx target, rtx mem,
*** 7489,7498 ****
/* This function expands the atomic store operation:
Atomically store VAL in MEM.
MEMMODEL is the memory model variant to use.
function returns const0_rtx if a pattern was emitted. */
rtx
! expand_atomic_store (rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
--- 7500,7510 ----
/* This function expands the atomic store operation:
Atomically store VAL in MEM.
MEMMODEL is the memory model variant to use.
+ USE_RELEASE is true if __sync_lock_release can be used as a fall back.
function returns const0_rtx if a pattern was emitted. */
rtx
! expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
*************** expand_atomic_store (rtx mem, rtx val, e
*** 7509,7520 ****
return const0_rtx;
}
/* If the size of the object is greater than word size on this target,
a default store will not be atomic, Try a mem_exchange and throw away
the result. If that doesn't work, don't do anything. */
if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
{
! rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model);
if (target)
return const0_rtx;
else
--- 7521,7550 ----
return const0_rtx;
}
+ /* If using __sync_lock_release is a viable alternative, try it. */
+ if (use_release)
+ {
+ icode = direct_optab_handler (sync_lock_release_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ create_fixed_operand (&ops[0], mem);
+ create_input_operand (&ops[1], const0_rtx, mode);
+ if (maybe_expand_insn (icode, 2, ops))
+ {
+ /* lock_release is only a release barrier. */
+ if (model == MEMMODEL_SEQ_CST)
+ expand_builtin_mem_thread_fence (model);
+ return const0_rtx;
+ }
+ }
+ }
+
/* If the size of the object is greater than word size on this target,
a default store will not be atomic, Try a mem_exchange and throw away
the result. If that doesn't work, don't do anything. */
if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
{
! rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false);
if (target)
return const0_rtx;
else
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c (revision 181031)
--- gcc/builtins.c (working copy)
*************** expand_builtin_sync_lock_test_and_set (e
*** 5221,5227 ****
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
--- 5221,5227 ----
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE, true);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
*************** expand_builtin_sync_lock_release (enum m
*** 5234,5240 ****
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
! expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE);
}
/* Given an integer representing an ``enum memmodel'', verify its
--- 5234,5240 ----
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
! expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE, true);
}
/* Given an integer representing an ``enum memmodel'', verify its
*************** expand_builtin_atomic_exchange (enum mac
*** 5285,5291 ****
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_exchange (target, mem, val, model);
}
/* Expand the __atomic_compare_exchange intrinsic:
--- 5285,5291 ----
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_exchange (target, mem, val, model, false);
}
/* Expand the __atomic_compare_exchange intrinsic:
*************** expand_builtin_atomic_store (enum machin
*** 5402,5408 ****
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_store (mem, val, model);
}
/* Expand the __atomic_fetch_XXX intrinsic:
--- 5402,5408 ----
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_atomic_store (mem, val, model, false);
}
/* Expand the __atomic_fetch_XXX intrinsic:
Index: gcc/expr.h
===================================================================
*** gcc/expr.h (revision 181031)
--- gcc/expr.h (working copy)
*************** rtx emit_conditional_add (rtx, enum rtx_
*** 215,223 ****
rtx expand_sync_operation (rtx, rtx, enum rtx_code);
rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
rtx expand_atomic_load (rtx, rtx, enum memmodel);
! rtx expand_atomic_store (rtx, rtx, enum memmodel);
rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_atomic_thread_fence (enum memmodel);
--- 215,223 ----
rtx expand_sync_operation (rtx, rtx, enum rtx_code);
rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel, bool);
rtx expand_atomic_load (rtx, rtx, enum memmodel);
! rtx expand_atomic_store (rtx, rtx, enum memmodel, bool);
rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_atomic_thread_fence (enum memmodel);