I didn't like the duplication between the old __sync routines and the
new __sync_mem routines. I thought it was going to confuse port
maintainers at the very least.
This patch consolidates everything except the various fetch_op routines,
which I will do in a different patch since they are much more complicated.
The basic form now is that the new routine expansion code follows this
pattern:
__sync_mem_ROUTINE:
- Check for existence of a pattern for __sync_mem_ROUTINE, and try to
use that.
- Absorb the code from the expansion routine for __sync_ROUTINE.
- See if that pattern exists and can be used, issuing any
additional fences that may be needed.
- fall back to the compare and swap loop, or execute the insn with
fences as appropriate.
And now the original __sync routines will simply call the expanders for
the new routines passing in the memory model they actually intend.
__sync_ROUTINE:
call expand __sync_mem_ROUTINE (..., DOCUMENTED_MEMORY_MODEL)
For example, __sync_lock_test_and_set is defined to be acquire mode, so now
any use of that older built-in will actually end up being expanded by
this call:
expand_sync_mem_exchange (target, mem, val, MEMMODEL_ACQUIRE);
rather than the original call to expand_sync_lock_test_and_set.
Any slight variances in behaviour which need to be absorbed are
documented, such as the fact that __sync_lock_test_and_set may only use
values of 0 and 1 on some limited architectures. With that functionality
consolidated, I have removed the __sync_mem_flag_{test_and_set,clear}
routines since their functionality is now provided by invoking
__sync_mem_exchange (byte_addr, 1) and __sync_mem_store (byte_addr, 0).
It's all much cleaner, and now it's much easier to maintain a target. All
you need to do is take the original patterns for the old __sync
routines, rename them, and add any memory model support you want — no
confusing duplication of functionality and all that.
The various fetch_ops will be consolidated in a follow-up next week.
Bootstraps on x86_64-unknown-linux-gnu and causes no new regressions.
Andrew
* expr.h: Remove prototypes.
* sync-builtins.def (BUILT_IN_SYNC_MEM_FLAG_TEST_AND_SET,
BUILT_IN_SYNC_MEM_FLAG_CLEAR): Remove.
* optabs.h (enum direct_optab_index): Remove DOI_sync_mem_flag_*.
* optabs.c (expand_sync_lock_test_and_set): Remove.
(expand_sync_mem_exchange): Incorporate sync_lock_test_and_set here.
(expand_sync_mem_store): If storing const0_rtx, try using
sync_lock_release.
* builtins.c (expand_builtin_sync_lock_test_and_set): Expand into
sync_mem_exchange instead.
(expand_builtin_sync_lock_release): Expand into sync_mem_store of 0.
(expand_builtin_sync_mem_flag_test_and_set): Remove.
(expand_builtin_sync_mem_flag_clear): Remove.
(expand_builtin): Remove cases for __SYNC_MEM_FLAG_*.
* doc/extend.texi: Update documentation to match.
* testsuite/gcc.dg/sync-mem-invalid.c: Remove __sync_mem_flag_clear
tests.
* testsuite/gcc.dg/sync-mem-flag.c: Remove.
Index: expr.h
===================================================================
*** expr.h (revision 178710)
--- expr.h (working copy)
*************** rtx expand_val_compare_and_swap (rtx, rt
*** 216,222 ****
rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx);
rtx expand_sync_operation (rtx, rtx, enum rtx_code);
rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
- rtx expand_sync_lock_test_and_set (rtx, rtx, rtx);
rtx expand_sync_mem_exchange (rtx, rtx, rtx, enum memmodel);
rtx expand_sync_mem_compare_exchange (rtx, rtx, rtx, rtx, enum memmodel,
--- 216,221 ----
*************** rtx expand_sync_mem_load (rtx, rtx, enum
*** 225,232 ****
void expand_sync_mem_store (rtx, rtx, enum memmodel);
rtx expand_sync_mem_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
- rtx expand_sync_mem_flag_test_and_set (rtx, rtx, enum memmodel);
- void expand_sync_mem_flag_clear (rtx, enum memmodel);
void expand_sync_mem_thread_fence (enum memmodel);
void expand_sync_mem_signal_fence (enum memmodel);
--- 224,229 ----
Index: sync-builtins.def
===================================================================
*** sync-builtins.def (revision 178710)
--- sync-builtins.def (working copy)
*************** DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_FETC
*** 533,546 ****
"__sync_mem_fetch_or_16",
BT_FN_I16_VPTR_I16_INT, ATTR_NOTHROW_LEAF_LIST)
- DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_FLAG_TEST_AND_SET,
- "__sync_mem_flag_test_and_set",
- BT_FN_BOOL_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
-
- DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_FLAG_CLEAR,
- "__sync_mem_flag_clear",
- BT_FN_VOID_VPTR_INT, ATTR_NOTHROW_LEAF_LIST)
-
DEF_SYNC_BUILTIN (BUILT_IN_SYNC_MEM_THREAD_FENCE,
"__sync_mem_thread_fence",
BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST)
--- 533,538 ----
Index: optabs.h
===================================================================
*** optabs.h (revision 178710)
--- optabs.h (working copy)
*************** enum direct_optab_index
*** 700,707 ****
DOI_sync_mem_fetch_and,
DOI_sync_mem_fetch_xor,
DOI_sync_mem_fetch_or,
- DOI_sync_mem_flag_test_and_set,
- DOI_sync_mem_flag_clear,
DOI_sync_mem_thread_fence,
DOI_sync_mem_signal_fence,
--- 700,705 ----
*************** typedef struct direct_optab_d *direct_op
*** 779,788 ****
(&direct_optab_table[(int) DOI_sync_mem_fetch_xor])
#define sync_mem_fetch_or_optab \
(&direct_optab_table[(int) DOI_sync_mem_fetch_or])
- #define sync_mem_flag_test_and_set_optab \
- (&direct_optab_table[(int) DOI_sync_mem_flag_test_and_set])
- #define sync_mem_flag_clear_optab \
- (&direct_optab_table[(int) DOI_sync_mem_flag_clear])
#define sync_mem_thread_fence_optab \
(&direct_optab_table[(int) DOI_sync_mem_thread_fence])
#define sync_mem_signal_fence_optab \
--- 777,782 ----
Index: optabs.c
===================================================================
*** optabs.c (revision 178710)
--- optabs.c (working copy)
*************** expand_sync_fetch_operation (rtx mem, rt
*** 7124,7171 ****
return NULL_RTX;
}
- /* This function expands a test-and-set operation. Ideally we atomically
- store VAL in MEM and return the previous value in MEM. Some targets
- may not support this operation and only support VAL with the constant 1;
- in this case while the return value will be 0/1, but the exact value
- stored in MEM is target defined. TARGET is an option place to stick
- the return value. */
-
- rtx
- expand_sync_lock_test_and_set (rtx mem, rtx val, rtx target)
- {
- enum machine_mode mode = GET_MODE (mem);
- enum insn_code icode;
-
- /* If the target supports the test-and-set directly, great. */
- icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
- if (icode != CODE_FOR_nothing)
- {
- struct expand_operand ops[3];
-
- create_output_operand (&ops[0], target, mode);
- create_fixed_operand (&ops[1], mem);
- /* VAL may have been promoted to a wider mode. Shrink it if so. */
- create_convert_operand_to (&ops[2], val, mode, true);
- if (maybe_expand_insn (icode, 3, ops))
- return ops[0].value;
- }
-
- /* Otherwise, use a compare-and-swap loop for the exchange. */
- if (direct_optab_handler (sync_compare_and_swap_optab, mode)
- != CODE_FOR_nothing)
- {
- if (!target || !register_operand (target, mode))
- target = gen_reg_rtx (mode);
- if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
- val = convert_modes (mode, GET_MODE (val), val, 1);
- if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
- return target;
- }
-
- return NULL_RTX;
- }
-
/* This function expands the atomic exchange operation:
atomically store VAL in MEM and return the previous value in MEM.
--- 7124,7129 ----
*************** expand_sync_mem_exchange (rtx target, rt
*** 7177,7182 ****
--- 7135,7141 ----
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
+ rtx last_insn;
/* If the target supports the exchange directly, great. */
icode = direct_optab_handler (sync_mem_exchange_optab, mode);
*************** expand_sync_mem_exchange (rtx target, rt
*** 7198,7209 ****
than acquire, add a release barrier before the instruction.
The barrier is not needed if sync_lock_test_and_set doesn't exist since
it will expand into a compare-and-swap loop. */
icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! model == MEMMODEL_ACQ_REL))
expand_builtin_mem_thread_fence (model);
! return expand_sync_lock_test_and_set (mem, val, target);
}
/* This function expands the atomic compare exchange operation:
--- 7157,7200 ----
than acquire, add a release barrier before the instruction.
The barrier is not needed if sync_lock_test_and_set doesn't exist since
it will expand into a compare-and-swap loop. */
+
icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
+ last_insn = get_last_insn ();
if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! model == MEMMODEL_RELEASE ||
! model == MEMMODEL_ACQ_REL))
expand_builtin_mem_thread_fence (model);
! if (icode != CODE_FOR_nothing)
! {
! struct expand_operand ops[3];
!
! create_output_operand (&ops[0], target, mode);
! create_fixed_operand (&ops[1], mem);
! /* VAL may have been promoted to a wider mode. Shrink it if so. */
! create_convert_operand_to (&ops[2], val, mode, true);
! if (maybe_expand_insn (icode, 3, ops))
! return ops[0].value;
! }
!
! /* Remove any fence we may have inserted since a compare and swap loop is a
! full memory barrier. */
! if (last_insn != get_last_insn ())
! delete_insns_since (last_insn);
!
! /* Otherwise, use a compare-and-swap loop for the exchange. */
! if (direct_optab_handler (sync_compare_and_swap_optab, mode)
! != CODE_FOR_nothing)
! {
! if (!target || !register_operand (target, mode))
! target = gen_reg_rtx (mode);
! if (GET_MODE (val) != VOIDmode && GET_MODE (val) != mode)
! val = convert_modes (mode, GET_MODE (val), val, 1);
! if (expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
! return target;
! }
!
! return NULL_RTX;
}
/* This function expands the atomic compare exchange operation:
*************** expand_sync_mem_store (rtx mem, rtx val,
*** 7323,7351 ****
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
/* If the target supports the store directly, great. */
icode = direct_optab_handler (sync_mem_store_optab, mode);
if (icode != CODE_FOR_nothing)
{
- struct expand_operand ops[3];
create_output_operand (&ops[0], mem, mode);
! create_fixed_operand (&ops[1], val);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
return;
}
/* If there is no mem_store, default to a move with barriers */
!
! if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
emit_move_insn (mem, val);
/* For SEQ_CST, also emit a barrier after the load. */
! expand_builtin_mem_thread_fence (model);
}
/* This function expands an atomic fetch_OP operation:
--- 7314,7359 ----
{
enum machine_mode mode = GET_MODE (mem);
enum insn_code icode;
+ struct expand_operand ops[3];
/* If the target supports the store directly, great. */
icode = direct_optab_handler (sync_mem_store_optab, mode);
if (icode != CODE_FOR_nothing)
{
create_output_operand (&ops[0], mem, mode);
! create_input_operand (&ops[1], val, mode);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
return;
}
+ /* A store of 0 is the same as __sync_lock_release, try that. */
+ if (CONST_INT_P (val) && INTVAL (val) == 0)
+ {
+ icode = direct_optab_handler (sync_lock_release_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ create_fixed_operand (&ops[0], mem);
+ create_input_operand (&ops[1], const0_rtx, mode);
+ if (maybe_expand_insn (icode, 2, ops))
+ {
+ /* lock_release is only a release barrier. */
+ if (model == MEMMODEL_SEQ_CST)
+ expand_builtin_mem_thread_fence (model);
+ return;
+ }
+ }
+ }
/* If there is no mem_store, default to a move with barriers */
! if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE)
expand_builtin_mem_thread_fence (model);
emit_move_insn (mem, val);
/* For SEQ_CST, also emit a barrier after the load. */
! if (model == MEMMODEL_SEQ_CST)
! expand_builtin_mem_thread_fence (model);
}
/* This function expands an atomic fetch_OP operation:
Index: builtins.c
===================================================================
*** builtins.c (revision 178718)
--- builtins.c (working copy)
*************** expand_builtin_sync_lock_test_and_set (e
*** 5207,5213 ****
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_sync_lock_test_and_set (mem, val, target);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
--- 5207,5213 ----
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_sync_mem_exchange (target, mem, val, MEMMODEL_ACQUIRE);
}
/* Expand the __sync_lock_release intrinsic. EXP is the CALL_EXPR. */
*************** expand_builtin_sync_lock_test_and_set (e
*** 5215,5241 ****
static void
expand_builtin_sync_lock_release (enum machine_mode mode, tree exp)
{
- struct expand_operand ops[2];
- enum insn_code icode;
rtx mem;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
! /* If there is an explicit operation in the md file, use it. */
! icode = direct_optab_handler (sync_lock_release_optab, mode);
! if (icode != CODE_FOR_nothing)
! {
! create_fixed_operand (&ops[0], mem);
! create_input_operand (&ops[1], const0_rtx, mode);
! if (maybe_expand_insn (icode, 2, ops))
! return;
! }
!
! /* Otherwise we can implement this operation by emitting a barrier
! followed by a store of zero. */
! expand_builtin_sync_synchronize ();
! emit_move_insn (mem, const0_rtx);
}
/* Given an integer representing an ``enum memmodel'', verify its
--- 5215,5226 ----
static void
expand_builtin_sync_lock_release (enum machine_mode mode, tree exp)
{
rtx mem;
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
! expand_sync_mem_store (mem, const0_rtx, MEMMODEL_RELEASE);
}
/* Given an integer representing an ``enum memmodel'', verify its
*************** expand_builtin_sync_mem_fetch_op (enum m
*** 5404,5471 ****
return expand_sync_mem_fetch_op (target, mem, val, code, model,
fetch_after);
}
- /* Expand the __sync_mem_flag_test_and_set intrinsic:
- bool __sync_mem_flag_test_and_set (char *object, enum memmodel)
- EXP is the CALL_EXPR.
- TARGET is an optional place for us to store the results. */
-
- static rtx
- expand_builtin_sync_mem_flag_test_and_set (tree exp, rtx target)
- {
- rtx mem;
- enum memmodel model;
-
- model = get_memmodel (CALL_EXPR_ARG (exp, 1));
- /* Expand the operand. */
- mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), QImode);
-
- #ifdef HAVE_mem_flag_test_and_set
- return emit_insn (gen_mem_flag_test_and_set (mem, model));
- #else
- /* sync_lock_test_and_set is an acquire barrier, so a barrier may be needed
- before. */
-
- if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_ACQ_REL
- || model == MEMMODEL_RELEASE)
- expand_builtin_sync_synchronize ();
-
- return expand_sync_lock_test_and_set (mem, const1_rtx, target);
- #endif
- }
-
- /* Expand the __sync_mem_flag_clear intrinsic:
- void __sync_mem_flag_clear (char *object, enum memmodel)
- EXP is the CALL_EXPR. */
-
- static void
- expand_builtin_sync_mem_flag_clear (tree exp)
- {
- enum memmodel model;
- #ifdef HAVE_mem_flag_clear
- rtx mem;
- #endif
-
- model = get_memmodel (CALL_EXPR_ARG (exp, 1));
-
- if (model == MEMMODEL_ACQUIRE || model == MEMMODEL_ACQ_REL)
- {
- error ("invalid memory model for %<__sync_mem_flag_clear%>");
- return;
- }
-
- #ifdef HAVE_mem_flag_clear
- mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), QImode);
- emit_insn (gen_mem_flag_clear (mem, model));
- #else
- expand_builtin_sync_lock_release (QImode, exp);
-
- /* sync_lock_release is a release barrier, so we may need a seq-cst
- barrier afterwards. */
- if (model == MEMMODEL_SEQ_CST)
- expand_builtin_sync_synchronize ();
- #endif
- }
-
/* This routine will either emit the mem_thread_fence pattern or issue a
sync_synchronize to generate a fence for memory model MEMMODEL. */
--- 5389,5394 ----
*************** expand_builtin (tree exp, rtx target, rt
*** 6498,6513 ****
return target;
break;
- case BUILT_IN_SYNC_MEM_FLAG_TEST_AND_SET:
- target = expand_builtin_sync_mem_flag_test_and_set (exp, target);
- if (target)
- return target;
- break;
-
- case BUILT_IN_SYNC_MEM_FLAG_CLEAR:
- expand_builtin_sync_mem_flag_clear (exp);
- return const0_rtx;
-
case BUILT_IN_SYNC_MEM_THREAD_FENCE:
expand_builtin_sync_mem_thread_fence (exp);
return const0_rtx;
--- 6421,6426 ----
Index: doc/extend.texi
===================================================================
*** doc/extend.texi (revision 178718)
--- doc/extend.texi (working copy)
*************** __SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST,
*** 6809,6817 ****
__SYNC_MEM_CONSUME.
@item void __sync_mem_store (@var{type} *ptr, @var{type} val, int memmodel)
! @findex __sync_mem_load
This builtin implements an atomic store operation. It writes @code{@var{val}}
! into @code{*@var{ptr}}.
The valid memory model variants are
__SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST, and __SYNC_MEM_RELEASE.
--- 6809,6818 ----
__SYNC_MEM_CONSUME.
@item void __sync_mem_store (@var{type} *ptr, @var{type} val, int memmodel)
! @findex __sync_mem_store
This builtin implements an atomic store operation. It writes @code{@var{val}}
! into @code{*@var{ptr}}. On targets which are limited, 0 may be the only valid
! value. This mimics the behaviour of __sync_lock_release on such hardware.
The valid memory model variants are
__SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST, and __SYNC_MEM_RELEASE.
*************** __SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST,
*** 6821,6826 ****
--- 6822,6830 ----
This builtin implements an atomic exchange operation. It writes @var{val}
into @code{*@var{ptr}}, and returns the previous contents of
@code{*@var{ptr}}.
+ On targets which are limited, a value of 1 may be the only valid value
written.
+ This mimics the behaviour of __sync_lock_test_and_set on such hardware.
+
The valid memory model variants are
__SYNC_MEM_RELAXED, __SYNC_MEM_SEQ_CST, __SYNC_MEM_ACQUIRE,
__SYNC_MEM_RELEASE, and __SYNC_MEM_ACQ_REL.
*************** that had previously been in *ptr . That
*** 6880,6901 ****
All memory models are valid.
- @item bool __sync_mem_flag_test_and_set (char *ptr, int memmodel)
- @findex __sync_mem_flag_test_and_set
-
- This builtin performs a test and set operation. The contents of
- @code{*@var{ptr}} will be set to true, and the previous value returned.
-
- All memory orders are valid.
-
- @item void __sync_mem_flag_clear (char *ptr, int memmodel)
- @findex __sync_mem_flag_clear
-
- This builtin performs a clear operation. The contents of @code{*@var{ptr}}
- will be set to false.
-
- The memory order cannot be __SYNC_MEM_ACQUIRE or __SYNC_MEM_ACQ_REL.
-
@item void __sync_mem_thread_fence (int memmodel)
@findex __sync_mem_thread_fence
--- 6884,6889 ----
Index: testsuite/gcc.dg/sync-mem-invalid.c
===================================================================
*** testsuite/gcc.dg/sync-mem-invalid.c (revision 178710)
--- testsuite/gcc.dg/sync-mem-invalid.c (working copy)
*************** main ()
*** 16,21 ****
__sync_mem_store (&i, 1, __SYNC_MEM_CONSUME); /* { dg-error "invalid memory
model" } */
__sync_mem_store (&i, 1, __SYNC_MEM_ACQ_REL); /* { dg-error "invalid memory
model" } */
- __sync_mem_flag_clear (&i, __SYNC_MEM_ACQUIRE); /* { dg-error "invalid
memory model" } */
- __sync_mem_flag_clear (&i, __SYNC_MEM_ACQ_REL); /* { dg-error "invalid
memory model" } */
}
--- 16,19 ----
Index: testsuite/gcc.dg/sync-mem-flag.c
===================================================================
*** testsuite/gcc.dg/sync-mem-flag.c (revision 178710)
--- testsuite/gcc.dg/sync-mem-flag.c (working copy)
***************
*** 1,37 ****
- /* Test __sync_mem routines for existence and proper execution with each
valid
- memory model. */
- /* { dg-do run } */
- /* { dg-require-effective-target sync_char_short } */
-
-
- /* Test the execution of __sync_mem_flag_{test_and_set,clear} builtins. */
-
- extern void abort(void);
-
- char v;
-
- main ()
- {
- v = 0;
-
- if (__sync_mem_flag_test_and_set (&v, __SYNC_MEM_RELAXED) != 0)
- abort();
- if (v != 1)
- abort();
- __sync_mem_flag_clear (&v, __SYNC_MEM_RELAXED);
-
- if (__sync_mem_flag_test_and_set (&v, __SYNC_MEM_RELEASE) != 0)
- abort();
- if (v != 1)
- abort();
- __sync_mem_flag_clear (&v, __SYNC_MEM_RELEASE);
-
- if (__sync_mem_flag_test_and_set (&v, __SYNC_MEM_SEQ_CST) != 0)
- abort();
- if (v != 1)
- abort();
- __sync_mem_flag_clear (&v, __SYNC_MEM_SEQ_CST);
-
- return 0;
- }
-
--- 0 ----