I think I had a similar patch earlier that got forgotten, but I've made
some changes.
If sizeof(atomic) > target_word_size, then defaulting to a load or
store with barriers will result in non-atomic code, which is bad.
Instead, the default has now been changed.
Store will try an exchange and throw away the result. If that fails,
it defaults to a function call.
Load will try to do a compare_swap (mem, 0, 0), which will get the
current value of *mem if it is not 0; if it is 0, it will cause
a 'mostly' harmless store of 0 to a location already containing 0.
Bootstraps with no new regressions on x86_64-unknown-linux-gnu.
Andrew
* optabs.c (expand_sync_mem_load): Don't expand into a default load if
the type is larger than a word. Try a compare_and_swap with 0.
(expand_sync_mem_store): Return const0_rtx if a store is generated. If
type is larger than a word try an exchange, then fail.
* builtins.c (expand_builtin_sync_mem_store): Return a value.
(expand_builtin): If no store generated, leave a function call.
* expr.h (expand_sync_mem_store): Prototype returns value.
* testsuite/gcc.dg/memmodel/sync-other-int128.c: Don't xfail any more.
Index: optabs.c
===================================================================
*** optabs.c (revision 178916)
--- optabs.c (working copy)
*************** expand_sync_mem_load (rtx target, rtx me
*** 7032,7038 ****
return ops[0].value;
}
! /* If there is no load, default to a move with barriers. */
if (target == const0_rtx)
target = gen_reg_rtx (mode);
--- 7032,7050 ----
return ops[0].value;
}
! /* If there is no load pattern, default to a move with barriers. If the size
! of the object is greater than word size on this target, a default load
! will not be atomic. */
! if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
! {
! /* Issue val = compare_and_swap (mem, 0 , 0).
! This may cause the occasional harmless store of 0 when the value is
! already 0, but do it anyway until it is determined to be invalid. */
! target = expand_val_compare_and_swap (mem, const0_rtx, const0_rtx,
! target);
! return target;
! }
!
if (target == const0_rtx)
target = gen_reg_rtx (mode);
*************** expand_sync_mem_load (rtx target, rtx me
*** 7048,7060 ****
return target;
}
! /* This function expands the atomic load operation:
! return the atomically loaded value in MEM.
!
MEMMODEL is the memory model variant to use.
! TARGET is an option place to stick the return value. */
! void
expand_sync_mem_store (rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
--- 7060,7071 ----
return target;
}
! /* This function expands the atomic store operation:
! Atomically store VAL in MEM.
MEMMODEL is the memory model variant to use.
! Returns const0_rtx if the store was emitted, or NULL_RTX if it could
! not be expanded. */
! rtx
expand_sync_mem_store (rtx mem, rtx val, enum memmodel model)
{
enum machine_mode mode = GET_MODE (mem);
*************** expand_sync_mem_store (rtx mem, rtx val,
*** 7070,7076 ****
create_input_operand (&ops[1], val, mode);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
! return;
}
/* A store of 0 is the same as __sync_lock_release, try that. */
--- 7081,7087 ----
create_input_operand (&ops[1], val, mode);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
! return const0_rtx;
}
/* A store of 0 is the same as __sync_lock_release, try that. */
*************** expand_sync_mem_store (rtx mem, rtx val,
*** 7086,7095 ****
/* lock_release is only a release barrier. */
if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
! return;
}
}
}
/* If there is no mem_store, default to a move with barriers */
if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE)
expand_builtin_mem_thread_fence (model);
--- 7097,7119 ----
/* lock_release is only a release barrier. */
if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
! return const0_rtx;
}
}
}
+
+ /* If the size of the object is greater than word size on this target,
+ a default store will not be atomic. Try a mem_exchange and throw away
+ the result. If that doesn't work, don't do anything. */
+ if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
+ {
+ rtx target = expand_sync_mem_exchange (NULL_RTX, mem, val, model);
+ if (target)
+ return const0_rtx;
+ else
+ return NULL_RTX;
+ }
+
/* If there is no mem_store, default to a move with barriers */
if (model == MEMMODEL_SEQ_CST || model == MEMMODEL_RELEASE)
expand_builtin_mem_thread_fence (model);
*************** expand_sync_mem_store (rtx mem, rtx val,
*** 7099,7104 ****
--- 7123,7130 ----
/* For SEQ_CST, also emit a barrier after the load. */
if (model == MEMMODEL_SEQ_CST)
expand_builtin_mem_thread_fence (model);
+
+ return const0_rtx;
}
Index: builtins.c
===================================================================
*** builtins.c (revision 178916)
--- builtins.c (working copy)
*************** expand_builtin_sync_mem_load (enum machi
*** 5340,5346 ****
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
! static void
expand_builtin_sync_mem_store (enum machine_mode mode, tree exp)
{
rtx mem, val;
--- 5340,5346 ----
EXP is the CALL_EXPR.
TARGET is an optional place for us to store the results. */
! static rtx
expand_builtin_sync_mem_store (enum machine_mode mode, tree exp)
{
rtx mem, val;
*************** expand_builtin_sync_mem_store (enum mach
*** 5352,5365 ****
&& model != MEMMODEL_RELEASE)
{
error ("invalid memory model for %<__sync_mem_store%>");
! return;
}
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! expand_sync_mem_store (mem, val, model);
}
/* Expand the __sync_mem_fetch_XXX intrinsic:
--- 5352,5365 ----
&& model != MEMMODEL_RELEASE)
{
error ("invalid memory model for %<__sync_mem_store%>");
! return NULL_RTX;
}
/* Expand the operands. */
mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
! return expand_sync_mem_store (mem, val, model);
}
/* Expand the __sync_mem_fetch_XXX intrinsic:
*************** expand_builtin (tree exp, rtx target, rt
*** 6289,6296 ****
case BUILT_IN_SYNC_MEM_STORE_8:
case BUILT_IN_SYNC_MEM_STORE_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_MEM_STORE_1);
! expand_builtin_sync_mem_store (mode, exp);
! return const0_rtx;
case BUILT_IN_SYNC_MEM_ADD_FETCH_1:
case BUILT_IN_SYNC_MEM_ADD_FETCH_2:
--- 6289,6298 ----
case BUILT_IN_SYNC_MEM_STORE_8:
case BUILT_IN_SYNC_MEM_STORE_16:
mode = get_builtin_sync_mode (fcode - BUILT_IN_SYNC_MEM_STORE_1);
! target = expand_builtin_sync_mem_store (mode, exp);
! if (target)
! return const0_rtx;
! break;
case BUILT_IN_SYNC_MEM_ADD_FETCH_1:
case BUILT_IN_SYNC_MEM_ADD_FETCH_2:
Index: testsuite/gcc.dg/memmodel/sync-other-int128.c
===================================================================
*** testsuite/gcc.dg/memmodel/sync-other-int128.c (revision 178916)
--- testsuite/gcc.dg/memmodel/sync-other-int128.c (working copy)
***************
*** 1,10 ****
/* { dg-do link } */
-
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
!
! /* { dg-final { memmodel-gdb-test { xfail *-*-* } } } */
!
#include <stdio.h>
#include "memmodel.h"
--- 1,7 ----
/* { dg-do link } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
! /* { dg-final { memmodel-gdb-test } } */
#include <stdio.h>
#include "memmodel.h"
Index: expr.h
===================================================================
*** expr.h (revision 178916)
--- expr.h (working copy)
*************** rtx expand_sync_mem_exchange (rtx, rtx,
*** 221,227 ****
rtx expand_sync_mem_compare_exchange (rtx, rtx, rtx, rtx, enum memmodel,
enum memmodel);
rtx expand_sync_mem_load (rtx, rtx, enum memmodel);
! void expand_sync_mem_store (rtx, rtx, enum memmodel);
rtx expand_sync_mem_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_sync_mem_thread_fence (enum memmodel);
--- 221,227 ----
rtx expand_sync_mem_compare_exchange (rtx, rtx, rtx, rtx, enum memmodel,
enum memmodel);
rtx expand_sync_mem_load (rtx, rtx, enum memmodel);
! rtx expand_sync_mem_store (rtx, rtx, enum memmodel);
rtx expand_sync_mem_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_sync_mem_thread_fence (enum memmodel);