Again, like in patch 1/3, a backend might expand C11 atomic load/store to a volatile memory access if hardware memory ordering is sufficiently strong that no machine barrier is required. Nevertheless, we must ensure that compiler memory barrier(s) are present before/after the access to prevent wrong code transformations. Simply place them as appropriate in optabs.c expansion code.
There's ALIAS_SET_MEMORY_BARRIER placed on MEMs accessed via atomics, but it's probably not useful, it's very hard to audit all RTL passes and ensure they respect it. On the other hand, asm barriers must work or else much of real-world code will break. PR rtl-optimization/57448 PR target/67458 PR target/81316 * optabs.c (expand_atomic_load): Place compiler memory barriers if using atomic_load pattern. (expand_atomic_store): Likewise. testsuite/ * gcc.dg/atomic/pr80640-2.c: New testcase. * gcc.dg/atomic/pr81316.c: New testcase. --- gcc/optabs.c | 20 ++++++++++++++++++-- gcc/testsuite/gcc.dg/atomic/pr80640-2.c | 32 ++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/atomic/pr81316.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/atomic/pr80640-2.c create mode 100644 gcc/testsuite/gcc.dg/atomic/pr81316.c diff --git a/gcc/optabs.c b/gcc/optabs.c index 6884fd4b8aa..11977d6d53f 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -6343,12 +6343,20 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model) if (icode != CODE_FOR_nothing) { struct expand_operand ops[3]; + rtx_insn *last = get_last_insn (); + if (is_mm_seq_cst (model)) + expand_asm_memory_barrier (); create_output_operand (&ops[0], target, mode); create_fixed_operand (&ops[1], mem); create_integer_operand (&ops[2], model); if (maybe_expand_insn (icode, 3, ops)) - return ops[0].value; + { + if (!is_mm_relaxed (model)) + expand_asm_memory_barrier (); + return ops[0].value; + } + delete_insns_since (last); } /* If the size of the object is greater than word size on this target, @@ -6393,11 +6401,19 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release) icode = direct_optab_handler (atomic_store_optab, mode); if (icode != CODE_FOR_nothing) { + rtx_insn *last = get_last_insn (); + if (!is_mm_relaxed (model)) + expand_asm_memory_barrier (); create_fixed_operand (&ops[0], mem); create_input_operand (&ops[1], val, mode); create_integer_operand (&ops[2], model); if (maybe_expand_insn (icode, 3, ops)) - return const0_rtx; + { + if (is_mm_seq_cst (model)) + expand_asm_memory_barrier (); + return const0_rtx; + } + delete_insns_since (last); } /* If using __sync_lock_release is a viable alternative, try it. diff --git a/gcc/testsuite/gcc.dg/atomic/pr80640-2.c b/gcc/testsuite/gcc.dg/atomic/pr80640-2.c new file mode 100644 index 00000000000..a735054090d --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic/pr80640-2.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-options "-pthread" } */ +/* { dg-require-effective-target pthread } */ + +#include <pthread.h> + +static volatile int sem1; +static _Atomic int sem2; + +static void *f(void *va) +{ + void **p = va; + if (*p) return *p; + sem1 = 1; + while (!__atomic_load_n(&sem2, __ATOMIC_ACQUIRE)); + // GCC used to RTL-CSE this and the first load, causing 0 to be returned + return *p; +} + +int main() +{ + void *p = 0; + pthread_t thr; + if (pthread_create(&thr, 0, f, &p)) + return 2; + while (!sem1); + __atomic_thread_fence(__ATOMIC_ACQUIRE); + p = &p; + __atomic_store_n(&sem2, 1, __ATOMIC_RELEASE); + pthread_join(thr, &p); + return !p; +} diff --git a/gcc/testsuite/gcc.dg/atomic/pr81316.c b/gcc/testsuite/gcc.dg/atomic/pr81316.c new file mode 100644 index 00000000000..ef10095718e --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic/pr81316.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-pthread" } */ +/* { dg-require-effective-target pthread } */ + +#include <pthread.h> +#include <stdlib.h> + +static _Atomic int sem1; + +static void *f(void *va) +{ + void **p = va; + while (!__atomic_load_n(&sem1, __ATOMIC_ACQUIRE)); + exit(!*p); +} + +int main(int argc) +{ + void *p = 0; + pthread_t thr; + if (pthread_create(&thr, 0, f, &p)) + return 2; + // GCC used to RTL-DSE this store + p = &p; + __atomic_store_n(&sem1, 1, __ATOMIC_RELEASE); + int r = -1; + while (r < 0) asm("":"+r"(r)); + return r; +} -- 2.13.3