As recently discussed in the previous thread for PR 80640, some targets have
sufficiently strong hardware memory ordering that implementation of C11 atomic
fences might not need machine barriers.  However, a compiler memory barrier
nevertheless must be present, and at least two targets (x86, s390) got this
wrong in their .md expanders.

Handle it in the middle-end by detecting empty expansion and emitting a
compiler memory barrier.  If the backend produced non-empty RTL, expect that
it's using the same RTX structure as in "memory_barrier" (__sync_synchronize)
expansion, which must be a compiler memory on its own.

A followup refactor is possible in expand_mem_thread_fence to make it start with

  if (is_mm_relaxed (model))
    return;

as it's not useful to pass __ATOMIC_RELAXED model to gen_mem_thread_fence.

        PR target/80640
        * doc/md.texi (mem_thread_fence): Remove mention of mode.  Clarify that
        empty expansion is handled by placing a compiler barrier.
        * optabs.c (expand_mem_thread_fence): Place a compiler barrier if
        targetm.gen_mem_thread_fence produced no instructions.
testsuite/
        * gcc.dg/atomic/pr80640.c: New testcase.

---
 gcc/doc/md.texi                       |  9 +++++++--
 gcc/optabs.c                          |  7 ++++++-
 gcc/testsuite/gcc.dg/atomic/pr80640.c | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/atomic/pr80640.c

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index ea959208c98..0be161a08dc 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7044,11 +7044,16 @@ If these patterns are not defined, attempts will be 
made to use
 counterparts.  If none of these are available a compare-and-swap
 loop will be used.
 
-@cindex @code{mem_thread_fence@var{mode}} instruction pattern
-@item @samp{mem_thread_fence@var{mode}}
+@cindex @code{mem_thread_fence} instruction pattern
+@item @samp{mem_thread_fence}
 This pattern emits code required to implement a thread fence with
 memory model semantics.  Operand 0 is the memory model to be used.
 
+Expanding this pattern may produce no instructions if no machine barrier
+is required on the target for given memory model.  In that case, unless
+memory model is @code{__ATOMIC_RELAXED}, a compiler memory barrier is
+emitted automatically.
+
 If this pattern is not specified, all memory models except
 @code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize}
 barrier pattern.
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a9900657a58..d568ca376fe 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6298,7 +6298,12 @@ void
 expand_mem_thread_fence (enum memmodel model)
 {
   if (targetm.have_mem_thread_fence ())
-    emit_insn (targetm.gen_mem_thread_fence (GEN_INT (model)));
+    {
+      rtx_insn *last = get_last_insn ();
+      emit_insn (targetm.gen_mem_thread_fence (GEN_INT (model)));
+      if (last == get_last_insn () && !is_mm_relaxed (model))
+       expand_asm_memory_barrier ();
+    }
   else if (!is_mm_relaxed (model))
     {
       if (targetm.have_memory_barrier ())
diff --git a/gcc/testsuite/gcc.dg/atomic/pr80640.c 
b/gcc/testsuite/gcc.dg/atomic/pr80640.c
new file mode 100644
index 00000000000..fd17978a482
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/atomic/pr80640.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-pthread" } */
+/* { dg-require-effective-target pthread } */
+
+#include <pthread.h>
+
+static volatile int sem1;
+static volatile int sem2;
+
+static void *f(void *va)
+{
+  void **p = va;
+  if (*p) return *p;
+  sem1 = 1;
+  while (!sem2);
+  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+  // GCC used to RTL-CSE this and the first load, causing 0 to be returned
+  return *p;
+}
+
+int main()
+{
+  void *p = 0;
+  pthread_t thr;
+  if (pthread_create(&thr, 0, f, &p))
+    return 2;
+  while (!sem1);
+  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+  p = &p;
+  __atomic_thread_fence(__ATOMIC_RELEASE);
+  sem2 = 1;
+  pthread_join(thr, &p);
+  return !p;
+}
-- 
2.13.3

Reply via email to