On Tue, Sep 5, 2017 at 12:35 PM, Alexander Monakov <[email protected]> wrote:
> On Tue, 5 Sep 2017, Uros Bizjak wrote:
>> This patch allows the memory_blockage pattern to be emitted instead
>> of the default asm volatile as a memory blockage. The patch is needed
>> so that targets (e.g. x86) can define and emit a more optimal memory
>> blockage pseudo insn.
>
> Optimal in what sense? What pattern do you intend to use on x86, and
> would any target be able to use the same?
You don't have to emit a generic asm-like pattern. This is the same
situation as with the blockage insn, where targets can emit their own
"blockage" pattern instead of the generic asm insn.
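For reference, the x86 "blockage" pattern is already just a zero-length
unspec_volatile insn, roughly (the exact i386.md form may differ slightly):

(define_insn "blockage"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
  ""
  ""
  [(set_attr "length" "0")])
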
x86 defines memory_blockage as:
(define_expand "memory_blockage"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
However, this definition can't be generic, since a target-specific
unspec is used.
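To make the above work, the expander has to be paired with a target
define_insn that matches the resulting set and emits no assembly; a
minimal sketch (the "*memory_blockage" name and the zero-length
attribute are just one way of writing it) would be:

(define_insn "*memory_blockage"
  [(set (match_operand:BLK 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
  ""
  [(set_attr "length" "0")])
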
>> And let's call the scheduler memory barrier a "memory blockage"
>> pseudo insn, not a "memory barrier", which should describe a real
>> instruction.
>
> Note this is not about scheduling, but about all RTL passes. This
> (pseudo-)instruction is meant to prevent all memory movement across it,
> including RTL CSE, RTL DSE, etc.
Yes, the above insn satisfies all mentioned use cases.
Oh, I noticed that I attached the wrong version of the patch. The
correct version is attached.
Uros.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 14aab9474bc2..df4dc8ccd0e1 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6734,6 +6734,13 @@ scheduler and other passes from moving instructions and using register
equivalences across the boundary defined by the blockage insn.
This needs to be an UNSPEC_VOLATILE pattern or a volatile ASM.
+@cindex @code{memory_blockage} instruction pattern
+@item @samp{memory_blockage}
+This pattern defines a pseudo insn that prevents the instruction
+scheduler and other passes from moving instructions accessing memory
+across the boundary defined by the memory blockage insn. This instruction
+needs to read and write volatile BLKmode memory.
+
@cindex @code{memory_barrier} instruction pattern
@item @samp{memory_barrier}
If the target memory model is not fully synchronous, then this pattern
diff --git a/gcc/optabs.c b/gcc/optabs.c
index b65707080eee..5821d2a9547c 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6276,10 +6276,10 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
return true;
}
-/* Generate asm volatile("" : : : "memory") as the memory barrier. */
+/* Generate asm volatile("" : : : "memory") as the memory blockage. */
static void
-expand_asm_memory_barrier (void)
+expand_asm_memory_blockage (void)
{
rtx asm_op, clob;
@@ -6295,6 +6295,21 @@ expand_asm_memory_barrier (void)
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob)));
}
+#ifndef HAVE_memory_blockage
+#define HAVE_memory_blockage 0
+#endif
+
+/* Prevent all passes from moving memory accesses across this point. */
+
+static void
+expand_memory_blockage (void)
+{
+ if (HAVE_memory_blockage)
+ emit_insn (gen_memory_blockage ());
+ else
+ expand_asm_memory_blockage ();
+}
+
/* This routine will either emit the mem_thread_fence pattern or issue a
sync_synchronize to generate a fence for memory model MEMMODEL. */
@@ -6306,14 +6321,14 @@ expand_mem_thread_fence (enum memmodel model)
if (targetm.have_mem_thread_fence ())
{
emit_insn (targetm.gen_mem_thread_fence (GEN_INT (model)));
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
}
else if (targetm.have_memory_barrier ())
emit_insn (targetm.gen_memory_barrier ());
else if (synchronize_libfunc != NULL_RTX)
emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode);
else
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
}
/* Emit a signal fence with given memory model. */
@@ -6324,7 +6339,7 @@ expand_mem_signal_fence (enum memmodel model)
/* No machine barrier is required to implement a signal fence, but
a compiler memory barrier must be issued, except for relaxed MM. */
if (!is_mm_relaxed (model))
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
}
/* This function expands the atomic load operation:
@@ -6346,7 +6361,7 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
struct expand_operand ops[3];
rtx_insn *last = get_last_insn ();
if (is_mm_seq_cst (model))
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
create_output_operand (&ops[0], target, mode);
create_fixed_operand (&ops[1], mem);
@@ -6354,7 +6369,7 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
if (maybe_expand_insn (icode, 3, ops))
{
if (!is_mm_relaxed (model))
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
return ops[0].value;
}
delete_insns_since (last);
@@ -6404,14 +6419,14 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
{
rtx_insn *last = get_last_insn ();
if (!is_mm_relaxed (model))
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
create_fixed_operand (&ops[0], mem);
create_input_operand (&ops[1], val, mode);
create_integer_operand (&ops[2], model);
if (maybe_expand_insn (icode, 3, ops))
{
if (is_mm_seq_cst (model))
- expand_asm_memory_barrier ();
+ expand_memory_blockage ();
return const0_rtx;
}
delete_insns_since (last);