[PATCH] RISC-V: Enable overlap-by-pieces via tune param

2021-07-21 Thread Christoph Muellner via Gcc-patches
This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param, which allows enabling the overlap_op_by_pieces
feature of the by-pieces infrastructure.

gcc/ChangeLog:

* config/riscv/riscv.c (struct riscv_tune_param): New field.
(riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.

Signed-off-by: Christoph Muellner 
---
 gcc/config/riscv/riscv.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..824e930ef05 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -220,6 +220,7 @@ struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  bool overlap_op_by_pieces;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   3,   /* branch_cost */
   5,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  false,   /* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   4,   /* branch_cost */
   3,   /* memory_cost */
   true,/* 
slow_unaligned_access */
+  false,   /* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = 
{
   3,/* branch_cost */
   5,/* memory_cost */
   false,/* slow_unaligned_access */
+  false,   /* overlap_op_by_pieces */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -324,6 +328,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   1,   /* branch_cost */
   2,   /* memory_cost */
   false,   /* slow_unaligned_access */
+  false,   /* overlap_op_by_pieces */
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
-- 
2.31.1



[PATCH] RISC-V: Enable overlap-by-pieces in case of fast unaligned access

2021-07-22 Thread Christoph Muellner via Gcc-patches
This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins in case the target has set
riscv_slow_unaligned_access_p to false.

To demonstrate the effect for targets with fast unaligned access,
the following code sequences are generated for a 15-byte memset-zero.

Without overlap_op_by_pieces we get:
  8e:   00053023sd  zero,0(a0)
  92:   00052423sw  zero,8(a0)
  96:   00051623sh  zero,12(a0)
  9a:   00050723sb  zero,14(a0)

With overlap_op_by_pieces we get:
  7e:   00053023sd  zero,0(a0)
  82:   000533a3sd  zero,7(a0)

gcc/ChangeLog:

* config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.

Signed-off-by: Christoph Muellner 
---
 gcc/config/riscv/riscv.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return !riscv_slow_unaligned_access_p;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
-- 
2.31.1



[PATCH v2] RISC-V: Enable overlap-by-pieces in case of fast unaligned access

2021-07-22 Thread Christoph Muellner via Gcc-patches
This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins in case the target has set
riscv_slow_unaligned_access_p to false.

An example to demonstrate the effect for targets with fast unaligned
access (targets that have slow_unaligned_access set to false) is
the code that is generated for "memset (p, 0, 15);", where the
alignment of p is unknown:

  Without overlap_op_by_pieces we get:
8e:   00053023sd  zero,0(a0)
92:   00052423sw  zero,8(a0)
96:   00051623sh  zero,12(a0)
9a:   00050723sb  zero,14(a0)

  With overlap_op_by_pieces we get:
7e:   00053023sd  zero,0(a0)
82:   000533a3sd  zero,7(a0)

gcc/ChangeLog:

* config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/builtins-overlap-1.c: New test.
* gcc.target/riscv/builtins-overlap-2.c: New test.
* gcc.target/riscv/builtins-overlap-3.c: New test.
* gcc.target/riscv/builtins-overlap-4.c: New test.
* gcc.target/riscv/builtins-overlap-5.c: New test.
* gcc.target/riscv/builtins-overlap-6.c: New test.
* gcc.target/riscv/builtins-overlap-7.c: New test.
* gcc.target/riscv/builtins-overlap-8.c: New test.
* gcc.target/riscv/builtins-strict-align.c: New test.
* gcc.target/riscv/builtins.h: New test.

Signed-off-by: Christoph Muellner 
---
 gcc/config/riscv/riscv.c | 11 +++
 .../gcc.target/riscv/builtins-overlap-1.c| 10 ++
 .../gcc.target/riscv/builtins-overlap-2.c| 10 ++
 .../gcc.target/riscv/builtins-overlap-3.c| 10 ++
 .../gcc.target/riscv/builtins-overlap-4.c| 10 ++
 .../gcc.target/riscv/builtins-overlap-5.c| 11 +++
 .../gcc.target/riscv/builtins-overlap-6.c| 13 +
 .../gcc.target/riscv/builtins-overlap-7.c| 11 +++
 .../gcc.target/riscv/builtins-overlap-8.c| 11 +++
 .../gcc.target/riscv/builtins-strict-align.c | 10 ++
 gcc/testsuite/gcc.target/riscv/builtins.h| 16 
 11 files changed, 123 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-overlap-8.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return !riscv_slow_unaligned_access_p;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c 
b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
new file mode 100644
index 000..ca51fff0fc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-1.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(7)
+
+/* { dg-final { scan-assembler-times "sw\tzero,0"  1 } } */
+/* { dg-final { scan-assembler-times "sw\tzero,3"  1 } } */
+/* { dg-final { scan-assembler-not   "sb" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c 
b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
new file mode 100644
index 000..24b5b254658
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/builtins-overlap-2.c
@@ -0,0 +1,10 @@
+/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
+/* { dg-do compile } */
+
+#include "builtins.h"
+
+DO_MEMSET0_N(11)
+
+/* { dg-final { scan-assemb

[PATCH] RISC-V: Allow unaligned accesses in cpymemsi expansion

2021-07-29 Thread Christoph Muellner via Gcc-patches
The RISC-V cpymemsi expansion is called whenever the by-pieces
infrastructure will not be taking care of the builtin expansion.
Currently, that's the case for e.g. memcpy() with n <= 24 bytes.
The by-pieces infrastructure emits code that performs unaligned
accesses if the target's riscv_slow_unaligned_access_p is false
(and n is not 1).

If n > 24, then the RISC-V cpymemsi expansion is called, which is
implemented in riscv_expand_block_move(). The current implementation
does not check riscv_slow_unaligned_access_p and never emits unaligned
accesses.

Since by-pieces emits unaligned accesses, it is reasonable to implement
the same behaviour in the cpymemsi expansion. And that's what this patch
is doing.

The patch checks riscv_slow_unaligned_access_p at the entry and sets
the allowed alignment accordingly. This alignment is then propagated
down to the routines that emit the actual instructions.

Without the patch a memcpy() with n==25 will be expanded only
if the given pointers are aligned. With the patch, unaligned
pointers are also accepted if riscv_slow_unaligned_access_p is false.

gcc/ChangeLog:

* config/riscv/riscv.c (riscv_block_move_straight): Add
parameter align.
(riscv_adjust_block_mem): Replace parameter length by parameter
align.
(riscv_block_move_loop): Add parameter align.
(riscv_expand_block_move): Set alignment properly if the target
has fast unaligned access.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/builtins-strict-align.c: New test.
* gcc.target/riscv/builtins-unaligned-1.c: New test.
* gcc.target/riscv/builtins-unaligned-2.c: New test.
* gcc.target/riscv/builtins-unaligned-3.c: New test.
* gcc.target/riscv/builtins-unaligned-4.c: New test.
* gcc.target/riscv/builtins.h: New test.

Signed-off-by: Christoph Muellner 
---
 gcc/config/riscv/riscv.c  | 53 +++
 .../gcc.target/riscv/builtins-strict-align.c  | 13 +
 .../gcc.target/riscv/builtins-unaligned-1.c   | 15 ++
 .../gcc.target/riscv/builtins-unaligned-2.c   | 15 ++
 .../gcc.target/riscv/builtins-unaligned-3.c   | 15 ++
 .../gcc.target/riscv/builtins-unaligned-4.c   | 15 ++
 gcc/testsuite/gcc.target/riscv/builtins.h | 10 
 7 files changed, 115 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..0596a9ff1b6 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr)
   return addr;
 }
 
-/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST
+   with accesses that are ALIGN bytes aligned.
Assume that the areas do not overlap.  */
 
 static void
-riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
+riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+  unsigned HOST_WIDE_INT align)
 {
   unsigned HOST_WIDE_INT offset, delta;
   unsigned HOST_WIDE_INT bits;
@@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length)
   enum machine_mode mode;
   rtx *regs;
 
-  bits = MAX (BITS_PER_UNIT,
- MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest;
+  bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align));
 
   mode = mode_for_size (bits, MODE_INT, 0).require ();
   delta = bits / BITS_PER_UNIT;
@@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length)
 {
   src = adjust_address (src, BLKmode, offset);
   dest = adjust_address (dest, BLKmode, offset);
-  move_by_pieces (dest, src, length - offset,
- MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
+  move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN);
 }
 }
 
 /* Helper function for doing a loop-based block operation on memory
-   reference MEM.  Each iteration of the loop will operate on LENGTH
-   bytes of MEM.
+   reference MEM.
 
Create a new base register for use within the loop and point it to
the start of MEM.  Create a new memory reference that uses this
-   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+   register and has an alignment of ALIGN.  Store them in *LOOP_REG
+   and *LOOP_MEM respectively.  */
 
 static void
-riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
+riscv_a

[PATCH 1/2] REE: PR rtl-optimization/100264: Handle more PARALLEL SET expressions

2021-04-26 Thread Christoph Muellner via Gcc-patches
[ree] PR rtl-optimization/100264: Handle more PARALLEL SET expressions

PR rtl-optimization/100264
* ree.c (get_sub_rtx): Ignore SET expressions without register
destinations.
(merge_def_and_ext): Eliminate destination check for register
as such SET expressions can't occur anymore.
(combine_reaching_defs): Likewise.
---
 gcc/ree.c | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/ree.c b/gcc/ree.c
index 65457c582c6a..1eaaaf9e1eb5 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -658,10 +658,11 @@ make_defs_and_copies_lists (rtx_insn *extend_insn, 
const_rtx set_pat,
   return ret;
 }
 
-/* If DEF_INSN has single SET expression, possibly buried inside
-   a PARALLEL, return the address of the SET expression, else
-   return NULL.  This is similar to single_set, except that
-   single_set allows multiple SETs when all but one is dead.  */
+/* If DEF_INSN has single SET expression with a register
+   destination, possibly buried inside a PARALLEL, return
+   the address of the SET expression, else return NULL.
+   This is similar to single_set, except that single_set
+   allows multiple SETs when all but one is dead.  */
 static rtx *
 get_sub_rtx (rtx_insn *def_insn)
 {
@@ -675,6 +676,8 @@ get_sub_rtx (rtx_insn *def_insn)
   rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
   if (GET_CODE (s_expr) != SET)
 continue;
+ if (!REG_P (SET_DEST (s_expr)))
+   continue;
 
   if (sub_rtx == NULL)
 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
@@ -686,7 +689,13 @@ get_sub_rtx (rtx_insn *def_insn)
 }
 }
   else if (code == SET)
-sub_rtx = &PATTERN (def_insn);
+{
+   rtx s_expr = PATTERN (def_insn);
+   if (!REG_P (SET_DEST (s_expr)))
+ return NULL;
+
+   sub_rtx = &PATTERN (def_insn);
+}
   else
 {
   /* It is not a PARALLEL or a SET, what could it be ? */
@@ -712,13 +721,12 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, 
ext_state *state)
   if (sub_rtx == NULL)
 return false;
 
-  if (REG_P (SET_DEST (*sub_rtx))
-  && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
+  if (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
  || ((state->modified[INSN_UID (def_insn)].kind
   == (cand->code == ZERO_EXTEND
   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
  && state->modified[INSN_UID (def_insn)].mode
-== ext_src_mode)))
+== ext_src_mode))
 {
   if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
  >= GET_MODE_UNIT_SIZE (cand->mode))
@@ -853,8 +861,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 CAND->insn, then this transformation is not safe.  Note we have
 to test in the widened mode.  */
   rtx *dest_sub_rtx = get_sub_rtx (def_insn);
-  if (dest_sub_rtx == NULL
- || !REG_P (SET_DEST (*dest_sub_rtx)))
+  if (dest_sub_rtx == NULL)
return false;
 
   rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (set)),
@@ -947,8 +954,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
break;
 
  rtx *dest_sub_rtx2 = get_sub_rtx (def_insn2);
- if (dest_sub_rtx2 == NULL
- || !REG_P (SET_DEST (*dest_sub_rtx2)))
+ if (dest_sub_rtx2 == NULL)
break;
 
  /* On RISC machines we must make sure that changing the mode of
-- 
2.31.1



[PATCH 00/10] [RISC-V] Atomics improvements [PR100265/PR100266]

2021-04-26 Thread Christoph Muellner via Gcc-patches
This series provides a cleanup of the current atomics implementation
of RISC-V:

* PR100265: Use proper fences for atomic load/store
* PR100266: Provide programmatic implementation of CAS

As both are very related, I merged the patches into one series
(to avoid merge issues if one overtakes the other).

The first patch could be squashed into the following patches,
but I found it easier to understand the changes with it in place.

The series has been tested as follows:
* Building and testing a multilib RV32/64 toolchain
  (bootstrapped with riscv-gnu-toolchain repo)
* Manual review of generated sequences for GCC's atomic builtins API

The second part of the series (the re-implementation of CAS) benefits
from a REE improvement (see PR100264):
  https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568680.html
If this patch is not in place, then an additional s.ext instruction
is emitted after the SC.W (in case of RV64 and CAS for uint32_t).

Christoph Muellner (10):
  RISC-V: Simplify memory model code [PR 100265]
  RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]
  RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]
  RISC-V: Don't use amoswap for atomic stores [PR 100265]
  RISC-V: Emit fences according to chosen memory model [PR 100265]
  RISC-V: Implement atomic_{load,store} [PR 100265]
  RISC-V: Model INSNs for LR and SC [PR 100266]
  RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]
  RISC-V: Generate helpers for cbranch4 [PR 100266]
  RISC-V: Provide programmatic implementation of CAS [PR 100266]

 gcc/config/riscv/riscv-protos.h |   1 +
 gcc/config/riscv/riscv.c| 134 +-
 gcc/config/riscv/riscv.md   |   2 +-
 gcc/config/riscv/sync.md| 190 ++--
 4 files changed, 215 insertions(+), 112 deletions(-)

-- 
2.31.1



[PATCH 02/10] RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
The ratified A extension supports '.aq', '.rl' and '.aqrl' as
memory ordering suffixes. Let's emit them in case we get a '%A'
conversion specifier for riscv_print_operand().

As '%A' was already used for a similar, but restricted, purpose
(only '.aq' was emitted so far), this does not require any other
changes.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Remove function.
* config/riscv/riscv.c (riscv_print_amo_memory_ordering_suffix):
  Add function to emit AMO memory ordering suffixes.
* config/riscv/riscv.c (riscv_print_operand): Call
  riscv_print_amo_memory_ordering_suffix() instead of
  riscv_memmodel_needs_amo_acquire().
---
 gcc/config/riscv/riscv.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 9b5aedc77131..881eab66a481 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3341,24 +3341,26 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
   fputc (')', file);
 }
 
-/* Return true if the .AQ suffix should be added to an AMO to implement the
-   acquire portion of memory model MODEL.  */
+/* Print the memory ordering suffix for AMOs.  */
 
-static bool
-riscv_memmodel_needs_amo_acquire (const enum memmodel model)
+static void
+riscv_print_amo_memory_ordering_suffix (FILE *file, const enum memmodel model)
 {
   switch (model)
 {
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_ACQUIRE:
+  case MEMMODEL_RELAXED:
+   break;
   case MEMMODEL_CONSUME:
-   return true;
-
+  case MEMMODEL_ACQUIRE:
+   fputs (".aq", file);
+   break;
   case MEMMODEL_RELEASE:
-  case MEMMODEL_RELAXED:
-   return false;
-
+   fputs (".rl", file);
+   break;
+  case MEMMODEL_ACQ_REL:
+  case MEMMODEL_SEQ_CST:
+   fputs (".aqrl", file);
+   break;
   default:
gcc_unreachable ();
 }
@@ -3423,8 +3425,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire (model))
-   fputs (".aq", file);
+  riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
 case 'F':
-- 
2.31.1



[PATCH 03/10] RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
A previous patch took care, that the proper memory ordering suffixes
for AMOs are emitted. Therefore there is no reason to keep the fence
generation mechanism for release operations.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Remove function.
* config/riscv/riscv.c (riscv_print_operand): Remove
  %F format specifier.
* config/riscv/sync.md: Remove %F format specifier uses.
---
 gcc/config/riscv/riscv.c | 29 -
 gcc/config/riscv/sync.md | 16 
 2 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 881eab66a481..87cdde73ae21 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3366,29 +3366,6 @@ riscv_print_amo_memory_ordering_suffix (FILE *file, 
const enum memmodel model)
 }
 }
 
-/* Return true if a FENCE should be emitted to before a memory access to
-   implement the release portion of memory model MODEL.  */
-
-static bool
-riscv_memmodel_needs_release_fence (const enum memmodel model)
-{
-  switch (model)
-{
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_RELEASE:
-   return true;
-
-  case MEMMODEL_ACQUIRE:
-  case MEMMODEL_CONSUME:
-  case MEMMODEL_RELAXED:
-   return false;
-
-  default:
-   gcc_unreachable ();
-}
-}
-
 /* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:
 
'h' Print the high-part relocation associated with OP, after stripping
@@ -3396,7 +3373,6 @@ riscv_memmodel_needs_release_fence (const enum memmodel 
model)
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
'A' Print the atomic operation suffix for memory model OP.
-   'F' Print a FENCE if the memory model requires a release.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.  */
 
@@ -3428,11 +3404,6 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
-case 'F':
-  if (riscv_memmodel_needs_release_fence (model))
-   fputs ("fence iorw,ow; ", file);
-  break;
-
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 747a799e2377..aeeb2e854b68 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -65,7 +65,7 @@
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_ATOMIC_STORE))]
   "TARGET_ATOMIC"
-  "%F2amoswap.%A2 zero,%z1,%0"
+  "amoswap.%A2 zero,%z1,%0"
   [(set (attr "length") (const_int 8))])
 
 (define_insn "atomic_"
@@ -76,8 +76,8 @@
   (match_operand:SI 2 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F2amo.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A2 zero,%z1,%0"
+)
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -89,8 +89,8 @@
   (match_operand:SI 3 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F3amo.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -101,8 +101,8 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "0"))]
   "TARGET_ATOMIC"
-  "%F3amoswap.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amoswap.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_cas_value_strong"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -115,7 +115,7 @@
 UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 6 "=&r"))]
   "TARGET_ATOMIC"
-  "%F5 1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 
1:"
+  "1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 1:"
   [(set (attr "length") (const_int 20))])
 
 (define_expand "atomic_compare_and_swap"
-- 
2.31.1



[PATCH 01/10] RISC-V: Simplify memory model code [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
We don't have any special treatment of MEMMODEL_SYNC_* values,
so let's hide them behind the memmodel_base() function.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Ignore MEMMODEL_SYNC_* values.
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Likewise.
* config/riscv/riscv.c (riscv_print_operand): Eliminate
  MEMMODEL_SYNC_* values by calling memmodel_base().
---
 gcc/config/riscv/riscv.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 17cdf705c328..9b5aedc77131 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3345,20 +3345,17 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
acquire portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_amo_acquire (enum memmodel model)
+riscv_memmodel_needs_amo_acquire (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
return true;
 
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3371,20 +3368,17 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model)
implement the release portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_release_fence (enum memmodel model)
+riscv_memmodel_needs_release_fence (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
return true;
 
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3409,6 +3403,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
 {
   machine_mode mode = GET_MODE (op);
   enum rtx_code code = GET_CODE (op);
+  const enum memmodel model = memmodel_base (INTVAL (op));
 
   switch (letter)
 {
@@ -3428,12 +3423,12 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_amo_acquire (model))
fputs (".aq", file);
   break;
 
 case 'F':
-  if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_release_fence (model))
fputs ("fence iorw,ow; ", file);
   break;
 
-- 
2.31.1



[PATCH 04/10] RISC-V: Don't use amoswap for atomic stores [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
Using amoswap as atomic store is not an expected optimization
and most likely causes a performance penalty.
Neither SW nor HW have a benefit from this optimization,
so let's simply drop it.

gcc/
PR 100265
* config/riscv/sync.md (atomic_store):
  Remove.
---
 gcc/config/riscv/sync.md | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index aeeb2e854b68..efd49745a8e2 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -57,17 +57,6 @@
 
 ;; Atomic memory operations.
 
-;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
-(define_insn "atomic_store"
-  [(set (match_operand:GPR 0 "memory_operand" "=A")
-(unspec_volatile:GPR
-  [(match_operand:GPR 1 "reg_or_0_operand" "rJ")
-   (match_operand:SI 2 "const_int_operand")]  ;; model
-  UNSPEC_ATOMIC_STORE))]
-  "TARGET_ATOMIC"
-  "amoswap.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
-
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH 05/10] RISC-V: Emit fences according to chosen memory model [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
mem_thread_fence gets the desired memory model as operand.
Let's emit fences according to this value (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).

gcc/
PR 100265
* config/riscv/sync.md (mem_thread_fence):
  Emit fences according to given operand.
* config/riscv/sync.md (mem_fence):
  Add INSNs for different fence flavours.
* config/riscv/sync.md (mem_thread_fence_1):
  Remove.
---
 gcc/config/riscv/sync.md | 41 +++-
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index efd49745a8e2..406db1730b81 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -34,26 +34,41 @@
 ;; Memory barriers.
 
 (define_expand "mem_thread_fence"
-  [(match_operand:SI 0 "const_int_operand" "")] ;; model
+  [(match_operand:SI 0 "const_int_operand")] ;; model
   ""
 {
-  if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
-{
-  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (mem) = 1;
-  emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
-}
+  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
+  if (!(is_mm_relaxed (model)))
+  emit_insn (gen_mem_fence (operands[0]));
   DONE;
 })
 
-;; Until the RISC-V memory model (hence its mapping from C++) is finalized,
-;; conservatively emit a full FENCE.
-(define_insn "mem_thread_fence_1"
+(define_expand "mem_fence"
+  [(set (match_dup 1)
+   (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
+  ""
+{
+  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[1]) = 1;
+})
+
+(define_insn "*mem_fence"
   [(set (match_operand:BLK 0 "" "")
-   (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
-   (match_operand:SI 1 "const_int_operand" "")] ;; model
+   (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
   ""
-  "fence\tiorw,iorw")
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[1]));
+  if (is_mm_consume (model) || is_mm_acquire (model))
+return "fence\tr, rw";
+  else if (is_mm_release (model))
+return "fence\trw, w";
+  else if (is_mm_acq_rel (model))
+return "fence.tso";
+  else
+return "fence\trw, rw";
+})
 
 ;; Atomic memory operations.
 
-- 
2.31.1



[PATCH 06/10] RISC-V: Implement atomic_{load,store} [PR 100265]

2021-04-26 Thread Christoph Muellner via Gcc-patches
A recent commit introduced a mechanism to emit proper fences
for RISC-V. Additionally, we already have emit_move_insn ().
Let's reuse this code and provide atomic_load and
atomic_store for RISC-V (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).
Note, that this works also for sub-word atomics.

gcc/
PR 100265
* config/riscv/sync.md (atomic_load): New.
* config/riscv/sync.md (atomic_store): New.
---
 gcc/config/riscv/sync.md | 41 
 1 file changed, 41 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 406db1730b81..ceec324dfa30 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -23,6 +23,7 @@
   UNSPEC_COMPARE_AND_SWAP
   UNSPEC_SYNC_OLD_OP
   UNSPEC_SYNC_EXCHANGE
+  UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
 ])
@@ -72,6 +73,46 @@
 
 ;; Atomic memory operations.
 
+(define_expand "atomic_load"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_LOAD))]
+  ""
+  {
+rtx target = operands[0];
+rtx mem = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_SEQ_CST)));
+emit_move_insn (target, mem);
+if (is_mm_acquire (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_ACQUIRE)));
+
+DONE;
+})
+
+(define_expand "atomic_store"
+  [(set (match_operand:ANYI 0 "memory_operand" "=A")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "reg_or_0_operand" "rJ")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_STORE))]
+  ""
+  {
+rtx mem = operands[0];
+rtx val = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_release (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_RELEASE)));
+emit_move_insn (mem, val);
+
+DONE;
+})
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH 07/10] RISC-V: Model INSNs for LR and SC [PR 100266]

2021-04-26 Thread Christoph Muellner via Gcc-patches
In order to emit LR/SC sequences, let's provide INSNs, which
take care of memory ordering constraints.

gcc/
PR 100266
* config/riscv/sync.md (UNSPEC_LOAD_RESERVED): New.
* config/riscv/sync.md (UNSPEC_STORE_CONDITIONAL): New.
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ceec324dfa30..edff6520b87e 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -26,6 +26,8 @@
   UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
+  UNSPEC_LOAD_RESERVED
+  UNSPEC_STORE_CONDITIONAL
 ])
 
 (define_code_iterator any_atomic [plus ior xor and])
@@ -113,6 +115,28 @@
 DONE;
 })
 
+(define_insn "@riscv_load_reserved"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(unspec_volatile:GPR
+  [(match_operand:GPR 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_LOAD_RESERVED))]
+  "TARGET_ATOMIC"
+  "lr.%A2 %0, %1"
+)
+
+(define_insn "@riscv_store_conditional"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+(unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
+   (set (match_operand:GPR 1 "memory_operand" "=A")
+(unspec_volatile:GPR
+  [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC"
+  "sc.%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH 08/10] RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]

2021-04-26 Thread Christoph Muellner via Gcc-patches
The current model of the LR and SC INSNs requires a sign-extension
to use the generated SImode value for conditional branches, which
only operate on XLEN registers.
However, the sign-extension is actually not required in either case,
therefore this patch introduces additional INSNs that consume
the sign-extension.

Rationale:
The loaded value of a LR.W is specified as sign-extended.
Therefore, a sign-extension is not required.
The success value of a SC.W is specified as non-zero.
As a sign-extended non-zero value remains non-zero,
the sign-extension is not required.

gcc/
PR 100266
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index edff6520b87e..49b860da8ef0 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -125,6 +125,21 @@
   "lr.%A2 %0, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the loaded value.
+;; This is legal, because the specification of LR.W defines the loaded
+;; value to be sign-extended.
+
+(define_insn "riscv_load_reserved"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(sign_extend:DI
+  (unspec_volatile:SI
+   [(match_operand:SI 1 "memory_operand" "A")
+(match_operand:SI 2 "const_int_operand")]  ;; model
+   UNSPEC_LOAD_RESERVED)))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "lr.w%A2 %0, %1"
+)
+
 (define_insn "@riscv_store_conditional"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 (unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
@@ -137,6 +152,25 @@
   "sc.%A3 %0, %z2, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the success
+;; value of SC.W, which can then be used for instructions which
+;; require values of XLEN-size (e.g. conditional branches).
+;; This is legal, because any non-zero value remains non-zero
+;; after sign-extension.
+
+(define_insn "riscv_store_conditional"
+  [(set (match_operand:DI 0 "register_operand" "=&r")
+(sign_extend:DI
+  (unspec_volatile:SI [(const_int 0)] UNSPEC_STORE_CONDITIONAL)))
+   (set (match_operand:SI 1 "memory_operand" "=A")
+(unspec_volatile:SI
+  [(match_operand:SI 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "sc.w%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH 09/10] RISC-V: Generate helpers for cbranch4 [PR 100266]

2021-04-26 Thread Christoph Muellner via Gcc-patches
On RISC-V we are facing the fact, that our conditional branches
require Pmode conditions. Currently, we generate them explicitly
with a check for Pmode and then calling the proper generator
(i.e. gen_cbranchdi4 on RV64 and gen_cbranchsi4 on RV32).
Let's simplify this code by using gen_cbranch4 (Pmode).

gcc/
PR 100266
* config/riscv/riscv.c (riscv_block_move_loop): Simplify.
* config/riscv/riscv.md (cbranch4): Generate helpers.
---
 gcc/config/riscv/riscv.c  | 5 +
 gcc/config/riscv/riscv.md | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 87cdde73ae21..6e97b38db6db 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3250,10 +3250,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length,
 
   /* Emit the loop condition.  */
   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
-  if (Pmode == DImode)
-emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
-  else
-emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+  emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label));
 
   /* Mop up any left-over bytes.  */
   if (leftover)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c3687d57047b..52f8a321ac23 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1908,7 +1908,7 @@
  (label_ref (match_operand 1))
  (pc)))])
 
-(define_expand "cbranch4"
+(define_expand "@cbranch4"
   [(set (pc)
(if_then_else (match_operator 0 "comparison_operator"
  [(match_operand:BR 1 "register_operand")
-- 
2.31.1



[PATCH 10/10] RISC-V: Provide programmatic implementation of CAS [PR 100266]

2021-04-26 Thread Christoph Muellner via Gcc-patches
The existing CAS implementation uses an INSN definition, which provides
the core LR/SC sequence. Additionally to that, there is a follow-up code,
that evaluates the results and calculates the return values.
This has two drawbacks: a) an extension to sub-word CAS implementations
is not possible (even if, then it would be unmaintainable), and b) the
implementation is hard to maintain/improve.
This patch provides a programmatic implementation of CAS, similar
like many other architectures are having one.

The implementation supports both, RV32 and RV64.

Additionally, the implementation does not introduce data dependencies
for computation of the return value. Instead, we set the return value
(success state of the CAS operation) based on structural information.
This approach is also shown in the RISC-V unpriv spec (as part
of the sample code for a compare-and-swap function using LR/SC).
The cost of this implementation is a single LI instruction on top,
which is actually not required in case of success (it will be
overwritten in the success case later).

The resulting sequence requires 9 instructions in the success case.
The previous implementation required 11 instructions in the success
case (including a taken branch) and had a "subw;seqz;beqz" sequence,
with direct dependencies.

Below is the generated code of a 32-bit CAS sequence with the old
implementation and the new implementation (ignore the ANDIs below).

Old:
 f00:   419clw  a5,0(a1)
 f02:   1005272flr.wa4,(a0)
 f06:   00f71563bne a4,a5,f10
 f0a:   18c526afsc.wa3,a2,(a0)
 f0e:   faf5bneza3,f02
 f10:   40f707bbsubwa5,a4,a5
 f14:   0017b513seqza0,a5
 f18:   c391beqza5,f1c
 f1a:   c198sw  a4,0(a1)
 f1c:   8905andia0,a0,1
 f1e:   8082ret

New:
 e28:   4194lw  a3,0(a1)
 e2a:   4701li  a4,0
 e2c:   1005282flr.wa6,(a0)
 e30:   00d81963bne a6,a3,e42
 e34:   18c527afsc.wa5,a2,(a0)
 e38:   fbf5bneza5,e2c
 e3a:   4705li  a4,1
 e3c:   00177513andia0,a4,1
 e40:   8082ret
 e42:   0105a023sw  a6,0(a1)
 e46:   00177513andia0,a4,1
 e4a:   8082ret

gcc/
PR 100266
* config/riscv/riscv-protos.h (riscv_expand_compare_and_swap): New.
* config/riscv/riscv.c (riscv_emit_unlikely_jump): New.
* config/riscv/riscv.c (riscv_expand_compare_and_swap): New.
* config/riscv/sync.md (atomic_cas_value_strong): Removed.
* config/riscv/sync.md (atomic_compare_and_swap): Call
  riscv_expand_compare_and_swap.
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.c| 68 +
 gcc/config/riscv/sync.md| 35 +
 3 files changed, 70 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 43d7224d6941..eb7e67d3b95a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -59,6 +59,7 @@ extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, 
rtx);
 extern void riscv_expand_float_scc (rtx, enum rtx_code, rtx, rtx);
 extern void riscv_expand_conditional_branch (rtx, enum rtx_code, rtx, rtx);
 extern void riscv_expand_conditional_move (rtx, rtx, rtx, rtx_code, rtx, rtx);
+extern void riscv_expand_compare_and_swap (rtx[]);
 #endif
 extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 6e97b38db6db..c81a9bd6a29e 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2488,6 +2488,74 @@ riscv_expand_conditional_move (rtx dest, rtx cons, rtx 
alt, rtx_code code,
  cons, alt)));
 }
 
+/* Mark the previous jump instruction as unlikely.  */
+
+static void
+riscv_emit_unlikely_jump (rtx insn)
+{
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+}
+
+/* Expand code to perform a compare-and-swap.  */
+
+extern void riscv_expand_compare_and_swap (rtx operands[])
+{
+  rtx bval, oldval, mem, expval, newval, mod_s, mod_f, scratch, cond1, cond2;
+  machine_mode mode;
+  rtx_code_label *begin_label, *end_label;
+
+  bval = operands[0];
+  oldval = operands[1];
+  mem = operands[2];
+  expval = operands[3];
+  newval = operands[4];
+  mod_s = operands[6];
+  mod_f = operands[7];

[PATCH] RISC-V: Generate helpers for cbranch4

2021-05-05 Thread Christoph Muellner via Gcc-patches
On RISC-V we are facing the fact, that our conditional branches
require Pmode conditions. Currently, we generate them explicitly
with a check for Pmode and then calling the proper generator
(i.e. gen_cbranchdi4 on RV64 and gen_cbranchsi4 on RV32).
Let's simplify this code by generating the INSN helpers
and use gen_cbranch4 (Pmode).

gcc/
PR 100266
* config/riscv/riscv.c (riscv_block_move_loop): Simplify.
* config/riscv/riscv.md (cbranch4): Generate helpers.
---
 gcc/config/riscv/riscv.c  |  5 +
 gcc/config/riscv/riscv.md | 12 
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index e1064e374eb0..27665e5b58f9 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3258,10 +3258,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length,
 
   /* Emit the loop condition.  */
   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
-  if (Pmode == DImode)
-emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
-  else
-emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+  emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label));
 
   /* Mop up any left-over bytes.  */
   if (leftover)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 0e35960fefaa..f88877fd5966 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2153,7 +2153,7 @@
  (label_ref (match_operand 1))
  (pc)))])
 
-(define_expand "cbranch4"
+(define_expand "@cbranch4"
   [(set (pc)
(if_then_else (match_operator 0 "comparison_operator"
  [(match_operand:BR 1 "register_operand")
@@ -2167,7 +2167,7 @@
   DONE;
 })
 
-(define_expand "cbranch4"
+(define_expand "@cbranch4"
   [(set (pc)
(if_then_else (match_operator 0 "fp_branch_comparison"
   [(match_operand:ANYF 1 "register_operand")
@@ -2829,12 +2829,8 @@
operands[0],
operands[1]));
 
-  if (mode == DImode)
-emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
-   result, const0_rtx, operands[2]));
-  else
-emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
-   result, const0_rtx, operands[2]));
+  rtx cond = gen_rtx_EQ (VOIDmode, result, const0_rtx);
+  emit_jump_insn (gen_cbranch4 (mode, cond, result, const0_rtx, operands[2]));
 
   DONE;
 })
-- 
2.31.1



[PATCH v2 00/10] [RISC-V] Atomics improvements [PR100265/PR100266]

2021-05-05 Thread Christoph Muellner via Gcc-patches
This series provides a cleanup of the current atomics implementation
of RISC-V:

* PR100265: Use proper fences for atomic load/store
* PR100266: Provide programmatic implementation of CAS

As both are very related, I merged the patches into one series.

The first patch could be squashed into the following patches,
but I found it easier to understand the changes with it in place.

The series has been tested as follows:
* Building and testing a multilib RV32/64 toolchain
  (bootstrapped with riscv-gnu-toolchain repo)
* Manual review of generated sequences for GCC's atomic builtins API

The programmatic re-implementation of CAS benefits from a REE improvement
(see PR100264):
  https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568680.html
If this patch is not in place, then an additional extension instruction
is emitted after the SC.W (in case of RV64 and CAS for uint32_t).

Further, the new CAS code requires cbranch INSN helpers to be present:
  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/569689.html

Changes for v2:
* Guard LL/SC sequence by compiler barriers ("blockage")
  (suggested by Andrew Waterman)
* Changed commit message for AMOSWAP->STORE change
  (suggested by Andrew Waterman)
* Extracted cbranch4 patch from patchset (suggested by Kito Cheng)
* Introduce predicate riscv_sync_memory_operand (suggested by Jim Wilson)
* Fix small code style issue

Christoph Muellner (10):
  RISC-V: Simplify memory model code [PR 100265]
  RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]
  RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]
  RISC-V: Use STORE instead of AMOSWAP for atomic stores [PR 100265]
  RISC-V: Emit fences according to chosen memory model [PR 100265]
  RISC-V: Implement atomic_{load,store} [PR 100265]
  RISC-V: Model INSNs for LR and SC [PR 100266]
  RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]
  RISC-V: Provide programmatic implementation of CAS [PR 100266]
  RISC-V: Introduce predicate "riscv_sync_memory_operand" [PR 100266]

 gcc/config/riscv/riscv-protos.h |   1 +
 gcc/config/riscv/riscv.c| 136 +---
 gcc/config/riscv/sync.md| 216 +---
 3 files changed, 235 insertions(+), 118 deletions(-)

-- 
2.31.1



[PATCH v2 01/10] RISC-V: Simplify memory model code [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
We don't have any special treatment of MEMMODEL_SYNC_* values,
so let's hide them behind the memmodel_base() function.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Ignore MEMMODEL_SYNC_* values.
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Likewise.
* config/riscv/riscv.c (riscv_print_operand): Eliminate
  MEMMODEL_SYNC_* values by calling memmodel_base().
---
 gcc/config/riscv/riscv.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 27665e5b58f9..545f3d0cb82c 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3350,20 +3350,17 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
acquire portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_amo_acquire (enum memmodel model)
+riscv_memmodel_needs_amo_acquire (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
return true;
 
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3376,20 +3373,17 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model)
implement the release portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_release_fence (enum memmodel model)
+riscv_memmodel_needs_release_fence (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
return true;
 
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3414,6 +3408,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
 {
   machine_mode mode = GET_MODE (op);
   enum rtx_code code = GET_CODE (op);
+  const enum memmodel model = memmodel_base (INTVAL (op));
 
   switch (letter)
 {
@@ -3433,12 +3428,12 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_amo_acquire (model))
fputs (".aq", file);
   break;
 
 case 'F':
-  if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_release_fence (model))
fputs ("fence iorw,ow; ", file);
   break;
 
-- 
2.31.1



[PATCH v2 02/10] RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
The ratified A extension supports '.aq', '.rl' and '.aqrl' as
memory ordering suffixes. Let's emit them in case we get a '%A'
conversion specifier for riscv_print_operand().

As '%A' was already used for a similar, but restricted, purpose
(only '.aq' was emitted so far), this does not require any other
changes.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Remove function.
* config/riscv/riscv.c (riscv_print_amo_memory_ordering_suffix):
  Add function to emit AMO memory ordering suffixes.
* config/riscv/riscv.c (riscv_print_operand): Call
  riscv_print_amo_memory_ordering_suffix() instead of
  riscv_memmodel_needs_amo_acquire().
---
 gcc/config/riscv/riscv.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 545f3d0cb82c..3edd5c239d7c 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3346,24 +3346,26 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
   fputc (')', file);
 }
 
-/* Return true if the .AQ suffix should be added to an AMO to implement the
-   acquire portion of memory model MODEL.  */
+/* Print the memory ordering suffix for AMOs.  */
 
-static bool
-riscv_memmodel_needs_amo_acquire (const enum memmodel model)
+static void
+riscv_print_amo_memory_ordering_suffix (FILE *file, const enum memmodel model)
 {
   switch (model)
 {
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_ACQUIRE:
+  case MEMMODEL_RELAXED:
+   break;
   case MEMMODEL_CONSUME:
-   return true;
-
+  case MEMMODEL_ACQUIRE:
+   fputs (".aq", file);
+   break;
   case MEMMODEL_RELEASE:
-  case MEMMODEL_RELAXED:
-   return false;
-
+   fputs (".rl", file);
+   break;
+  case MEMMODEL_ACQ_REL:
+  case MEMMODEL_SEQ_CST:
+   fputs (".aqrl", file);
+   break;
   default:
gcc_unreachable ();
 }
@@ -3428,8 +3430,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire (model))
-   fputs (".aq", file);
+  riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
 case 'F':
-- 
2.31.1



[PATCH v2 03/10] RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
A previous patch took care, that the proper memory ordering suffixes
for AMOs are emitted. Therefore there is no reason to keep the fence
generation mechanism for release operations.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Remove function.
* config/riscv/riscv.c (riscv_print_operand): Remove
  %F format specifier.
* config/riscv/sync.md: Remove %F format specifier uses.
---
 gcc/config/riscv/riscv.c | 29 -
 gcc/config/riscv/sync.md | 16 
 2 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 3edd5c239d7c..5fe65776e608 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -3371,29 +3371,6 @@ riscv_print_amo_memory_ordering_suffix (FILE *file, 
const enum memmodel model)
 }
 }
 
-/* Return true if a FENCE should be emitted to before a memory access to
-   implement the release portion of memory model MODEL.  */
-
-static bool
-riscv_memmodel_needs_release_fence (const enum memmodel model)
-{
-  switch (model)
-{
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_RELEASE:
-   return true;
-
-  case MEMMODEL_ACQUIRE:
-  case MEMMODEL_CONSUME:
-  case MEMMODEL_RELAXED:
-   return false;
-
-  default:
-   gcc_unreachable ();
-}
-}
-
 /* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:
 
'h' Print the high-part relocation associated with OP, after stripping
@@ -3401,7 +3378,6 @@ riscv_memmodel_needs_release_fence (const enum memmodel 
model)
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
'A' Print the atomic operation suffix for memory model OP.
-   'F' Print a FENCE if the memory model requires a release.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.  */
 
@@ -3433,11 +3409,6 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
-case 'F':
-  if (riscv_memmodel_needs_release_fence (model))
-   fputs ("fence iorw,ow; ", file);
-  break;
-
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 747a799e2377..aeeb2e854b68 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -65,7 +65,7 @@
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_ATOMIC_STORE))]
   "TARGET_ATOMIC"
-  "%F2amoswap.%A2 zero,%z1,%0"
+  "amoswap.%A2 zero,%z1,%0"
   [(set (attr "length") (const_int 8))])
 
 (define_insn "atomic_"
@@ -76,8 +76,8 @@
   (match_operand:SI 2 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F2amo.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A2 zero,%z1,%0"
+)
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -89,8 +89,8 @@
   (match_operand:SI 3 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F3amo.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -101,8 +101,8 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "0"))]
   "TARGET_ATOMIC"
-  "%F3amoswap.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amoswap.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_cas_value_strong"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -115,7 +115,7 @@
 UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 6 "=&r"))]
   "TARGET_ATOMIC"
-  "%F5 1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 
1:"
+  "1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 1:"
   [(set (attr "length") (const_int 20))])
 
 (define_expand "atomic_compare_and_swap"
-- 
2.31.1



[PATCH v2 04/10] RISC-V: Use STORE instead of AMOSWAP for atomic stores [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
Using AMOSWAP as atomic store does not allow us to do sub-word accesses.
Further, it is not consistent with our atomic_load () implementation.
The benefit of AMOSWAP is that the resulting code sequence will be
smaller (compared to FENCE+STORE); however, this does not outweigh
the lack of sub-word accesses.
Additionally, HW implementors have claimed that an optimal
implementation of AMOSWAP is slightly more expensive than FENCE+STORE.
So let's use STORE instead of AMOSWAP.

gcc/
PR 100265
* config/riscv/sync.md (atomic_store):
  Remove.
---
 gcc/config/riscv/sync.md | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index aeeb2e854b68..efd49745a8e2 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -57,17 +57,6 @@
 
 ;; Atomic memory operations.
 
-;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
-(define_insn "atomic_store"
-  [(set (match_operand:GPR 0 "memory_operand" "=A")
-(unspec_volatile:GPR
-  [(match_operand:GPR 1 "reg_or_0_operand" "rJ")
-   (match_operand:SI 2 "const_int_operand")]  ;; model
-  UNSPEC_ATOMIC_STORE))]
-  "TARGET_ATOMIC"
-  "amoswap.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
-
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH v2 05/10] RISC-V: Emit fences according to chosen memory model [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
mem_thread_fence gets the desired memory model as operand.
Let's emit fences according to this value (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).

gcc/
PR 100265
* config/riscv/sync.md (mem_thread_fence):
  Emit fences according to given operand.
* config/riscv/sync.md (mem_fence):
  Add INSNs for different fence flavours.
* config/riscv/sync.md (mem_thread_fence_1):
  Remove.
---
 gcc/config/riscv/sync.md | 41 +++-
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index efd49745a8e2..406db1730b81 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -34,26 +34,41 @@
 ;; Memory barriers.
 
 (define_expand "mem_thread_fence"
-  [(match_operand:SI 0 "const_int_operand" "")] ;; model
+  [(match_operand:SI 0 "const_int_operand")] ;; model
   ""
 {
-  if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
-{
-  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (mem) = 1;
-  emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
-}
+  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
+  if (!(is_mm_relaxed (model)))
+  emit_insn (gen_mem_fence (operands[0]));
   DONE;
 })
 
-;; Until the RISC-V memory model (hence its mapping from C++) is finalized,
-;; conservatively emit a full FENCE.
-(define_insn "mem_thread_fence_1"
+(define_expand "mem_fence"
+  [(set (match_dup 1)
+   (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
+  ""
+{
+  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[1]) = 1;
+})
+
+(define_insn "*mem_fence"
   [(set (match_operand:BLK 0 "" "")
-   (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
-   (match_operand:SI 1 "const_int_operand" "")] ;; model
+   (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
   ""
-  "fence\tiorw,iorw")
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[1]));
+  if (is_mm_consume (model) || is_mm_acquire (model))
+return "fence\tr, rw";
+  else if (is_mm_release (model))
+return "fence\trw, w";
+  else if (is_mm_acq_rel (model))
+return "fence.tso";
+  else
+return "fence\trw, rw";
+})
 
 ;; Atomic memory operations.
 
-- 
2.31.1



[PATCH v2 06/10] RISC-V: Implement atomic_{load,store} [PR 100265]

2021-05-05 Thread Christoph Muellner via Gcc-patches
A recent commit introduced a mechanism to emit proper fences
for RISC-V. Additionally, we already have emit_move_insn ().
Let's reuse this code and provide atomic_load and
atomic_store for RISC-V (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).
Note that this also works for sub-word atomics.

gcc/
PR 100265
* config/riscv/sync.md (atomic_load): New.
* config/riscv/sync.md (atomic_store): New.
---
 gcc/config/riscv/sync.md | 41 
 1 file changed, 41 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 406db1730b81..ceec324dfa30 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -23,6 +23,7 @@
   UNSPEC_COMPARE_AND_SWAP
   UNSPEC_SYNC_OLD_OP
   UNSPEC_SYNC_EXCHANGE
+  UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
 ])
@@ -72,6 +73,46 @@
 
 ;; Atomic memory operations.
 
+(define_expand "atomic_load"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_LOAD))]
+  ""
+  {
+rtx target = operands[0];
+rtx mem = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_SEQ_CST)));
+emit_move_insn (target, mem);
+if (is_mm_acquire (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_ACQUIRE)));
+
+DONE;
+})
+
+(define_expand "atomic_store"
+  [(set (match_operand:ANYI 0 "memory_operand" "=A")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "reg_or_0_operand" "rJ")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_STORE))]
+  ""
+  {
+rtx mem = operands[0];
+rtx val = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_release (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_RELEASE)));
+emit_move_insn (mem, val);
+
+DONE;
+})
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH v2 07/10] RISC-V: Model INSNs for LR and SC [PR 100266]

2021-05-05 Thread Christoph Muellner via Gcc-patches
In order to emit LR/SC sequences, let's provide INSNs, which
take care of memory ordering constraints.

gcc/
PR 100266
* config/riscv/sync.md (UNSPEC_LOAD_RESERVED): New.
* config/riscv/sync.md (UNSPEC_STORE_CONDITIONAL): New.
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ceec324dfa30..edff6520b87e 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -26,6 +26,8 @@
   UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
+  UNSPEC_LOAD_RESERVED
+  UNSPEC_STORE_CONDITIONAL
 ])
 
 (define_code_iterator any_atomic [plus ior xor and])
@@ -113,6 +115,28 @@
 DONE;
 })
 
+(define_insn "@riscv_load_reserved"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(unspec_volatile:GPR
+  [(match_operand:GPR 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_LOAD_RESERVED))]
+  "TARGET_ATOMIC"
+  "lr.%A2 %0, %1"
+)
+
+(define_insn "@riscv_store_conditional"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+(unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
+   (set (match_operand:GPR 1 "memory_operand" "=A")
+(unspec_volatile:GPR
+  [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC"
+  "sc.%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH v2 08/10] RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]

2021-05-05 Thread Christoph Muellner via Gcc-patches
The current model of the LR and SC INSNs requires a sign-extension
to use the generated SImode value for conditional branches, which
only operate on XLEN registers.
However, the sign-extension is actually not required in either case,
therefore this patch introduces additional INSNs that consume
the sign-extension.

Rationale:
The loaded value of a LR.W is specified to be sign-extended,
therefore an explicit sign-extension is not required.
The success value of a SC.W is specified as non-zero.
As a sign-extended non-zero value remains non-zero,
the sign-extension is not required here either.

gcc/
PR 100266
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index edff6520b87e..49b860da8ef0 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -125,6 +125,21 @@
   "lr.%A2 %0, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the loaded value.
+;; This is legal, because the specification of LR.W defines the loaded
+;; value to be sign-extended.
+
+(define_insn "riscv_load_reserved"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(sign_extend:DI
+  (unspec_volatile:SI
+   [(match_operand:SI 1 "memory_operand" "A")
+(match_operand:SI 2 "const_int_operand")]  ;; model
+   UNSPEC_LOAD_RESERVED)))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "lr.w%A2 %0, %1"
+)
+
 (define_insn "@riscv_store_conditional"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 (unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
@@ -137,6 +152,25 @@
   "sc.%A3 %0, %z2, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the success
+;; value of SC.W, which can then be used for instructions which
+;; require values of XLEN-size (e.g. conditional branches).
+;; This is legal, because any non-zero value remains non-zero
+;; after sign-extension.
+
+(define_insn "riscv_store_conditional"
+  [(set (match_operand:DI 0 "register_operand" "=&r")
+(sign_extend:DI
+  (unspec_volatile:SI [(const_int 0)] UNSPEC_STORE_CONDITIONAL)))
+   (set (match_operand:SI 1 "memory_operand" "=A")
+(unspec_volatile:SI
+  [(match_operand:SI 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "sc.w%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.31.1



[PATCH v2 09/10] RISC-V: Provide programmatic implementation of CAS [PR 100266]

2021-05-05 Thread Christoph Muellner via Gcc-patches
The existing CAS implementation uses an INSN definition, which provides
the core LR/SC sequence. Additionally to that, there is a follow-up code,
that evaluates the results and calculates the return values.
This has two drawbacks: a) an extension to sub-word CAS implementations
is not possible (and even if it were, the result would be unmaintainable),
and b) the implementation is hard to maintain/improve.
This patch provides a programmatic implementation of CAS, similar
like many other architectures are having one.

The implementation supports both, RV32 and RV64.

Additionally, the implementation does not introduce data dependencies
for computation of the return value. Instead, we set the return value
(success state of the CAS operation) based on structural information.
This approach is also shown in the RISC-V unpriv spec (as part
of the sample code for a compare-and-swap function using LR/SC).
The cost of this implementation is a single LI instruction on top,
which is actually not required in case of success (it will be
overwritten in the success case later).

The resulting sequence requires 9 instructions in the success case.
The previous implementation required 11 instructions in the success
case (including a taken branch) and had a "subw;seqz;beqz" sequence,
with direct dependencies.

Below is the generated code of a 32-bit CAS sequence with the old
implementation and the new implementation (ignore the ANDIs below).

Old:
 f00:   419clw  a5,0(a1)
 f02:   1005272flr.wa4,(a0)
 f06:   00f71563bne a4,a5,f10
 f0a:   18c526afsc.wa3,a2,(a0)
 f0e:   faf5bneza3,f02
 f10:   40f707bbsubwa5,a4,a5
 f14:   0017b513seqza0,a5
 f18:   c391beqza5,f1c
 f1a:   c198sw  a4,0(a1)
 f1c:   8905andia0,a0,1
 f1e:   8082ret

New:
 e28:   4194lw  a3,0(a1)
 e2a:   4701li  a4,0
 e2c:   1005282flr.wa6,(a0)
 e30:   00d81963bne a6,a3,e42
 e34:   18c527afsc.wa5,a2,(a0)
 e38:   fbf5bneza5,e2c
 e3a:   4705li  a4,1
 e3c:   00177513andia0,a4,1
 e40:   8082ret
 e42:   0105a023sw  a6,0(a1)
 e46:   00177513andia0,a4,1
 e4a:   8082ret

gcc/
PR 100266
* config/riscv/riscv-protos.h (riscv_expand_compare_and_swap): New.
* config/riscv/riscv.c (riscv_emit_unlikely_jump): New.
* config/riscv/riscv.c (riscv_expand_compare_and_swap): New.
* config/riscv/sync.md (atomic_cas_value_strong): Removed.
* config/riscv/sync.md (atomic_compare_and_swap): Call
  riscv_expand_compare_and_swap.
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.c| 75 +
 gcc/config/riscv/sync.md| 35 +--
 3 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 43d7224d6941..eb7e67d3b95a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -59,6 +59,7 @@ extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, 
rtx);
 extern void riscv_expand_float_scc (rtx, enum rtx_code, rtx, rtx);
 extern void riscv_expand_conditional_branch (rtx, enum rtx_code, rtx, rtx);
 extern void riscv_expand_conditional_move (rtx, rtx, rtx, rtx_code, rtx, rtx);
+extern void riscv_expand_compare_and_swap (rtx[]);
 #endif
 extern rtx riscv_legitimize_call_address (rtx);
 extern void riscv_set_return_address (rtx, rtx);
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 5fe65776e608..a7b18d650daa 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2496,6 +2496,81 @@ riscv_expand_conditional_move (rtx dest, rtx cons, rtx 
alt, rtx_code code,
  cons, alt)));
 }
 
+/* Mark the previous jump instruction as unlikely.  */
+
+static void
+riscv_emit_unlikely_jump (rtx insn)
+{
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+}
+
+/* Expand code to perform a compare-and-swap.  */
+
+extern void
+riscv_expand_compare_and_swap (rtx operands[])
+{
+  rtx bval, oldval, mem, expval, newval, mod_s, mod_f, scratch, cond1, cond2;
+  machine_mode mode;
+  rtx_code_label *begin_label, *end_label;
+
+  bval = operands[0];
+  oldval = operands[1];
+  mem = operands[2];
+  expval = operands[3];
+  newval = operands[4];
+  mod_s = operands[6];
+  mod_f = operands[7];

[PATCH v2 10/10] RISC-V: Introduce predicate "riscv_sync_memory_operand" [PR 100266]

2021-05-05 Thread Christoph Muellner via Gcc-patches
Atomic instructions require zero-offset memory addresses.
If we allow all addresses, the nonzero-offset addresses will
be prepared in an extra register in an extra instruction before
the actual atomic instruction.

This patch introduces the predicate "riscv_sync_memory_operand",
which restricts the memory operand to be suitable for atomic
instructions.

gcc/
PR 100266
* config/riscv/sync.md (riscv_sync_memory_operand): New.
* config/riscv/sync.md (riscv_load_reserved): Use new predicate.
* config/riscv/sync.md (riscv_store_conditional): Likewise.
* config/riscv/sync.md (atomic_): Likewise.
* config/riscv/sync.md (atomic_fetch_): Likewise.
* config/riscv/sync.md (atomic_exchange): Likewise.
* config/riscv/sync.md (atomic_compare_and_swap): Likewise.
---
 gcc/config/riscv/sync.md | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index da8dbf698163..cd9078a40248 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -30,6 +30,10 @@
   UNSPEC_STORE_CONDITIONAL
 ])
 
+(define_predicate "riscv_sync_memory_operand"
+  (and (match_operand 0 "memory_operand")
+   (match_code "reg" "0")))
+
 (define_code_iterator any_atomic [plus ior xor and])
 (define_code_attr atomic_optab
   [(plus "add") (ior "or") (xor "xor") (and "and")])
@@ -118,7 +122,7 @@
 (define_insn "@riscv_load_reserved"
   [(set (match_operand:GPR 0 "register_operand" "=r")
 (unspec_volatile:GPR
-  [(match_operand:GPR 1 "memory_operand" "A")
+  [(match_operand:GPR 1 "riscv_sync_memory_operand" "A")
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_LOAD_RESERVED))]
   "TARGET_ATOMIC"
@@ -133,7 +137,7 @@
   [(set (match_operand:DI 0 "register_operand" "=r")
 (sign_extend:DI
   (unspec_volatile:SI
-   [(match_operand:SI 1 "memory_operand" "A")
+   [(match_operand:SI 1 "riscv_sync_memory_operand" "A")
 (match_operand:SI 2 "const_int_operand")]  ;; model
UNSPEC_LOAD_RESERVED)))]
   "TARGET_ATOMIC && TARGET_64BIT"
@@ -143,7 +147,7 @@
 (define_insn "@riscv_store_conditional"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 (unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
-   (set (match_operand:GPR 1 "memory_operand" "=A")
+   (set (match_operand:GPR 1 "riscv_sync_memory_operand" "=A")
 (unspec_volatile:GPR
   [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
(match_operand:SI 3 "const_int_operand")]  ;; model
@@ -162,7 +166,7 @@
   [(set (match_operand:DI 0 "register_operand" "=&r")
 (sign_extend:DI
   (unspec_volatile:SI [(const_int 0)] UNSPEC_STORE_CONDITIONAL)))
-   (set (match_operand:SI 1 "memory_operand" "=A")
+   (set (match_operand:SI 1 "riscv_sync_memory_operand" "=A")
 (unspec_volatile:SI
   [(match_operand:SI 2 "reg_or_0_operand" "rJ")
(match_operand:SI 3 "const_int_operand")]  ;; model
@@ -172,7 +176,7 @@
 )
 
 (define_insn "atomic_"
-  [(set (match_operand:GPR 0 "memory_operand" "+A")
+  [(set (match_operand:GPR 0 "riscv_sync_memory_operand" "+A")
(unspec_volatile:GPR
  [(any_atomic:GPR (match_dup 0)
 (match_operand:GPR 1 "reg_or_0_operand" "rJ"))
@@ -184,7 +188,7 @@
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
-   (match_operand:GPR 1 "memory_operand" "+A"))
+   (match_operand:GPR 1 "riscv_sync_memory_operand" "+A"))
(set (match_dup 1)
(unspec_volatile:GPR
  [(any_atomic:GPR (match_dup 1)
@@ -198,7 +202,7 @@
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
- [(match_operand:GPR 1 "memory_operand" "+A")
+ [(match_operand:GPR 1 "riscv_sync_memory_operand" "+A")
   (match_operand:SI 3 "const_int_operand")] ;; model
  UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
@@ -208,14 +212,14 @@
 )
 
 (define_expand "atomic_compare_and_swap"
-  [(match_operand:SI 0 "register_operand" "")   ;; bool output
-   (match_operand:GPR 1 "register_operand" "")  ;; val output
-   (match_operand:GPR 2 "memory_operand" "");; memory
-   (match_operand:GPR 3 "reg_or_0_operand" "")  ;; expected value
-   (match_operand:GPR 4 "reg_or_0_operand" "")  ;; desired value
-   (match_operand:SI 5 "const_int_operand" "")  ;; is_weak
-   (match_operand:SI 6 "const_int_operand" "")  ;; mod_s
-   (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
+  [(match_operand:SI 0 "register_operand" "")   ;; bool output
+   (match_operand:GPR 1 "register_operand" "")  ;; val output
+   (match_operand:GPR 2 "riscv_sync_memory_operand" "") ;; memory
+   (match_operand:GPR 3 "reg_or_0_operand" "")  ;; expected value
+   (match_operand:GPR 4 "reg_or_0_operand" "")  ;; desired value
+   (match_operand:SI 5 "const_int_opera

[PATCH v2] REE: PR rtl-optimization/100264: Handle more PARALLEL SET expressions

2021-05-10 Thread Christoph Muellner via Gcc-patches
Move the check for register targets (i.e. REG_P ()) into the function
get_sub_rtx () and change the restriction of REE to "only one child of
a PARALLEL expression is a SET register expression" (was "only one child of
a PARALLEL expression is a SET expression").

This allows to handle more PARALLEL SET expressions.

gcc/ChangeLog:
PR rtl-optimization/100264
* ree.c (get_sub_rtx): Ignore SET expressions without register
destinations and remove assertion, as it is not valid anymore
with this new behaviour.
(merge_def_and_ext): Eliminate destination check for register
as such SET expressions can't occur anymore.
(combine_reaching_defs): Likewise.
---
 gcc/ree.c | 30 ++
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/gcc/ree.c b/gcc/ree.c
index 65457c582c6a..e31ca2fa1a80 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -658,10 +658,11 @@ make_defs_and_copies_lists (rtx_insn *extend_insn, 
const_rtx set_pat,
   return ret;
 }
 
-/* If DEF_INSN has single SET expression, possibly buried inside
-   a PARALLEL, return the address of the SET expression, else
-   return NULL.  This is similar to single_set, except that
-   single_set allows multiple SETs when all but one is dead.  */
+/* If DEF_INSN has single SET expression with a register
+   destination, possibly buried inside a PARALLEL, return
+   the address of the SET expression, else return NULL.
+   This is similar to single_set, except that single_set
+   allows multiple SETs when all but one is dead.  */
 static rtx *
 get_sub_rtx (rtx_insn *def_insn)
 {
@@ -675,6 +676,8 @@ get_sub_rtx (rtx_insn *def_insn)
   rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
   if (GET_CODE (s_expr) != SET)
 continue;
+ if (!REG_P (SET_DEST (s_expr)))
+   continue;
 
   if (sub_rtx == NULL)
 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
@@ -686,14 +689,12 @@ get_sub_rtx (rtx_insn *def_insn)
 }
 }
   else if (code == SET)
-sub_rtx = &PATTERN (def_insn);
-  else
 {
-  /* It is not a PARALLEL or a SET, what could it be ? */
-  return NULL;
+   rtx s_expr = PATTERN (def_insn);
+   if (REG_P (SET_DEST (s_expr)))
+ sub_rtx = &PATTERN (def_insn);
 }
 
-  gcc_assert (sub_rtx != NULL);
   return sub_rtx;
 }
 
@@ -712,13 +713,12 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, 
ext_state *state)
   if (sub_rtx == NULL)
 return false;
 
-  if (REG_P (SET_DEST (*sub_rtx))
-  && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
+  if (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
  || ((state->modified[INSN_UID (def_insn)].kind
   == (cand->code == ZERO_EXTEND
   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
  && state->modified[INSN_UID (def_insn)].mode
-== ext_src_mode)))
+== ext_src_mode))
 {
   if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
  >= GET_MODE_UNIT_SIZE (cand->mode))
@@ -853,8 +853,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 CAND->insn, then this transformation is not safe.  Note we have
 to test in the widened mode.  */
   rtx *dest_sub_rtx = get_sub_rtx (def_insn);
-  if (dest_sub_rtx == NULL
- || !REG_P (SET_DEST (*dest_sub_rtx)))
+  if (dest_sub_rtx == NULL)
return false;
 
   rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (set)),
@@ -947,8 +946,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
break;
 
  rtx *dest_sub_rtx2 = get_sub_rtx (def_insn2);
- if (dest_sub_rtx2 == NULL
- || !REG_P (SET_DEST (*dest_sub_rtx2)))
+ if (dest_sub_rtx2 == NULL)
break;
 
  /* On RISC machines we must make sure that changing the mode of
-- 
2.31.1



[PATCH v3 0/9] [RISC-V] Atomics improvements

2022-05-26 Thread Christoph Muellner via Gcc-patches
This series provides a cleanup of the current atomics implementation
of RISC-V (PR100265: Use proper fences for atomic load/store).

The first patch could be squashed into the following patches,
but I found it easier to understand the changes with it in place.

The series has been tested as follows:
* Building and testing a multilib RV32/64 toolchain
  (bootstrapped with riscv-gnu-toolchain repo)
* Manual review of generated sequences for GCC's atomic builtins API

This series was developed more than a year ago, but got never merged.

v1 can be found here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568684.html

v2 can be found here:
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/569691.html

Jim expressed concerns about patch 9/10 (which was inspired by the
AArch64 implementation), that it won't emit the expected CAS sequence
under register pressure. Therefore, I've dropped the patch from the
series in v3.

Changes for v3:
* Rebase/retest on master
* Drop patch 9/10 ("Provide programmatic implementation of CAS")

Changes for v2:
* Guard LL/SC sequence by compiler barriers ("blockage")
  (suggested by Andrew Waterman)
* Changed commit message for AMOSWAP->STORE change
  (suggested by Andrew Waterman)
* Extracted cbranch4 patch from patchset (suggested by Kito Cheng)
* Introduce predicate riscv_sync_memory_operand (suggested by Jim Wilson)
* Fix small code style issue

Christoph Muellner (9):
  RISC-V: Simplify memory model code [PR 100265]
  RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]
  RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]
  RISC-V: Use STORE instead of AMOSWAP for atomic stores [PR 100265]
  RISC-V: Emit fences according to chosen memory model [PR 100265]
  RISC-V: Implement atomic_{load,store} [PR 100265]
  RISC-V: Model INSNs for LR and SC [PR 100266]
  RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]
  RISC-V: Introduce predicate "riscv_sync_memory_operand" [PR 100266]

 gcc/config/riscv/riscv.cc |  61 +++--
 gcc/config/riscv/sync.md  | 183 ++
 2 files changed, 159 insertions(+), 85 deletions(-)

-- 
2.35.3



[PATCH v3 1/9] RISC-V: Simplify memory model code [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
We don't have any special treatment of MEMMODEL_SYNC_* values,
so let's hide them behind the memmodel_base() function.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Ignore MEMMODEL_SYNC_* values.
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Likewise.
* config/riscv/riscv.c (riscv_print_operand): Eliminate
  MEMMODEL_SYNC_* values by calling memmodel_base().
---
 gcc/config/riscv/riscv.cc | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f83dc796d88..1a130f1fe3b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3578,20 +3578,17 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
acquire portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_amo_acquire (enum memmodel model)
+riscv_memmodel_needs_amo_acquire (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
return true;
 
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3604,20 +3601,17 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model)
implement the release portion of memory model MODEL.  */
 
 static bool
-riscv_memmodel_needs_release_fence (enum memmodel model)
+riscv_memmodel_needs_release_fence (const enum memmodel model)
 {
   switch (model)
 {
   case MEMMODEL_ACQ_REL:
   case MEMMODEL_SEQ_CST:
-  case MEMMODEL_SYNC_SEQ_CST:
   case MEMMODEL_RELEASE:
-  case MEMMODEL_SYNC_RELEASE:
return true;
 
   case MEMMODEL_ACQUIRE:
   case MEMMODEL_CONSUME:
-  case MEMMODEL_SYNC_ACQUIRE:
   case MEMMODEL_RELAXED:
return false;
 
@@ -3644,6 +3638,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
 {
   machine_mode mode = GET_MODE (op);
   enum rtx_code code = GET_CODE (op);
+  const enum memmodel model = memmodel_base (INTVAL (op));
 
   switch (letter)
 {
@@ -3663,12 +3658,12 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_amo_acquire (model))
fputs (".aq", file);
   break;
 
 case 'F':
-  if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+  if (riscv_memmodel_needs_release_fence (model))
fputs ("fence iorw,ow; ", file);
   break;
 
-- 
2.35.3



[PATCH v3 2/9] RISC-V: Emit proper memory ordering suffixes for AMOs [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
The ratified A extension supports '.aq', '.rl' and '.aqrl' as
memory ordering suffixes. Let's emit them in case we get a '%A'
conversion specifier for riscv_print_operand().

As '%A' was already used for a similar, but restricted, purpose
(only '.aq' was emitted so far), this does not require any other
changes.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_amo_acquire):
  Remove function.
* config/riscv/riscv.c (riscv_print_amo_memory_ordering_suffix):
  Add function to emit AMO memory ordering suffixes.
* config/riscv/riscv.c (riscv_print_operand): Call
  riscv_print_amo_memory_ordering_suffix() instead of
  riscv_memmodel_needs_amo_acquire().
---
 gcc/config/riscv/riscv.cc | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1a130f1fe3b..983a567c69c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3574,24 +3574,26 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
   fputc (')', file);
 }
 
-/* Return true if the .AQ suffix should be added to an AMO to implement the
-   acquire portion of memory model MODEL.  */
+/* Print the memory ordering suffix for AMOs.  */
 
-static bool
-riscv_memmodel_needs_amo_acquire (const enum memmodel model)
+static void
+riscv_print_amo_memory_ordering_suffix (FILE *file, const enum memmodel model)
 {
   switch (model)
 {
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_ACQUIRE:
+  case MEMMODEL_RELAXED:
+   break;
   case MEMMODEL_CONSUME:
-   return true;
-
+  case MEMMODEL_ACQUIRE:
+   fputs (".aq", file);
+   break;
   case MEMMODEL_RELEASE:
-  case MEMMODEL_RELAXED:
-   return false;
-
+   fputs (".rl", file);
+   break;
+  case MEMMODEL_ACQ_REL:
+  case MEMMODEL_SEQ_CST:
+   fputs (".aqrl", file);
+   break;
   default:
gcc_unreachable ();
 }
@@ -3658,8 +3660,7 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   break;
 
 case 'A':
-  if (riscv_memmodel_needs_amo_acquire (model))
-   fputs (".aq", file);
+  riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
 case 'F':
-- 
2.35.3



[PATCH v3 3/9] RISC-V: Eliminate %F specifier from riscv_print_operand() [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
A previous patch took care, that the proper memory ordering suffixes
for AMOs are emitted. Therefore there is no reason to keep the fence
generation mechanism for release operations.

gcc/
PR 100265
* config/riscv/riscv.c (riscv_memmodel_needs_release_fence):
  Remove function.
* config/riscv/riscv.c (riscv_print_operand): Remove
  %F format specifier.
* config/riscv/sync.md: Remove %F format specifier uses.
---
 gcc/config/riscv/riscv.cc | 29 -
 gcc/config/riscv/sync.md  | 16 
 2 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 983a567c69c..5bb22044ce9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3599,29 +3599,6 @@ riscv_print_amo_memory_ordering_suffix (FILE *file, 
const enum memmodel model)
 }
 }
 
-/* Return true if a FENCE should be emitted to before a memory access to
-   implement the release portion of memory model MODEL.  */
-
-static bool
-riscv_memmodel_needs_release_fence (const enum memmodel model)
-{
-  switch (model)
-{
-  case MEMMODEL_ACQ_REL:
-  case MEMMODEL_SEQ_CST:
-  case MEMMODEL_RELEASE:
-   return true;
-
-  case MEMMODEL_ACQUIRE:
-  case MEMMODEL_CONSUME:
-  case MEMMODEL_RELAXED:
-   return false;
-
-  default:
-   gcc_unreachable ();
-}
-}
-
 /* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:
 
'h' Print the high-part relocation associated with OP, after stripping
@@ -3629,7 +3606,6 @@ riscv_memmodel_needs_release_fence (const enum memmodel 
model)
'R' Print the low-part relocation associated with OP.
'C' Print the integer branch condition for comparison OP.
'A' Print the atomic operation suffix for memory model OP.
-   'F' Print a FENCE if the memory model requires a release.
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
@@ -3663,11 +3639,6 @@ riscv_print_operand (FILE *file, rtx op, int letter)
   riscv_print_amo_memory_ordering_suffix (file, model);
   break;
 
-case 'F':
-  if (riscv_memmodel_needs_release_fence (model))
-   fputs ("fence iorw,ow; ", file);
-  break;
-
 case 'i':
   if (code != REG)
 fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 86b41e6b00a..ddaeda0116d 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -65,7 +65,7 @@ (define_insn "atomic_store"
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_ATOMIC_STORE))]
   "TARGET_ATOMIC"
-  "%F2amoswap.%A2 zero,%z1,%0"
+  "amoswap.%A2 zero,%z1,%0"
   [(set (attr "length") (const_int 8))])
 
 (define_insn "atomic_"
@@ -76,8 +76,8 @@ (define_insn "atomic_"
   (match_operand:SI 2 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F2amo.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A2 zero,%z1,%0"
+)
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -89,8 +89,8 @@ (define_insn "atomic_fetch_"
   (match_operand:SI 3 "const_int_operand")] ;; model
 UNSPEC_SYNC_OLD_OP))]
   "TARGET_ATOMIC"
-  "%F3amo.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amo.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -101,8 +101,8 @@ (define_insn "atomic_exchange"
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "0"))]
   "TARGET_ATOMIC"
-  "%F3amoswap.%A3 %0,%z2,%1"
-  [(set (attr "length") (const_int 8))])
+  "amoswap.%A3 %0,%z2,%1"
+)
 
 (define_insn "atomic_cas_value_strong"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -115,7 +115,7 @@ (define_insn "atomic_cas_value_strong"
 UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 6 "=&r"))]
   "TARGET_ATOMIC"
-  "%F5 1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 
1:"
+  "1: lr.%A5 %0,%1; bne %0,%z2,1f; sc.%A4 %6,%z3,%1; bnez %6,1b; 1:"
   [(set (attr "length") (const_int 20))])
 
 (define_expand "atomic_compare_and_swap"
-- 
2.35.3



[PATCH v3 4/9] RISC-V: Use STORE instead of AMOSWAP for atomic stores [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
Using AMOSWAP as atomic store does not allow us to do sub-word accesses.
Further, it is not consistent with our atomic_load () implementation.
The benefit of AMOSWAP is that the resulting code sequence will be
smaller (compared to FENCE+STORE); however, this does not outweigh
the lack of sub-word accesses.
Additionally, HW implementors have claimed that an optimal
implementation of AMOSWAP is slightly more expensive than FENCE+STORE.
So let's use STORE instead of AMOSWAP.

gcc/
PR 100265
* config/riscv/sync.md (atomic_store):
  Remove.
---
 gcc/config/riscv/sync.md | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ddaeda0116d..86f4cef6af9 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -57,17 +57,6 @@ (define_insn "mem_thread_fence_1"
 
 ;; Atomic memory operations.
 
-;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
-(define_insn "atomic_store"
-  [(set (match_operand:GPR 0 "memory_operand" "=A")
-(unspec_volatile:GPR
-  [(match_operand:GPR 1 "reg_or_0_operand" "rJ")
-   (match_operand:SI 2 "const_int_operand")]  ;; model
-  UNSPEC_ATOMIC_STORE))]
-  "TARGET_ATOMIC"
-  "amoswap.%A2 zero,%z1,%0"
-  [(set (attr "length") (const_int 8))])
-
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.35.3



[PATCH v3 5/9] RISC-V: Emit fences according to chosen memory model [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
mem_thread_fence gets the desired memory model as operand.
Let's emit fences according to this value (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).

gcc/
PR 100265
* config/riscv/sync.md (mem_thread_fence):
  Emit fences according to given operand.
* config/riscv/sync.md (mem_fence):
  Add INSNs for different fence flavours.
* config/riscv/sync.md (mem_thread_fence_1):
  Remove.
---
 gcc/config/riscv/sync.md | 41 +++-
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 86f4cef6af9..ae80f94f2e0 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -34,26 +34,41 @@ (define_code_attr atomic_optab
 ;; Memory barriers.
 
 (define_expand "mem_thread_fence"
-  [(match_operand:SI 0 "const_int_operand" "")] ;; model
+  [(match_operand:SI 0 "const_int_operand")] ;; model
   ""
 {
-  if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
-{
-  rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (mem) = 1;
-  emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
-}
+  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
+  if (!(is_mm_relaxed (model)))
+  emit_insn (gen_mem_fence (operands[0]));
   DONE;
 })
 
-;; Until the RISC-V memory model (hence its mapping from C++) is finalized,
-;; conservatively emit a full FENCE.
-(define_insn "mem_thread_fence_1"
+(define_expand "mem_fence"
+  [(set (match_dup 1)
+   (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
+  ""
+{
+  operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[1]) = 1;
+})
+
+(define_insn "*mem_fence"
   [(set (match_operand:BLK 0 "" "")
-   (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
-   (match_operand:SI 1 "const_int_operand" "")] ;; model
+   (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+   UNSPEC_MEMORY_BARRIER))]
   ""
-  "fence\tiorw,iorw")
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[1]));
+  if (is_mm_consume (model) || is_mm_acquire (model))
+return "fence\tr, rw";
+  else if (is_mm_release (model))
+return "fence\trw, w";
+  else if (is_mm_acq_rel (model))
+return "fence.tso";
+  else
+return "fence\trw, rw";
+})
 
 ;; Atomic memory operations.
 
-- 
2.35.3



[PATCH v3 6/9] RISC-V: Implement atomic_{load,store} [PR 100265]

2022-05-26 Thread Christoph Muellner via Gcc-patches
A recent commit introduced a mechanism to emit proper fences
for RISC-V. Additionally, we already have emit_move_insn ().
Let's reuse this code and provide atomic_load and
atomic_store for RISC-V (as defined in section
"Code Porting and Mapping Guidelines" of the unpriv spec).
Note, that this works also for sub-word atomics.

gcc/
PR 100265
* config/riscv/sync.md (atomic_load): New.
* config/riscv/sync.md (atomic_store): New.
---
 gcc/config/riscv/sync.md | 41 
 1 file changed, 41 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index ae80f94f2e0..9eb0dde9086 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -23,6 +23,7 @@ (define_c_enum "unspec" [
   UNSPEC_COMPARE_AND_SWAP
   UNSPEC_SYNC_OLD_OP
   UNSPEC_SYNC_EXCHANGE
+  UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
 ])
@@ -72,6 +73,46 @@ (define_insn "*mem_fence"
 
 ;; Atomic memory operations.
 
+(define_expand "atomic_load<mode>"
+  [(set (match_operand:ANYI 0 "register_operand" "=r")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_LOAD))]
+  ""
+  {
+rtx target = operands[0];
+rtx mem = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_SEQ_CST)));
+emit_move_insn (target, mem);
+if (is_mm_acquire (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_ACQUIRE)));
+
+DONE;
+})
+
+(define_expand "atomic_store<mode>"
+  [(set (match_operand:ANYI 0 "memory_operand" "=A")
+(unspec_volatile:ANYI
+  [(match_operand:ANYI 1 "reg_or_0_operand" "rJ")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_ATOMIC_STORE))]
+  ""
+  {
+rtx mem = operands[0];
+rtx val = operands[1];
+enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+
+if (is_mm_release (model) || is_mm_seq_cst (model))
+  emit_insn (gen_mem_fence (GEN_INT (MEMMODEL_RELEASE)));
+emit_move_insn (mem, val);
+
+DONE;
+})
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.35.3



[PATCH v3 7/9] RISC-V: Model INSNs for LR and SC [PR 100266]

2022-05-26 Thread Christoph Muellner via Gcc-patches
In order to emit LR/SC sequences, let's provide INSNs, which
take care of memory ordering constraints.

gcc/
PR 100266
* config/riscv/sync.md (UNSPEC_LOAD_RESERVED): New.
* config/riscv/sync.md (UNSPEC_STORE_CONDITIONAL): New.
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 9eb0dde9086..3494683947e 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -26,6 +26,8 @@ (define_c_enum "unspec" [
   UNSPEC_ATOMIC_LOAD
   UNSPEC_ATOMIC_STORE
   UNSPEC_MEMORY_BARRIER
+  UNSPEC_LOAD_RESERVED
+  UNSPEC_STORE_CONDITIONAL
 ])
 
 (define_code_iterator any_atomic [plus ior xor and])
@@ -113,6 +115,28 @@ (define_expand "atomic_store"
 DONE;
 })
 
+(define_insn "@riscv_load_reserved"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(unspec_volatile:GPR
+  [(match_operand:GPR 1 "memory_operand" "A")
+   (match_operand:SI 2 "const_int_operand")]  ;; model
+  UNSPEC_LOAD_RESERVED))]
+  "TARGET_ATOMIC"
+  "lr.%A2 %0, %1"
+)
+
+(define_insn "@riscv_store_conditional"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+(unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
+   (set (match_operand:GPR 1 "memory_operand" "=A")
+(unspec_volatile:GPR
+  [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC"
+  "sc.%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.35.3



[PATCH v3 8/9] RISC-V: Add s.ext-consuming INSNs for LR and SC [PR 100266]

2022-05-26 Thread Christoph Muellner via Gcc-patches
The current model of the LR and SC INSNs requires a sign-extension
to use the generated SImode value for conditional branches, which
only operate on XLEN registers.
However, the sign-extension is actually not required in both cases,
therefore this patch introduces additional INSNs that consume
the sign-extension.

Rationale:
The loaded value of an LR.W is specified to be sign-extended,
so an explicit sign-extension is not required.
The success value of an SC.W is specified to be non-zero. As a
sign-extended non-zero value remains non-zero, the explicit
sign-extension is not required here either.

gcc/
PR 100266
* config/riscv/sync.md (riscv_load_reserved): New.
* config/riscv/sync.md (riscv_store_conditional): New.
---
 gcc/config/riscv/sync.md | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 3494683947e..66548bf891b 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -125,6 +125,21 @@ (define_insn "@riscv_load_reserved"
   "lr.%A2 %0, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the loaded value.
+;; This is legal, because the specification of LR.W defines the loaded
+;; value to be sign-extended.
+
+(define_insn "riscv_load_reserved"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+(sign_extend:DI
+  (unspec_volatile:SI
+   [(match_operand:SI 1 "memory_operand" "A")
+(match_operand:SI 2 "const_int_operand")]  ;; model
+   UNSPEC_LOAD_RESERVED)))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "lr.w%A2 %0, %1"
+)
+
 (define_insn "@riscv_store_conditional"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 (unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
@@ -137,6 +152,25 @@ (define_insn "@riscv_store_conditional"
   "sc.%A3 %0, %z2, %1"
 )
 
+;; This pattern allows to consume a sign-extension of the success
+;; value of SC.W, which can then be used for instructions which
+;; require values of XLEN-size (e.g. conditional branches).
+;; This is legal, because any non-zero value remains non-zero
+;; after sign-extension.
+
+(define_insn "riscv_store_conditional"
+  [(set (match_operand:DI 0 "register_operand" "=&r")
+(sign_extend:DI
+  (unspec_volatile:SI [(const_int 0)] UNSPEC_STORE_CONDITIONAL)))
+   (set (match_operand:SI 1 "memory_operand" "=A")
+(unspec_volatile:SI
+  [(match_operand:SI 2 "reg_or_0_operand" "rJ")
+   (match_operand:SI 3 "const_int_operand")]  ;; model
+  UNSPEC_STORE_CONDITIONAL))]
+  "TARGET_ATOMIC && TARGET_64BIT"
+  "sc.w%A3 %0, %z2, %1"
+)
+
 (define_insn "atomic_"
   [(set (match_operand:GPR 0 "memory_operand" "+A")
(unspec_volatile:GPR
-- 
2.35.3



[PATCH v3 9/9] RISC-V: Introduce predicate "riscv_sync_memory_operand" [PR 100266]

2022-05-26 Thread Christoph Muellner via Gcc-patches
Atomic instructions require zero-offset memory addresses.
If we allow all addresses, the nonzero-offset addresses will
be prepared in an extra register in an extra instruction before
the actual atomic instruction.

This patch introduces the predicate "riscv_sync_memory_operand",
which restricts the memory operand to be suitable for atomic
instructions.

gcc/
PR 100266
* config/riscv/sync.md (riscv_sync_memory_operand): New.
* config/riscv/sync.md (riscv_load_reserved): Use new predicate.
* config/riscv/sync.md (riscv_store_conditional): Likewise.
* config/riscv/sync.md (atomic_<atomic_optab><mode>): Likewise.
* config/riscv/sync.md (atomic_fetch_<atomic_optab><mode>): Likewise.
* config/riscv/sync.md (atomic_exchange<mode>): Likewise.
* config/riscv/sync.md (atomic_compare_and_swap<mode>): Likewise.
---
 gcc/config/riscv/sync.md | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 66548bf891b..8f184d8bbb4 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -30,6 +30,10 @@ (define_c_enum "unspec" [
   UNSPEC_STORE_CONDITIONAL
 ])
 
+(define_predicate "riscv_sync_memory_operand"
+  (and (match_operand 0 "memory_operand")
+   (match_code "reg" "0")))
+
 (define_code_iterator any_atomic [plus ior xor and])
 (define_code_attr atomic_optab
   [(plus "add") (ior "or") (xor "xor") (and "and")])
@@ -118,7 +122,7 @@ (define_expand "atomic_store"
 (define_insn "@riscv_load_reserved"
   [(set (match_operand:GPR 0 "register_operand" "=r")
 (unspec_volatile:GPR
-  [(match_operand:GPR 1 "memory_operand" "A")
+  [(match_operand:GPR 1 "riscv_sync_memory_operand" "A")
(match_operand:SI 2 "const_int_operand")]  ;; model
   UNSPEC_LOAD_RESERVED))]
   "TARGET_ATOMIC"
@@ -133,7 +137,7 @@ (define_insn "riscv_load_reserved"
   [(set (match_operand:DI 0 "register_operand" "=r")
 (sign_extend:DI
   (unspec_volatile:SI
-   [(match_operand:SI 1 "memory_operand" "A")
+   [(match_operand:SI 1 "riscv_sync_memory_operand" "A")
 (match_operand:SI 2 "const_int_operand")]  ;; model
UNSPEC_LOAD_RESERVED)))]
   "TARGET_ATOMIC && TARGET_64BIT"
@@ -143,7 +147,7 @@ (define_insn "riscv_load_reserved"
 (define_insn "@riscv_store_conditional"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 (unspec_volatile:GPR [(const_int 0)] UNSPEC_STORE_CONDITIONAL))
-   (set (match_operand:GPR 1 "memory_operand" "=A")
+   (set (match_operand:GPR 1 "riscv_sync_memory_operand" "=A")
 (unspec_volatile:GPR
   [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
(match_operand:SI 3 "const_int_operand")]  ;; model
@@ -162,7 +166,7 @@ (define_insn "riscv_store_conditional"
   [(set (match_operand:DI 0 "register_operand" "=&r")
 (sign_extend:DI
   (unspec_volatile:SI [(const_int 0)] UNSPEC_STORE_CONDITIONAL)))
-   (set (match_operand:SI 1 "memory_operand" "=A")
+   (set (match_operand:SI 1 "riscv_sync_memory_operand" "=A")
 (unspec_volatile:SI
   [(match_operand:SI 2 "reg_or_0_operand" "rJ")
(match_operand:SI 3 "const_int_operand")]  ;; model
@@ -172,7 +176,7 @@ (define_insn "riscv_store_conditional"
 )
 
 (define_insn "atomic_"
-  [(set (match_operand:GPR 0 "memory_operand" "+A")
+  [(set (match_operand:GPR 0 "riscv_sync_memory_operand" "+A")
(unspec_volatile:GPR
  [(any_atomic:GPR (match_dup 0)
 (match_operand:GPR 1 "reg_or_0_operand" "rJ"))
@@ -184,7 +188,7 @@ (define_insn "atomic_"
 
 (define_insn "atomic_fetch_"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
-   (match_operand:GPR 1 "memory_operand" "+A"))
+   (match_operand:GPR 1 "riscv_sync_memory_operand" "+A"))
(set (match_dup 1)
(unspec_volatile:GPR
  [(any_atomic:GPR (match_dup 1)
@@ -198,7 +202,7 @@ (define_insn "atomic_fetch_"
 (define_insn "atomic_exchange"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
- [(match_operand:GPR 1 "memory_operand" "+A")
+ [(match_operand:GPR 1 "riscv_sync_memory_operand" "+A")
   (match_operand:SI 3 "const_int_operand")] ;; model
  UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
@@ -222,14 +226,14 @@ (define_insn "atomic_cas_value_strong"
   [(set (attr "length") (const_int 20))])
 
 (define_expand "atomic_compare_and_swap"
-  [(match_operand:SI 0 "register_operand" "")   ;; bool output
-   (match_operand:GPR 1 "register_operand" "")  ;; val output
-   (match_operand:GPR 2 "memory_operand" "");; memory
-   (match_operand:GPR 3 "reg_or_0_operand" "")  ;; expected value
-   (match_operand:GPR 4 "reg_or_0_operand" "")  ;; desired value
-   (match_operand:SI 5 "const_int_operand" "")  ;; is_weak
-   (match_operand:SI 6 "const_int_operand" "")  ;; mod_s
-   (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
+  [(match_operand:SI 0 "register_operand" "")   ;; bool o

[RFC PATCH] RISC-V: Add Zawrs ISA extension support

2022-06-01 Thread Christoph Muellner via Gcc-patches
This patch adds support for the Zawrs ISA extension.
The patch depends on the corresponding Binutils patch
to be usable (see [1])

The specification can be found here:
https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc

Note, that the Zawrs extension is not frozen or ratified yet.
Therefore this patch is an RFC and not intended to get merged.

[1] https://sourceware.org/pipermail/binutils/2022-April/120559.html

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Add zawrs extension.
* config/riscv/riscv-opts.h (MASK_ZAWRS): New.
(TARGET_ZAWRS): New.
* config/riscv/riscv.opt: New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zawrs.c: New test.

Signed-off-by: Christoph Muellner 
---
 gcc/common/config/riscv/riscv-common.cc |  4 
 gcc/config/riscv/riscv-opts.h   |  3 +++
 gcc/config/riscv/riscv.opt  |  3 +++
 gcc/testsuite/gcc.target/riscv/zawrs.c  | 13 +
 4 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zawrs.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0e5be2ce105..7dc5a64006a 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -149,6 +149,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zifencei", ISA_SPEC_CLASS_20191213, 2, 0},
   {"zifencei", ISA_SPEC_CLASS_20190608, 2, 0},
 
+  {"zawrs", ISA_SPEC_CLASS_NONE, 1, 0},
+
   {"zba", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbb", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbc", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1098,6 +1100,8 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zicsr",&gcc_options::x_riscv_zi_subext, MASK_ZICSR},
   {"zifencei", &gcc_options::x_riscv_zi_subext, MASK_ZIFENCEI},
 
+  {"zawrs", &gcc_options::x_riscv_za_subext, MASK_ZAWRS},
+
   {"zba",&gcc_options::x_riscv_zb_subext, MASK_ZBA},
   {"zbb",&gcc_options::x_riscv_zb_subext, MASK_ZBB},
   {"zbc",&gcc_options::x_riscv_zb_subext, MASK_ZBC},
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 1e153b3a6e7..ced086b955d 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -73,6 +73,9 @@ enum stack_protector_guard {
 #define TARGET_ZICSR((riscv_zi_subext & MASK_ZICSR) != 0)
 #define TARGET_ZIFENCEI ((riscv_zi_subext & MASK_ZIFENCEI) != 0)
 
+#define MASK_ZAWRS   (1 << 0)
+#define TARGET_ZAWRS ((riscv_za_subext & MASK_ZAWRS) != 0)
+
 #define MASK_ZBA  (1 << 0)
 #define MASK_ZBB  (1 << 1)
 #define MASK_ZBC  (1 << 2)
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 9e9fe6d8ccd..f01850e3b19 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -197,6 +197,9 @@ long riscv_stack_protector_guard_offset = 0
 TargetVariable
 int riscv_zi_subext
 
+TargetVariable
+int riscv_za_subext
+
 TargetVariable
 int riscv_zb_subext
 
diff --git a/gcc/testsuite/gcc.target/riscv/zawrs.c 
b/gcc/testsuite/gcc.target/riscv/zawrs.c
new file mode 100644
index 000..0b7e2662343
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zawrs.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zawrs" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc_zawrs" { target { rv32 } } } */
+
+#ifndef __riscv_zawrs
+#error Feature macro not defined
+#endif
+
+int
+foo (int a)
+{
+  return a;
+}
-- 
2.35.3