The RISC-V cpymemsi expansion is called whenever the by-pieces
infrastructure does not take care of the builtin expansion.
The code emitted by the by-pieces infrastructure may include unaligned
accesses if riscv_slow_unaligned_access_p is false.
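For illustration (this example is not taken from the patch; it mirrors
the cpymem tests adjusted below), consider a copy like:

  void copy_11 (char *dst, const char *src)
  {
    __builtin_memcpy (dst, src, 11);
  }

If riscv_slow_unaligned_access_p is false, by-pieces expands this with
word-sized accesses although dst and src are only known to be
byte-aligned, e.g. roughly lw/sw for the leading words plus lbu/sb for
the tail on rv32, rather than eleven lbu/sb pairs.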
The RISC-V cpymemsi expansion is handled via riscv_expand_block_move().
The current implementation of this function does not check
riscv_slow_unaligned_access_p and never emits unaligned accesses.

Since by-pieces emits unaligned accesses, it is reasonable to implement
the same behaviour in the cpymemsi expansion, and that is what this
patch does: it checks riscv_slow_unaligned_access_p on entry and sets
the allowed alignment accordingly.  This alignment is then propagated
down to the routines that emit the actual instructions.

The changes introduced by this patch can be seen in the adjustments
of the cpymem tests.

gcc/ChangeLog:

	* config/riscv/riscv-string.cc (riscv_block_move_straight): Add
	parameter align.
	(riscv_adjust_block_mem): Replace parameter length by align.
	(riscv_block_move_loop): Add parameter align.
	(riscv_expand_block_move_scalar): Set alignment properly if the
	target has fast unaligned access.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/cpymem-32-ooo.c: Adjust for unaligned access.
	* gcc.target/riscv/cpymem-64-ooo.c: Likewise.

Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
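As a rough stand-alone model of the new align/factor selection in
riscv_expand_block_move_scalar() (a sketch in plain C, not the GCC
code itself: slow_unaligned, src_align/dest_align and bits_per_word
stand in for riscv_slow_unaligned_access_p, MEM_ALIGN and
BITS_PER_WORD):

  #include <stdio.h>

  /* Rough model of the new align/factor selection (align in bits).  */
  static void
  pick_align (int slow_unaligned, unsigned src_align, unsigned dest_align,
	      unsigned length_bytes, unsigned bits_per_word,
	      unsigned *align, unsigned *factor)
  {
    if (slow_unaligned)
      {
	/* Accesses must respect the known operand alignment.  */
	unsigned a = src_align < dest_align ? src_align : dest_align;
	*align = a < bits_per_word ? a : bits_per_word;
	*factor = bits_per_word / *align;
      }
    else
      {
	/* Fast unaligned access: treat the whole block as aligned.  */
	*align = length_bytes * 8;
	*factor = 1;
      }
  }

  int main (void)
  {
    unsigned align, factor;
    /* Byte-aligned 15-byte copy on a 64-bit target.  */
    pick_align (0, 8, 8, 15, 64, &align, &factor);
    printf ("align=%u bits, factor=%u\n", align, factor);  /* 120, 1 */
    return 0;
  }

With slow_unaligned set, the result matches the old behaviour (align
limited by the operands); with it clear, the whole block counts as
aligned and factor is 1, so e.g. a byte-aligned 15-byte copy can be
expanded straight with word accesses.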
 gcc/config/riscv/riscv-string.cc              | 53 +++++++++++--------
 .../gcc.target/riscv/cpymem-32-ooo.c          | 20 +++++--
 .../gcc.target/riscv/cpymem-64-ooo.c          | 14 ++++-
 3 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b09b51d7526..8fc0877772f 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -610,11 +610,13 @@ riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
   return false;
 }
 
-/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST
+   with accesses that are ALIGN bytes aligned.
    Assume that the areas do not overlap.  */
 
 static void
-riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
+riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+			   unsigned HOST_WIDE_INT align)
 {
   unsigned HOST_WIDE_INT offset, delta;
   unsigned HOST_WIDE_INT bits;
@@ -622,8 +624,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
   enum machine_mode mode;
   rtx *regs;
 
-  bits = MAX (BITS_PER_UNIT,
-	      MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
+  bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align));
 
   mode = mode_for_size (bits, MODE_INT, 0).require ();
   delta = bits / BITS_PER_UNIT;
@@ -648,21 +649,20 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
     {
       src = adjust_address (src, BLKmode, offset);
       dest = adjust_address (dest, BLKmode, offset);
-      move_by_pieces (dest, src, length - offset,
-		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
+      move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN);
     }
 }
 
 /* Helper function for doing a loop-based block operation on memory
-   reference MEM.  Each iteration of the loop will operate on LENGTH
-   bytes of MEM.
+   reference MEM.
 
    Create a new base register for use within the loop and point it to
    the start of MEM.  Create a new memory reference that uses this
-   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+   register and has an alignment of ALIGN.  Store them in *LOOP_REG
+   and *LOOP_MEM respectively.  */
 
 static void
-riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
+riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align,
 			rtx *loop_reg, rtx *loop_mem)
 {
   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
@@ -670,15 +670,17 @@ riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
   /* Although the new mem does not refer to a known location,
      it does keep up to LENGTH bytes of alignment.  */
   *loop_mem = change_address (mem, BLKmode, *loop_reg);
-  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+  set_mem_align (*loop_mem, align);
 }
 
 /* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
-   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
-   the memory regions do not overlap.  */
+   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  The alignment
+   of the access can be set by ALIGN.  Assume that the memory regions do not
+   overlap.  */
 
 static void
 riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+		       unsigned HOST_WIDE_INT align,
 		       unsigned HOST_WIDE_INT bytes_per_iter)
 {
   rtx label, src_reg, dest_reg, final_src, test;
@@ -688,8 +690,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
   length -= leftover;
 
   /* Create registers and memory references for use within the loop.  */
-  riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
-  riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+  riscv_adjust_block_mem (src, align, &src_reg, &src);
+  riscv_adjust_block_mem (dest, align, &dest_reg, &dest);
 
   /* Calculate the value that SRC_REG should have after the last iteration
      of the loop.  */
@@ -701,7 +703,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
   emit_label (label);
 
   /* Emit the loop body.  */
-  riscv_block_move_straight (dest, src, bytes_per_iter);
+  riscv_block_move_straight (dest, src, bytes_per_iter, align);
 
   /* Move on to the next block.  */
   riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
@@ -713,7 +715,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
 
   /* Mop up any left-over bytes.  */
   if (leftover)
-    riscv_block_move_straight (dest, src, leftover);
+    riscv_block_move_straight (dest, src, leftover, align);
   else
     emit_insn(gen_nop ());
 }
@@ -730,8 +732,16 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
   unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
   unsigned HOST_WIDE_INT factor, align;
 
-  align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
-  factor = BITS_PER_WORD / align;
+  if (riscv_slow_unaligned_access_p)
+    {
+      align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
+      factor = BITS_PER_WORD / align;
+    }
+  else
+    {
+      align = hwi_length * BITS_PER_UNIT;
+      factor = 1;
+    }
 
   if (optimize_function_for_size_p (cfun)
       && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false))
@@ -739,7 +749,7 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
 
   if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor))
     {
-      riscv_block_move_straight (dest, src, INTVAL (length));
+      riscv_block_move_straight (dest, src, hwi_length, align);
       return true;
     }
   else if (optimize && align >= BITS_PER_WORD)
@@ -759,7 +769,8 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
 	  iter_words = i;
 	}
 
-      riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
+      riscv_block_move_loop (dest, src, bytes, align,
+			     iter_words * UNITS_PER_WORD);
       return true;
     }
 
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 33fb9891d82..946a773f77a 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -64,12 +64,12 @@ COPY_ALIGNED_N(8)
 /*
 **copy_11:
 **	...
-**	lbu\t[at][0-9],0\([at][0-9]\)
 **	...
-**	lbu\t[at][0-9],10\([at][0-9]\)
+**	lw\t[at][0-9],0\([at][0-9]\)
 **	...
-**	sb\t[at][0-9],0\([at][0-9]\)
+**	sw\t[at][0-9],0\([at][0-9]\)
 **	...
+**	lbu\t[at][0-9],10\([at][0-9]\)
 **	sb\t[at][0-9],10\([at][0-9]\)
 **	...
 */
@@ -91,7 +91,12 @@ COPY_ALIGNED_N(11)
 /*
 **copy_15:
 **	...
-**	(call|tail)\tmemcpy
+**	lw\t[at][0-9],0\([at][0-9]\)
+**	...
+**	sw\t[at][0-9],0\([at][0-9]\)
+**	...
+**	lbu\t[at][0-9],14\([at][0-9]\)
+**	sb\t[at][0-9],14\([at][0-9]\)
 **	...
 */
 COPY_N(15)
@@ -112,7 +117,12 @@ COPY_ALIGNED_N(15)
 /*
 **copy_27:
 **	...
-**	(call|tail)\tmemcpy
+**	lw\t[at][0-9],20\([at][0-9]\)
+**	...
+**	sw\t[at][0-9],20\([at][0-9]\)
+**	...
+**	lbu\t[at][0-9],26\([at][0-9]\)
+**	sb\t[at][0-9],26\([at][0-9]\)
 **	...
 */
 COPY_N(27)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
index 8e40e52fa91..08a927b9483 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
@@ -89,7 +89,12 @@ COPY_ALIGNED_N(11)
 /*
 **copy_15:
 **	...
-**	(call|tail)\tmemcpy
+**	ld\t[at][0-9],0\([at][0-9]\)
+**	...
+**	sd\t[at][0-9],0\([at][0-9]\)
+**	...
+**	lbu\t[at][0-9],14\([at][0-9]\)
+**	sb\t[at][0-9],14\([at][0-9]\)
 **	...
 */
 COPY_N(15)
@@ -110,7 +115,12 @@ COPY_ALIGNED_N(15)
 /*
 **copy_27:
 **	...
-**	(call|tail)\tmemcpy
+**	ld\t[at][0-9],16\([at][0-9]\)
+**	...
+**	sd\t[at][0-9],16\([at][0-9]\)
+**	...
+**	lbu\t[at][0-9],26\([at][0-9]\)
+**	sb\t[at][0-9],26\([at][0-9]\)
 **	...
 */
 COPY_N(27)
-- 
2.44.0