gcc/ChangeLog:

	* config/riscv/riscv.md (movmem<mode>): Use
	riscv_vector::expand_block_move, if and only if we know the
	entire operation can be performed using one vector load followed
	by one vector store.
gcc/testsuite/ChangeLog:

	PR target/112109
	* gcc.target/riscv/rvv/base/movmem-1.c: New test.
---
 gcc/config/riscv/riscv.md                    | 22 +++++++
 .../gcc.target/riscv/rvv/base/movmem-1.c     | 60 +++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index ee8b71c22aa..1b3f66fd15c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2365,6 +2365,28 @@
   FAIL;
 })
 
+;; Inlining general memmove is a pessimisation: we cannot avoid deciding
+;; which direction to copy at runtime, and that decision costs instructions.
+;; However, when the entire move fits in a single vector operation we can
+;; issue all of the reads before any of the writes, so the copy direction
+;; no longer matters; generate the inline vector code in that case.
+;; N.B. prefer the scalar path for tiny memmoves.
+(define_expand "movmem<mode>"
+  [(parallel [(set (match_operand:BLK 0 "general_operand")
+		   (match_operand:BLK 1 "general_operand"))
+	      (use (match_operand:P 2 "const_int_operand"))
+	      (use (match_operand:SI 3 "const_int_operand"))])]
+  "TARGET_VECTOR"
+{
+  if ((INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8)
+      && (INTVAL (operands[2]) <= TARGET_MIN_VLEN)
+      && riscv_vector::expand_block_move (operands[0], operands[1],
+					  operands[2]))
+    DONE;
+  else
+    FAIL;
+})
+
 ;; Expand in-line code to clear the instruction cache between operand[0] and
 ;; operand[1].
 (define_expand "clear_cache"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c
new file mode 100644
index 00000000000..0ecc3f7e3b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/movmem-1.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 --param=riscv-autovec-lmul=dynamic" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define MIN_VECTOR_BYTES (__riscv_v_min_vlen / 8)
+
+/* Tiny memmoves should not be vectorised.
+** f1:
+**	li\s+a2,\d+
+**	tail\s+memmove
+*/
+char *
+f1 (char *a, char const *b)
+{
+  return __builtin_memmove (a, b, MIN_VECTOR_BYTES - 1);
+}
+
+/* Vectorise+inline minimum vector register width with LMUL=1.
+** f2:
+**	(
+**	vsetivli\s+zero,16,e8,m1,ta,ma
+**	|
+**	li\s+[ta][0-7],\d+
+**	vsetvli\s+zero,[ta][0-7],e8,m1,ta,ma
+**	)
+**	vle8\.v\s+v\d+,0\(a1\)
+**	vse8\.v\s+v\d+,0\(a0\)
+**	ret
+*/
+char *
+f2 (char *a, char const *b)
+{
+  return __builtin_memmove (a, b, MIN_VECTOR_BYTES);
+}
+
+/* Vectorise+inline up to LMUL=8.
+** f3:
+**	li\s+[ta][0-7],\d+
+**	vsetvli\s+zero,[ta][0-7],e8,m8,ta,ma
+**	vle8\.v\s+v\d+,0\(a1\)
+**	vse8\.v\s+v\d+,0\(a0\)
+**	ret
+*/
+char *
+f3 (char *a, char const *b)
+{
+  return __builtin_memmove (a, b, MIN_VECTOR_BYTES * 8);
+}
+
+/* Don't vectorise if the move is too large for one operation.
+** f4:
+**	li\s+a2,\d+
+**	tail\s+memmove
+*/
+char *
+f4 (char *a, char const *b)
+{
+  return __builtin_memmove (a, b, MIN_VECTOR_BYTES * 8 + 1);
+}
-- 
2.34.1
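
Background for the comment in the riscv.md hunk above: a general memmove must
compare the source and destination addresses at runtime and branch to either a
forward or a backward loop so that overlapping regions are handled correctly.
A minimal scalar sketch of that direction test follows (illustrative only, not
part of the patch; "toy_memmove" is a hypothetical name):

#include <stddef.h>

/* Copy n bytes between possibly overlapping regions.  The runtime
   direction check below is exactly the branching the patch avoids:
   a single vector load followed by a single vector store reads every
   byte before writing any, so the overlap direction becomes irrelevant.  */
static void *
toy_memmove (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  if (d < s)
    while (n--)
      *d++ = *s++;	/* Forward copy: dst precedes src.  */
  else
    {
      d += n;
      s += n;
      while (n--)
	*--d = *--s;	/* Backward copy: src precedes dst.  */
    }
  return dst;
}

As a concrete size window: with TARGET_MIN_VLEN == 128, the expander inlines
moves of 16 to 128 bytes (TARGET_MIN_VLEN / 8 up to TARGET_MIN_VLEN, i.e. one
vector register at LMUL=1 up to eight at LMUL=8) and FAILs to the memmove
libcall otherwise, which is what the f1-f4 tests check.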