Splitting the mem-to-mem insn pattern apart from the reg-to-mem/mem-to-reg/reg-to-reg patterns causes an ICE in RA, since RA prefers that they stay together.
Now, we split mem-to-mem as a pure pre-RA split pattern and only allow define_insn match mem-to-mem VLS move in pre-RA stage (Forbid mem-to-mem move after RA). Tested no difference. Committed. PR target/111566 gcc/ChangeLog: * config/riscv/vector.md (*mov<mode>_mem_to_mem): Only allow mem-to-mem move for VLS modes size <= MAX_BITS_PER_WORD gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/fortran/pr111566.f90: New test. --- gcc/config/riscv/vector.md | 60 ++++++++++--------- .../gcc.target/riscv/rvv/fortran/pr111566.f90 | 31 ++++++++++ 2 files changed, 62 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index d5300a33946..a6dbaa74a10 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1222,48 +1222,50 @@ DONE; }) -(define_insn_and_split "*mov<mode>_mem_to_mem" +;; Some VLS modes (like V2SImode) have size <= a general purpose +;; register width, we optimize such mem-to-mem move into mem-to-mem +;; scalar move. Otherwise, we always force operands[1] into register +;; so that we will never get mem-to-mem move after RA. +(define_split [(set (match_operand:VLS_AVL_IMM 0 "memory_operand") (match_operand:VLS_AVL_IMM 1 "memory_operand"))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" + "TARGET_VECTOR && can_create_pseudo_p () + && GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD" [(const_int 0)] { - if (GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD) - { - /* Opitmize the following case: - - typedef int8_t v2qi __attribute__ ((vector_size (2))); - v2qi v = *(v2qi*)in; - *(v2qi*)out = v; - - We prefer scalar load/store instead of vle.v/vse.v when - the VLS modes size is smaller scalar mode. 
*/ - machine_mode mode; - unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant (); - if (FLOAT_MODE_P (<MODE>mode)) - mode = mode_for_size (size, MODE_FLOAT, 0).require (); - else - mode = mode_for_size (size, MODE_INT, 0).require (); - emit_move_insn (gen_lowpart (mode, operands[0]), - gen_lowpart (mode, operands[1])); - } + /* Optimize the following case: + + typedef int8_t v2qi __attribute__ ((vector_size (2))); + v2qi v = *(v2qi*)in; + *(v2qi*)out = v; + + We prefer scalar load/store instead of vle.v/vse.v when + the VLS modes size is smaller than the scalar mode. */ + machine_mode mode; + unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant (); + if (FLOAT_MODE_P (<MODE>mode)) + mode = mode_for_size (size, MODE_FLOAT, 0).require (); else - { - operands[1] = force_reg (<MODE>mode, operands[1]); - emit_move_insn (operands[0], operands[1]); - } + mode = mode_for_size (size, MODE_INT, 0).require (); + emit_move_insn (gen_lowpart (mode, operands[0]), + gen_lowpart (mode, operands[1])); DONE; } - [(set_attr "type" "vmov")] ) +;; We recognize mem-to-mem move in pre-RA stage so that we won't have +;; ICE (unrecognizable insn: (set (mem) (mem))). Then, the previous +;; mem-to-mem split pattern will force operands[1] into a register so +;; that mem-to-mem move will never happen after RA. +;; +;; We don't allow mem-to-mem move in post-RA stage since we +;; don't have an instruction to split mem-to-mem move after RA. 
(define_insn_and_split "*mov<mode>" [(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr") (match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" " m,vr, vr"))] "TARGET_VECTOR - && (register_operand (operands[0], <MODE>mode) + && (can_create_pseudo_p () + || register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" "@ # diff --git a/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 new file mode 100644 index 00000000000..2e30dc9bfaa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 @@ -0,0 +1,31 @@ +! { dg-do compile } +! { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S -std=legacy -w" } + +module a + integer,parameter :: SHR_KIND_R8 = selected_real_kind(12) +end module a +module b + use a, c => shr_kind_r8 +contains + subroutine d(cg , km, i1, i2) + real (c) ch(i2,km) + real (c) cg(4,i1:i2,km) + real dc(i2,km) + real(c) ci(i2,km) + real(c) cj(i2,km) + do k=2,ck + do i=i1,0 + cl = ci(i,k) *ci(i,1) / cj(i,k)+ch(i,1) + cm = cg(1,i,k) - min(e,cg(1,i,co)) + dc(i,k) = sign(cm, cl) + enddo + enddo + if ( cq == 0 ) then + do i=i1,i2 + if( cr <= cs ) then + cg= sign( min(ct, cg), cg) + endif + enddo + endif + end subroutine d +end module b -- 2.36.3