pan2...@intel.com writes: > From: Pan Li <pan2...@intel.com> > > After we enabled the late-combine pass after the mode-switching pass, it > will try to combine below insn patterns into a no-op. Aka: > > (insn 40 5 41 2 (set (reg:SI 11 a1 [151]) > (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi} > (nil)) > (insn 41 40 7 2 (set (reg:SI 69 frm) > (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore} > (nil)) > (insn 42 10 11 2 (set (reg:SI 69 frm) > (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore} > (nil)) > > trying to combine definition of r11 in: > 40: a1:SI=frm:SI > into: > 42: frm:SI=a1:SI > instruction becomes a no-op: > (set (reg:SI 69 frm) > (reg:SI 69 frm)) > original cost = 4 + 4 (weighted: 8.000000), replacement cost = > 2147483647; keeping replacement > rescanning insn with uid = 42. > updating insn 42 in-place > verify found no changes in insn with uid = 42. > deleting insn 40 > > For example we have code as below: > 9 │ int test_exampe () { > 10 │ test (); > 11 │ > 12 │ size_t vl = 4; > 13 │ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); > 14 │ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); > 15 │ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); > 16 │ > 17 │ __riscv_vse16_v_f16m1(b, va, vl); > 18 │ > 19 │ return 0; > 20 │ } > > it will be compiled to: > 53 │ main: > 54 │ addi sp,sp,-16 > 55 │ sd ra,8(sp) > 56 │ call initialize > 57 │ lui a6,%hi(b) > 58 │ lui a2,%hi(a) > 59 │ addi a3,a6,%lo(b) > 60 │ addi a2,a2,%lo(a) > 61 │ li a4,4 > 62 │ .L8: > 63 │ fsrmi 2 > 64 │ vsetvli a5,a4,e16,m1,ta,ma > 65 │ vle16.v v1,0(a2) > 66 │ slli a1,a5,1 > 67 │ subw a4,a4,a5 > 68 │ add a2,a2,a1 > 69 │ vfnmadd.vv v1,v1,v1 > >> The fsrm a0 insn is deleted by late-combine << > 70 │ vfmsub.vv v1,v1,v1 > 71 │ vse16.v v1,0(a3) > 72 │ add a3,a3,a1 > 73 │ bgt a4,zero,.L8 > 74 │ lh a4,%lo(b)(a6) > 75 │ li a5,-20480 > 76 │ addi a5,a5,-1382 > 77 │ bne a4,a5,.L14 > 78 │ ld ra,8(sp) > 79 │ li a0,0 > 80 │ addi sp,sp,16 > 81 │ jr ra > 
> This patch would like to add the FRM register to the global_regs as it > is a cooperatively-managed global register. And then the fsrm insn will > not be eliminated by late-combine. The related spec17 cam4 failure may > also be caused by this issue. > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > > PR target/118103 > > gcc/ChangeLog: > > * config/riscv/riscv.cc (riscv_conditional_register_usage): Add > the FRM as the global_regs. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/pr118103-1.c: New test. > * gcc.target/riscv/rvv/base/pr118103-run-1.c: New test. > > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/config/riscv/riscv.cc | 4 +- > .../gcc.target/riscv/rvv/base/pr118103-1.c | 27 ++++++++++ > .../riscv/rvv/base/pr118103-run-1.c | 50 +++++++++++++++++++ > 3 files changed, 80 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 5a3a0504177..fe24376e9c5 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void) > call_used_regs[r] = 1; > } > > - if (!TARGET_HARD_FLOAT) > + if (TARGET_HARD_FLOAT) > + global_regs[FRM_REGNUM] = 1; > + else > { > for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) > fixed_regs[regno] = call_used_regs[regno] = 1; > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c > new file mode 100644 > index 00000000000..1afa5d3afb5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */ > + > +#include "riscv_vector.h" > + > +#define N 4 > +typedef _Float16 float16_t; > +float16_t a[N]; 
float16_t b[N]; > + > +extern void test (); > + > +int test_exampe () { > + test (); > + > + size_t vl = N; > + vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); > + va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); > + va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); > + > + __riscv_vse16_v_f16m1(b, va, vl); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */ > +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */ > +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c > new file mode 100644 > index 00000000000..709e1cc34a8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c > @@ -0,0 +1,50 @@ > +/* { dg-do run { target { riscv_zvfh } } } */ > +/* { dg-options "-O3" } */ > + > +#include "riscv_vector.h" > +#define N 4 > +typedef _Float16 float16_t; > +float16_t a[N]; float16_t b[N]; > + > +void initialize () { > + uint16_t tmp_0[N] = {43883, 3213, 238, 275, }; > + > + for (int i = 0; i < N; ++i) > + { > + union { float16_t f16; uint16_t u16; } converter; > + converter.u16 = tmp_0[i]; > + a[i] = converter.f16; > + } > + > + for (int i = 0; i < N; ++i) > + b[i] = 0; > +} > + > +void compute () > +{ > + int avl = N; > + float16_t* ptr_a = a; float16_t* ptr_b = b; > + > + for (size_t vl; avl > 0; avl -= vl) > + { > + vl = __riscv_vsetvl_e16m1(avl); > + vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl); > + va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); > + va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); > + __riscv_vse16_v_f16m1(ptr_b, va, vl); > + ptr_a += vl; ptr_b += vl; > + } > +} > + > +int main () > +{ > + initialize(); > + compute(); > + > + short *tmp = (short *)b;
Don't you need -fno-strict-aliasing in the test then? > + > + if (*tmp != -21862) > + __builtin_abort (); > + > + return 0; > +}