pan2...@intel.com writes:

> From: Pan Li <pan2...@intel.com>
>
> After we enabled the late-combine pass after the mode-switching pass, it
> will try to combine the insn patterns below into op.  Aka:
>
> (insn 40 5 41 2 (set (reg:SI 11 a1 [151])
>   (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi}
>   (nil))
> (insn 41 40 7 2 (set (reg:SI 69 frm)
>   (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore}
>   (nil))
> (insn 42 10 11 2 (set (reg:SI 69 frm)
>   (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore}
>     (nil))
>
> trying to combine definition of r11 in:
> 40: a1:SI=frm:SI
>     into:
> 42: frm:SI=a1:SI
>     instruction becomes a no-op:
> (set (reg:SI 69 frm)
> (reg:SI 69 frm))
> original cost = 4 + 4 (weighted: 8.000000), replacement cost =
> 2147483647; keeping replacement
> rescanning insn with uid = 42.
> updating insn 42 in-place
> verify found no changes in insn with uid = 42.
> deleting insn 40
>
> For example, we have code as below:
>    9   │ int test_exampe () {
>   10   │   test ();
>   11   │
>   12   │   size_t vl = 4;
>   13   │   vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
>   14   │   va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
>   15   │   va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
>   16   │
>   17   │   __riscv_vse16_v_f16m1(b, va, vl);
>   18   │
>   19   │   return 0;
>   20   │ }
>
> it will be compiled to:
>   53   │ main:
>   54   │     addi    sp,sp,-16
>   55   │     sd  ra,8(sp)
>   56   │     call    initialize
>   57   │     lui a6,%hi(b)
>   58   │     lui a2,%hi(a)
>   59   │     addi    a3,a6,%lo(b)
>   60   │     addi    a2,a2,%lo(a)
>   61   │     li  a4,4
>   62   │ .L8:
>   63   │     fsrmi   2
>   64   │     vsetvli a5,a4,e16,m1,ta,ma
>   65   │     vle16.v v1,0(a2)
>   66   │     slli    a1,a5,1
>   67   │     subw    a4,a4,a5
>   68   │     add a2,a2,a1
>   69   │     vfnmadd.vv  v1,v1,v1
>   >> The fsrm a0 insn is deleted by late-combine <<
>   70   │     vfmsub.vv   v1,v1,v1
>   71   │     vse16.v v1,0(a3)
>   72   │     add a3,a3,a1
>   73   │     bgt a4,zero,.L8
>   74   │     lh  a4,%lo(b)(a6)
>   75   │     li  a5,-20480
>   76   │     addi    a5,a5,-1382
>   77   │     bne a4,a5,.L14
>   78   │     ld  ra,8(sp)
>   79   │     li  a0,0
>   80   │     addi    sp,sp,16
>   81   │     jr  ra
>
> This patch would like to add the FRM register to the global_regs as it
> is a cooperatively-managed global register.  And then the fsrm insn will
> not be eliminated by late-combine.  The related spec17 cam4 failure may
> also be caused by this issue.
>
> The test suites below pass with this patch.
> * The rv64gcv full regression test.
>
>       PR target/118103
>
> gcc/ChangeLog:
>
>       * config/riscv/riscv.cc (riscv_conditional_register_usage): Add
>       the FRM as the global_regs.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/riscv/rvv/base/pr118103-1.c: New test.
>       * gcc.target/riscv/rvv/base/pr118103-run-1.c: New test.
>
> Signed-off-by: Pan Li <pan2...@intel.com>
> ---
>  gcc/config/riscv/riscv.cc                     |  4 +-
>  .../gcc.target/riscv/rvv/base/pr118103-1.c    | 27 ++++++++++
>  .../riscv/rvv/base/pr118103-run-1.c           | 50 +++++++++++++++++++
>  3 files changed, 80 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 5a3a0504177..fe24376e9c5 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void)
>       call_used_regs[r] = 1;
>      }
>  
> -  if (!TARGET_HARD_FLOAT)
> +  if (TARGET_HARD_FLOAT)
> +    global_regs[FRM_REGNUM] = 1;
> +  else
>      {
>        for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
>       fixed_regs[regno] = call_used_regs[regno] = 1;
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
> new file mode 100644
> index 00000000000..1afa5d3afb5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */
> +
> +#include "riscv_vector.h"
> +
> +#define N 4
> +typedef _Float16 float16_t;
> +float16_t a[N]; float16_t b[N];
> +
> +extern void test ();
> +
> +int test_exampe () {
> +  test ();
> +
> +  size_t vl = N;
> +  vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
> +  va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
> +  va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
> +
> +  __riscv_vse16_v_f16m1(b, va, vl);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */
> +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */
> +/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
> new file mode 100644
> index 00000000000..709e1cc34a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { riscv_zvfh } } } */
> +/* { dg-options "-O3" } */
> +
> +#include "riscv_vector.h"
> +#define N 4
> +typedef _Float16 float16_t;
> +float16_t a[N]; float16_t b[N];
> +
> +void initialize () {
> +  uint16_t tmp_0[N] = {43883, 3213, 238, 275, };
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      union { float16_t f16; uint16_t u16; } converter;
> +      converter.u16 = tmp_0[i];
> +      a[i] = converter.f16; 
> +    }
> +
> +  for (int i = 0; i < N; ++i)
> +    b[i] = 0;
> +}
> +
> +void compute ()
> +{
> +  int avl = N;
> +  float16_t* ptr_a = a; float16_t* ptr_b = b;
> +
> +  for (size_t vl; avl > 0; avl -= vl)
> +    {
> +      vl = __riscv_vsetvl_e16m1(avl);
> +      vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl);
> +      va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
> +      va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
> +      __riscv_vse16_v_f16m1(ptr_b, va, vl);
> +      ptr_a += vl; ptr_b += vl;
> +    }
> +}
> +
> +int main ()
> +{
> +  initialize();
> +  compute();
> +
> +  short *tmp = (short *)b;

Don't you need -fno-strict-aliasing in the test then?

> +
> +  if (*tmp != -21862)
> +    __builtin_abort ();
> +
> +  return 0;
> +}

Reply via email to