https://gcc.gnu.org/g:55d288d4ff5360c572f2a017ba9385840ac5134e

commit r15-7215-g55d288d4ff5360c572f2a017ba9385840ac5134e
Author: Pan Li <pan2...@intel.com>
Date:   Sat Jan 25 15:45:10 2025 +0800

    RISC-V: Make FRM as global register [PR118103]
    
    After we enabled the late-combine pass after the mode-switching pass, it
    will try to combine the insn patterns below into a no-op.  For example:
    
    (insn 40 5 41 2 (set (reg:SI 11 a1 [151])
      (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi}
      (nil))
    (insn 41 40 7 2 (set (reg:SI 69 frm)
      (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore}
      (nil))
    (insn 42 10 11 2 (set (reg:SI 69 frm)
      (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore}
        (nil))
    
    trying to combine definition of r11 in:
    40: a1:SI=frm:SI
        into:
    42: frm:SI=a1:SI
        instruction becomes a no-op:
    (set (reg:SI 69 frm)
    (reg:SI 69 frm))
    original cost = 4 + 4 (weighted: 8.000000), replacement cost =
    2147483647; keeping replacement
    rescanning insn with uid = 42.
    updating insn 42 in-place
    verify found no changes in insn with uid = 42.
    deleting insn 40
    
    For example, we have code as below:
       9   │ int test_exampe () {
      10   │   test ();
      11   │
      12   │   size_t vl = 4;
      13   │   vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
      14   │   va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
      15   │   va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
      16   │
      17   │   __riscv_vse16_v_f16m1(b, va, vl);
      18   │
      19   │   return 0;
      20   │ }
    
    it will be compiled to:
      53   │ main:
      54   │     addi    sp,sp,-16
      55   │     sd  ra,8(sp)
      56   │     call    initialize
      57   │     lui a6,%hi(b)
      58   │     lui a2,%hi(a)
      59   │     addi    a3,a6,%lo(b)
      60   │     addi    a2,a2,%lo(a)
      61   │     li  a4,4
      62   │ .L8:
      63   │     fsrmi   2
      64   │     vsetvli a5,a4,e16,m1,ta,ma
      65   │     vle16.v v1,0(a2)
      66   │     slli    a1,a5,1
      67   │     subw    a4,a4,a5
      68   │     add a2,a2,a1
      69   │     vfnmadd.vv  v1,v1,v1
      >> The fsrm a0 insn is deleted by late-combine <<
      70   │     vfmsub.vv   v1,v1,v1
      71   │     vse16.v v1,0(a3)
      72   │     add a3,a3,a1
      73   │     bgt a4,zero,.L8
      74   │     lh  a4,%lo(b)(a6)
      75   │     li  a5,-20480
      76   │     addi    a5,a5,-1382
      77   │     bne a4,a5,.L14
      78   │     ld  ra,8(sp)
      79   │     li  a0,0
      80   │     addi    sp,sp,16
      81   │     jr  ra
    
    This patch adds the FRM register to the global_regs, as it is a
    cooperatively-managed global register.  The fsrm insn will then no
    longer be eliminated by late-combine.  The related spec17 cam4 failure
    may also be caused by this issue.
    
    The test suites below pass with this patch.
    * The rv64gcv full regression test.
    
            PR target/118103
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (riscv_conditional_register_usage): Add
            FRM to the global_regs.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/base/pr118103-1.c: New test.
            * gcc.target/riscv/rvv/base/pr118103-run-1.c: New test.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/config/riscv/riscv.cc                          |  4 +-
 .../gcc.target/riscv/rvv/base/pr118103-1.c         | 27 ++++++++++++
 .../gcc.target/riscv/rvv/base/pr118103-run-1.c     | 50 ++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4652454b8fec..dd50fe4eddfb 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void)
        call_used_regs[r] = 1;
     }
 
-  if (!TARGET_HARD_FLOAT)
+  if (TARGET_HARD_FLOAT)
+    global_regs[FRM_REGNUM] = 1;
+  else
     {
       for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
        fixed_regs[regno] = call_used_regs[regno] = 1;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
new file mode 100644
index 000000000000..1afa5d3afb50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */
+
+#include "riscv_vector.h"
+
+#define N 4
+typedef _Float16 float16_t;
+float16_t a[N]; float16_t b[N];
+
+extern void test ();
+
+int test_exampe () {
+  test ();
+
+  size_t vl = N;
+  vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl);
+  va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
+  va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
+
+  __riscv_vse16_v_f16m1(b, va, vl);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
new file mode 100644
index 000000000000..62375c63ee86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { riscv_zvfh } } } */
+/* { dg-options "-O3 -fno-strict-aliasing" } */
+
+#include "riscv_vector.h"
+#define N 4
+typedef _Float16 float16_t;
+float16_t a[N]; float16_t b[N];
+
+void initialize () {
+  uint16_t tmp_0[N] = {43883, 3213, 238, 275, };
+
+  for (int i = 0; i < N; ++i)
+    {
+      union { float16_t f16; uint16_t u16; } converter;
+      converter.u16 = tmp_0[i];
+      a[i] = converter.f16; 
+    }
+
+  for (int i = 0; i < N; ++i)
+    b[i] = 0;
+}
+
+void compute ()
+{
+  int avl = N;
+  float16_t* ptr_a = a; float16_t* ptr_b = b;
+
+  for (size_t vl; avl > 0; avl -= vl)
+    {
+      vl = __riscv_vsetvl_e16m1(avl);
+      vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl);
+      va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl);
+      va = __riscv_vfmsac_vv_f16m1(va, va, va, vl);
+      __riscv_vse16_v_f16m1(ptr_b, va, vl);
+      ptr_a += vl; ptr_b += vl;
+    }
+}
+
+int main ()
+{
+  initialize();
+  compute();
+
+  short *tmp = (short *)b;
+
+  if (*tmp != -21862)
+    __builtin_abort ();
+
+  return 0;
+}

Reply via email to