[Committed V2] RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move

Juzhe-Zhong Fri, 05 Jan 2024 18:09:18 -0800

V2: Address comments from Robin.

While working on fixing a bug, I notice this following code has redundant move:


#include "riscv_vector.h"
void
f (float x, float y, void *out)
{
  float f[4] = { x, x, x, y };
  vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
  __riscv_vse32_v_f32m1 (out, v, 4);
}

Before this patch:

f:
        vsetivli        zero,4,e32,m1,ta,ma
        addi    sp,sp,-16
        vfmv.v.f        v1,fa0
        vfslide1down.vf v1,v1,fa1
        vmv.v.v v1,v1                       ----> redundant move.
        vse32.v v1,0(a0)
        addi    sp,sp,16
        jr      ra

The rootcause is that the complicate vmv.v.v pattern doesn't simplify it
into simple (set (reg) (reg)) reg-to-reg move pattern.

Currently, we support such simplification for VLMAX.

However, the case I found is non-VLMAX but with LEN = NUNITS which should be
considered as equivalent to VLMAX.

Add a simple fix for such situation.

Tested on both RV32/RV64 no regressions.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function.
        * config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
        * config/riscv/vector.md: Allow non-vlmax with len = NUNITS 
simplification.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/vf_avl-4.c: New test.

---
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 23 +++++++++++++++++++
 gcc/config/riscv/vector.md                    |  9 ++------
 .../gcc.target/riscv/rvv/base/vf_avl-4.c      | 13 +++++++++++
 4 files changed, 39 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0f0337cfb38..00a5b645abe 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode);
 bool can_be_broadcasted_p (rtx);
 bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
+bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ec859645415..2491522191a 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5117,4 +5117,27 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
   return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN;
 }
 
+/* Return true it is whole register-register move.  */
+bool
+whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
+{
+  /* An operation is a whole-register move if either
+     (1) Its vlmax operand equals VLMAX
+     (2) Its vl operand equals the number of units of its mode.  */
+  if (register_operand (ops[0], mode)
+      && register_operand (ops[3], mode)
+      && satisfies_constraint_vu (ops[2])
+      && satisfies_constraint_Wc1 (ops[1]))
+    {
+      if (INTVAL (ops[avl_type_index]) == VLMAX)
+       return true;
+      /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+        into NON-VLMAX with LEN = NUNITS.  */
+      else if (CONST_INT_P (ops[4])
+              && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
+       return true;
+    }
+  return false;
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3d2c1c3ce8f..be5beb5ab64 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1724,10 +1724,7 @@
    vse<sew>.v\t%3,%0%p1
    vmv.v.v\t%0,%3
    vmv.v.v\t%0,%3"
-  "&& register_operand (operands[0], <MODE>mode)
-   && register_operand (operands[3], <MODE>mode)
-   && satisfies_constraint_vu (operands[2])
-   && INTVAL (operands[7]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1776,9 +1773,7 @@
    vmmv.m\t%0,%3
    vmclr.m\t%0
    vmset.m\t%0"
-  "&& register_operand (operands[0], <MODE>mode)
-   && register_operand (operands[3], <MODE>mode)
-   && INTVAL (operands[5]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
new file mode 100644
index 00000000000..1b4bfd96481
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param 
riscv-autovec-preference=fixed-vlmax" } */
+
+#include "riscv_vector.h"
+void
+f (float x, float y, void *out)
+{
+  float f[4] = { x, x, x, y };
+  vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
+  __riscv_vse32_v_f32m1 (out, v, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
-- 
2.36.3

[Committed V2] RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move

Reply via email to