[gcc r16-742] RISC-V: Combine vec_duplicate + vrsub.vv to vrsub.vx on GR2VR cost

Pan Li via Gcc-cvs Mon, 19 May 2025 18:25:29 -0700

https://gcc.gnu.org/g:4a8ce14a58177905878400bae5980eed32973085


commit r16-742-g4a8ce14a58177905878400bae5980eed32973085
Author: Pan Li <pan2...@intel.com>
Date:   Sun May 18 16:41:01 2025 +0800

    RISC-V: Combine vec_duplicate + vrsub.vv to vrsub.vx on GR2VR cost
    
    This patch would like to combine the vec_duplicate + vrsub.vv to the
    vrsub.vx, as in the example code below.  The related pattern will depend
    on the cost of vec_duplicate from GR2VR.  Then the late-combine will
    take action if the cost of GR2VR is zero, and reject the combination
    if the GR2VR cost is greater than zero.
    
    Assume we have example code like below, GR2VR cost is 0.
    
      #define DEF_VX_BINARY_REVERSE_CASE_0(T, OP, NAME)                   \
      void                                                                \
      test_vx_binary_reverse_##NAME##_##T##_case_0 (T * restrict out,     \
                                                    T * restrict in, T x, \
                                                    unsigned n)           \
      {                                                                   \
        for (unsigned i = 0; i < n; i++)                                  \
          out[i] = x OP in[i];                                            \
      }
    
      DEF_VX_BINARY_REVERSE_CASE_0(int32_t, -, rsub)
    
    Before this patch:
      54   │ test_vx_binary_reverse_rsub_int32_t_case_0:
      55   │     beq a3,zero,.L27
      56   │     vsetvli a5,zero,e32,m1,ta,ma
      57   │     vmv.v.x v2,a2
      58   │     slli    a3,a3,32
      59   │     srli    a3,a3,32
      60   │ .L22:
      61   │     vsetvli a5,a3,e32,m1,ta,ma
      62   │     vle32.v v1,0(a1)
      63   │     slli    a4,a5,2
      64   │     sub a3,a3,a5
      65   │     add a1,a1,a4
      66   │     vsub.vv v1,v2,v1
      67   │     vse32.v v1,0(a0)
      68   │     add a0,a0,a4
      69   │     bne a3,zero,.L22
    
    After this patch:
      50   │ test_vx_binary_reverse_rsub_int32_t_case_0:
      51   │     beq a3,zero,.L27
      52   │     slli    a3,a3,32
      53   │     srli    a3,a3,32
      54   │ .L22:
      55   │     vsetvli a5,a3,e32,m1,ta,ma
      56   │     vle32.v v1,0(a1)
      57   │     slli    a4,a5,2
      58   │     sub a3,a3,a5
      59   │     add a1,a1,a4
      60   │     vrsub.vx    v1,v1,a2
      61   │     vse32.v v1,0(a0)
      62   │     add a0,a0,a4
      63   │     bne a3,zero,.L22
    
    The test suites below passed for this patch.
    * The rv64gcv full regression test.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md: Leverage the newly added func to
            expand the vx insn.
            * config/riscv/riscv-protos.h (expand_vx_binary_vec_dup_vec): Add
            new func decl to expand format v = vop(vec_dup(x), v).
            (expand_vx_binary_vec_vec_dup): Ditto but for format
            v = vop(v, vec_dup(x)).
            * config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add new
            func impl to expand vx for v = vop(vec_dup(x), v).
            (expand_vx_binary_vec_vec_dup): Ditto but for another format
            v = vop(v, vec_dup(x)).
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/config/riscv/autovec-opt.md | 16 +++++++-------
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc     | 49 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 9c6bf06c3a9a..a972eda8de45 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1691,25 +1691,25 @@
   "&& 1"
   [(const_int 0)]
   {
-    rtx ops[] = {operands[0], operands[2], operands[1]};
-    riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
-                                  riscv_vector::BINARY_OP, ops);
+    riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2],
+                                               operands[1], <CODE>,
+                                               <MODE>mode);
   }
   [(set_attr "type" "vialu")])
 
 (define_insn_and_split "*<optab>_vx_<mode>"
  [(set (match_operand:V_VLSI    0 "register_operand")
        (any_int_binop_no_shift_vx:V_VLSI
-        (match_operand:V_VLSI  2 "<binop_rhs2_predicate>")
+        (match_operand:V_VLSI  1 "<binop_rhs2_predicate>")
         (vec_duplicate:V_VLSI
-          (match_operand:<VEL> 1 "register_operand"))))]
+          (match_operand:<VEL> 2 "register_operand"))))]
   "TARGET_VECTOR && can_create_pseudo_p ()"
   "#"
   "&& 1"
   [(const_int 0)]
   {
-    rtx ops[] = {operands[0], operands[2], operands[1]};
-    riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
-                                  riscv_vector::BINARY_OP, ops);
+    riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1],
+                                               operands[2], <CODE>,
+                                               <MODE>mode);
   }
   [(set_attr "type" "vialu")])
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 271a9a3228d8..b39b858acac8 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -667,6 +667,8 @@ void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
                             machine_mode);
 void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
                             machine_mode);
+void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
+void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
                          bool, void (*)(rtx *, rtx), enum avl_type);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 66c8b2921e26..1b5ef51886e3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5498,6 +5498,55 @@ expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
   expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
 }
 
+/* Expand the binary vx combine with the format like v2 = vop(vec_dup(x), v1).
+   Aka the first op comes from the vec_duplicate, and the second op is
+   the vector reg.  */
+
+void
+expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2,
+                             rtx_code code, machine_mode mode)
+{
+  enum insn_code icode;
+
+  switch (code)
+    {
+    case PLUS:
+      icode = code_for_pred_scalar (code, mode);
+      break;
+    case MINUS:
+      icode = code_for_pred_sub_reverse_scalar (mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  rtx ops[] = {op_0, op_1, op_2};
+  emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
+/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x)).
+   Aka the second op comes from the vec_duplicate, and the first op is
+   the vector reg.  */
+
+void
+expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2,
+                             rtx_code code, machine_mode mode)
+{
+  enum insn_code icode;
+
+  switch (code)
+    {
+    case MINUS:
+      icode = code_for_pred_scalar (code, mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  rtx ops[] = {op_0, op_1, op_2};
+  emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
 /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
    well.  */
 void

[gcc r16-742] RISC-V: Combine vec_duplicate + vrsub.vv to vrsub.vx on GR2VR cost

Reply via email to