From: Juzhe-Zhong <juzhe.zh...@rivai.ai>

This patch optimizes the following seriese vector:
[nunits - 1, nunits - 2, ...., 0]

Before this patch:
vid
vmul
vadd

After this patch:
vid
vrsub

This patch is an obvious and simple optimization, ok for trunk?

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_vec_series): Optimize reverse series 
index vector.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Add assembly check.

---
 gcc/config/riscv/riscv-v.cc                     | 17 +++++++++++++++++
 .../riscv/rvv/autovec/vls-vlmax/perm-4.c        |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 1cd3bd3438e..75cf00b7eba 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -530,6 +530,8 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   machine_mode mode = GET_MODE (dest);
   machine_mode mask_mode;
   gcc_assert (get_mask_mode (mode).exists (&mask_mode));
+  poly_int64 nunits_m1 = GET_MODE_NUNITS (mode) - 1;
+  poly_int64 value;
 
   /* VECT_IV = BASE + I * STEP.  */
 
@@ -545,6 +547,21 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   rtx step_adj;
   if (rtx_equal_p (step, const1_rtx))
     step_adj = vid;
+  else if (rtx_equal_p (step, constm1_rtx) && poly_int_rtx_p (base, &value)
+          && known_eq (nunits_m1, value))
+    {
+      /* Special case:
+          {nunits - 1, nunits - 2, ... , 0}.
+          nunits can be either const_int or const_poly_int.
+
+        Code sequence:
+          vid.v v
+          vrsub nunits - 1, v.  */
+      rtx ops[] = {dest, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
+      insn_code icode = code_for_pred_sub_reverse_scalar (mode);
+      emit_vlmax_insn (icode, RVV_BINOP, ops);
+      return;
+    }
   else
     {
       step_adj = gen_reg_rtx (mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
index 179c8274a92..aa328810c30 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
@@ -56,3 +56,5 @@
 TEST_ALL (PERMUTE)
 
 /* { dg-final { scan-assembler-times 
{vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */
-- 
2.36.1

Reply via email to