From: Zhongyao Chen <[email protected]>

Improve RISC-V vector code generation by preferring tail-agnostic (ta) and
mask-agnostic (ma) policies for vector instructions when merge operands
are undefined. This optimization, controlled by a uarch-specific
`prefer_agnostic` tuning parameter, reduces the number of `vsetvl`
instructions and avoids conservative undisturbed policy selections,
addressing PR target/118945. Two new regression tests check the resulting
number of `vsetvli` instructions.

        PR target/118945

gcc/ChangeLog:

        * config/riscv/riscv.cc (struct riscv_tune_param): Add
        prefer_agnostic member.
        (generic_tune_info, rocket_tune_info, sifive_7_tune_info)
        (sifive_p400_tune_info, sifive_p600_tune_info, thead_c906_tune_info)
        (xiangshan_nanhu_tune_info, generic_ooo_tune_info)
        (tt_ascalon_d8_tune_info, optimize_size_tune_info)
        (mips_p8700_tune_info): Initialize prefer_agnostic.
        (riscv_prefer_agnostic_p): New function.
        * config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add prototype.
        * config/riscv/riscv-v.cc (get_prefer_tail_policy)
        (get_prefer_mask_policy): Use riscv_prefer_agnostic_p.
        * config/riscv/riscv-vsetvl.cc (tail_policy_eq2_p): New function.
        (mask_policy_eq2_p): New function.
        * config/riscv/riscv-vsetvl.def: Adjust policy compatibility rules
        to use tail_policy_eq2_p and mask_policy_eq2_p.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr118945-1.c: New file.
        * gcc.target/riscv/rvv/autovec/pr118945-2.c: New file.

Signed-off-by: Zhongyao Chen <[email protected]>
---
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 12 +++------
 gcc/config/riscv/riscv-vsetvl.cc              | 20 ++++++++++++++
 gcc/config/riscv/riscv-vsetvl.def             | 12 ++++-----
 gcc/config/riscv/riscv.cc                     | 22 +++++++++++++++-
 .../gcc.target/riscv/rvv/autovec/pr118945-1.c | 13 ++++++++++
 .../gcc.target/riscv/rvv/autovec/pr118945-2.c | 26 +++++++++++++++++++
 7 files changed, 91 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2d3fd0e07..d1ec934c2 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, 
rtx);
 #endif
 
 extern bool strided_load_broadcast_p (void);
+extern bool riscv_prefer_agnostic_p (void);
 extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, 
int);
 extern bool
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 8021bc14e..1d7d8a61b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2140,10 +2140,8 @@ get_ma (rtx ma)
 enum tail_policy
 get_prefer_tail_policy ()
 {
-  /* TODO: By default, we choose to use TAIL_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return TAIL_AGNOSTIC;
   return TAIL_ANY;
 }
 
@@ -2151,10 +2149,8 @@ get_prefer_tail_policy ()
 enum mask_policy
 get_prefer_mask_policy ()
 {
-  /* TODO: By default, we choose to use MASK_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return MASK_AGNOSTIC;
   return MASK_ANY;
 }
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 4fe0ae6d9..e05619817 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1568,11 +1568,31 @@ private:
   {
     return prev.get_ta () == next.get_ta ();
   }
+  inline bool tail_policy_eq2_p (const vsetvl_info &prev,
+                                const vsetvl_info &next)
+  {
+    if (!riscv_prefer_agnostic_p ())
+      return true;
+
+    return (((prev.get_policy_demand () == policy_demand_type::ignore_policy)
+             || (next.get_policy_demand () == 
policy_demand_type::ignore_policy))
+            && tail_policy_eq_p (prev, next));
+  }
   inline bool mask_policy_eq_p (const vsetvl_info &prev,
                                const vsetvl_info &next)
   {
     return prev.get_ma () == next.get_ma ();
   }
+  inline bool mask_policy_eq2_p (const vsetvl_info &prev,
+                                const vsetvl_info &next)
+  {
+    if (!riscv_prefer_agnostic_p ())
+      return true;
+
+    return (((prev.get_policy_demand () == policy_demand_type::ignore_policy)
+             || (next.get_policy_demand () == 
policy_demand_type::ignore_policy))
+            && mask_policy_eq_p (prev, next));
+  }
   inline bool tail_mask_policy_eq_p (const vsetvl_info &prev,
                                     const vsetvl_info &next)
   {
diff --git a/gcc/config/riscv/riscv-vsetvl.def 
b/gcc/config/riscv/riscv-vsetvl.def
index 0f999d227..b1b069a42 100644
--- a/gcc/config/riscv/riscv-vsetvl.def
+++ b/gcc/config/riscv/riscv-vsetvl.def
@@ -132,8 +132,8 @@ DEF_POLICY_RULE (tail_policy_only, tail_policy_only, 
tail_policy_only,
                 tail_policy_eq_p, tail_policy_eq_p, use_tail_policy)
 DEF_POLICY_RULE (tail_policy_only, mask_policy_only, tail_mask_policy,
                 always_true, always_false, use_mask_policy)
-DEF_POLICY_RULE (tail_policy_only, ignore_policy, tail_policy_only, 
always_true,
-                always_true, nop)
+DEF_POLICY_RULE (tail_policy_only, ignore_policy, tail_policy_only, 
tail_policy_eq2_p,
+                tail_policy_eq2_p, nop)
 
 DEF_POLICY_RULE (mask_policy_only, tail_mask_policy, tail_mask_policy,
                 mask_policy_eq_p, always_false, use_tail_policy)
@@ -141,14 +141,14 @@ DEF_POLICY_RULE (mask_policy_only, tail_policy_only, 
tail_mask_policy,
                 always_true, always_false, use_tail_policy)
 DEF_POLICY_RULE (mask_policy_only, mask_policy_only, mask_policy_only,
                 mask_policy_eq_p, mask_policy_eq_p, use_mask_policy)
-DEF_POLICY_RULE (mask_policy_only, ignore_policy, mask_policy_only, 
always_true,
-                always_true, nop)
+DEF_POLICY_RULE (mask_policy_only, ignore_policy, mask_policy_only, 
mask_policy_eq2_p,
+                mask_policy_eq2_p, nop)
 
 DEF_POLICY_RULE (ignore_policy, tail_mask_policy, tail_mask_policy, 
always_true,
                 always_false, use_tail_mask_policy)
-DEF_POLICY_RULE (ignore_policy, tail_policy_only, tail_policy_only, 
always_true,
+DEF_POLICY_RULE (ignore_policy, tail_policy_only, tail_policy_only, 
tail_policy_eq2_p,
                 always_false, use_tail_policy)
-DEF_POLICY_RULE (ignore_policy, mask_policy_only, mask_policy_only, 
always_true,
+DEF_POLICY_RULE (ignore_policy, mask_policy_only, mask_policy_only, 
mask_policy_eq2_p,
                 always_false, use_mask_policy)
 DEF_POLICY_RULE (ignore_policy, ignore_policy, ignore_policy, always_true,
                 always_true, nop)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index b95333897..6c750ec8e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -317,6 +317,7 @@ struct riscv_tune_param
   const char *function_align;
   const char *jump_align;
   const char *loop_align;
+  bool prefer_agnostic;
 };
 
 
@@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for Sifive p400 Series.  */
@@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info 
= {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for Sifive p600 Series.  */
@@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info 
= {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info = 
{
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for xiangshan nanhu.  */
@@ -625,6 +632,7 @@ static const struct riscv_tune_param 
xiangshan_nanhu_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for a generic ooo profile.  */
@@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info 
= {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                         /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for Tenstorrent Ascalon 8 wide.  */
@@ -673,6 +682,7 @@ static const struct riscv_tune_param 
tt_ascalon_d8_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -697,6 +707,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic */
 };
 
 /* Costs to use when optimizing for MIPS P8700 */
@@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info = 
{
   NULL,         /* vector cost */
   NULL,         /* function_align */
   NULL,         /* jump_align */
-  NULL,         /* loop_align */
+  NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic */
 };
 
 static bool riscv_avoid_shrink_wrapping_separate ();
@@ -12842,6 +12854,14 @@ strided_load_broadcast_p ()
   return tune_param->use_zero_stride_load;
 }
 
+/* Return TRUE if we should prefer agnostic vector code, FALSE otherwise.  */
+
+bool
+riscv_prefer_agnostic_p ()
+{
+  return tune_param->prefer_agnostic;
+}
+
 /* Return TRUE if we should use the divmod expander, FALSE otherwise.  This
    allows the behavior to be tuned for specific implementations as well as
    when optimizing for size.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
new file mode 100644
index 000000000..49705bf7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d 
-mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */
+
+int test(int* in, int n)
+{
+  int accum = 0;
+  for (int i = 0; i < n; i++)
+        accum += in[i];
+
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
new file mode 100644
index 000000000..9b9844446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S 
-fno-schedule-insns -fno-schedule-insns2" } */
+
+void vmult(
+    double* dst,
+    const double* src,
+    const unsigned int* rowstart,
+    const unsigned int* colnums,
+    const double* val,
+    const unsigned int n_rows
+) {
+    const double* val_ptr = &val[rowstart[0]];
+    const unsigned int* colnum_ptr = &colnums[rowstart[0]];
+    double* dst_ptr = dst;
+
+    for (unsigned int row = 0; row < n_rows; ++row) {
+        double s = 0.;
+        const double* const val_end_of_row = &val[rowstart[row + 1]];
+        while (val_ptr != val_end_of_row) {
+            s += *val_ptr++ * src[*colnum_ptr++];
+        }
+        *dst_ptr++ = s;
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 5 } } */
-- 
2.43.0

Reply via email to