https://gcc.gnu.org/g:1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7

commit r16-1421-g1c5e99ce8744c166d82cb96bbb2d58392b5fb8d7
Author: Edwin Lu <e...@rivosinc.com>
Date:   Tue Jun 10 13:26:42 2025 -0700

    RISC-V: Prevent speculative vsetvl insn scheduling
    
    The instruction scheduler appears to be speculatively hoisting vsetvl
    insns outside of their basic block without checking for data
    dependencies. This resulted in a situation where the following occurs
    
            vsetvli a5,a1,e32,m1,tu,ma
            vle32.v v2,0(a0)
            sub     a1,a1,a5 <-- a1 potentially set to 0
            sh2add  a0,a5,a0
            vfmacc.vv       v1,v2,v2
            vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0
            beq     a1,zero,.L12 <-- check if avl is 0
    
    This patch would essentially delay the vsetvl update to after the branch
    to prevent unnecessarily updating the vinfo at the end of a basic block.
    
            PR/117974
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.cc (struct riscv_tune_param): Add tune
            param.
            (riscv_sched_can_speculate_insn): Implement.
            (TARGET_SCHED_CAN_SPECULATE_INSN): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test.
    
    Signed-off-by: Edwin Lu <e...@rivosinc.com>

Diff:
---
 gcc/config/riscv/riscv.cc                          | 35 ++++++++++++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr117974.c         | 19 ++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0c99850c14d8..c6e0ffb0755e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -302,6 +302,7 @@ struct riscv_tune_param
   bool vector_unaligned_access;
   bool use_divmod_expansion;
   bool overlap_op_by_pieces;
+  bool speculative_sched_vsetvl;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
   const char *function_align;
@@ -464,6 +465,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
   NULL,                                                /* function_align */
@@ -486,6 +488,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
   NULL,                                                /* function_align */
@@ -508,6 +511,7 @@ static const struct riscv_tune_param sifive_p400_tune_info 
= {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
   NULL,                                                /* function_align */
@@ -530,6 +534,7 @@ static const struct riscv_tune_param sifive_p600_tune_info 
= {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
   NULL,                                                /* function_align */
@@ -552,6 +557,7 @@ static const struct riscv_tune_param thead_c906_tune_info = 
{
   false,                                       /* vector_unaligned_access */
   false,       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
   NULL,                                                /* function_align */
@@ -574,6 +580,7 @@ static const struct riscv_tune_param 
xiangshan_nanhu_tune_info = {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
   NULL,                                                /* vector cost */
   NULL,                                                /* function_align */
@@ -596,6 +603,7 @@ static const struct riscv_tune_param generic_ooo_tune_info 
= {
   true,                                                /* 
vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   true,                                                /* overlap_op_by_pieces 
*/
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
   NULL,                                                /* function_align */
@@ -618,6 +626,7 @@ static const struct riscv_tune_param 
tt_ascalon_d8_tune_info = {
   true,                                                /* 
vector_unaligned_access */
   true,                                                /* use_divmod_expansion 
*/
   true,                                                /* overlap_op_by_pieces 
*/
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
   NULL,                                                /* function_align */
@@ -640,6 +649,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   false,                                       /* vector_unaligned_access */
   false,                                       /* use_divmod_expansion */
   false,                                       /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
   NULL,                                                /* function_align */
@@ -662,6 +672,7 @@ static const struct riscv_tune_param mips_p8700_tune_info = 
{
   false,        /* vector_unaligned_access */
   true,         /* use_divmod_expansion */
   false,        /* overlap_op_by_pieces */
+  false,                                       /* speculative_sched_vsetvl */
   RISCV_FUSE_NOTHING,                          /* fusible_ops */
   NULL,         /* vector cost */
   NULL,         /* function_align */
@@ -10441,6 +10452,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn 
*insn, int cost,
   return new_cost;
 }
 
+/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook.  Return true if insn can
+   can be scheduled for speculative execution.  Reject vsetvl instructions to
+   prevent the scheduler from hoisting them out of basic blocks without
+   checking for data dependencies PR117974.  */
+static bool
+riscv_sched_can_speculate_insn (rtx_insn *insn)
+{
+  /* Gate speculative scheduling of vsetvl instructions behind tune param.  */
+  if (tune_param->speculative_sched_vsetvl)
+    return true;
+
+  switch (get_attr_type (insn))
+    {
+      case TYPE_VSETVL:
+      case TYPE_VSETVL_PRE:
+       return false;
+      default:
+       return true;
+    }
+}
+
 /* Auxiliary function to emit RISC-V ELF attribute. */
 static void
 riscv_emit_attribute ()
@@ -14745,6 +14777,9 @@ synthesize_and (rtx operands[3])
 #undef  TARGET_SCHED_ADJUST_COST
 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
 
+#undef TARGET_SCHED_CAN_SPECULATE_INSN
+#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
+
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c 
b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
new file mode 100644
index 000000000000..bf99240d15f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl 
-Ofast" } */
+
+float g(float q[], int N){
+    float dqnorm = 0.0;
+
+    #pragma GCC unroll 4
+
+    for (int i=0; i < N; i++) {
+        dqnorm = dqnorm + q[i] * q[i];
+    }
+    return dqnorm;
+}
+
+/* need slightly different test for when -funroll-loops is enabled to keep
+   test output stable.  Otherwise test may be flakey.  */
+/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 { 
target { no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times 
{beq\s+[a-x0-9]+,[a-x0-9]+,.L12\s+vsetvli} 3 { target { any-opts 
"-funroll-loops" } } } } */
+

Reply via email to