On 6/10/25 13:35, Edwin Lu wrote:
> The instruction scheduler appears to be speculatively hoisting vsetvl
> insns outside of their basic block without checking for data
> dependencies. This resulted in a situation where the following occurs
>
>         vsetvli a5,a1,e32,m1,tu,ma
>         vle32.v v2,0(a0)
>         sub     a1,a1,a5 <-- a1 potentially set to 0
>         sh2add  a0,a5,a0
>         vfmacc.vv       v1,v2,v2
>         vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0
>         beq     a1,zero,.L12 <-- check if avl is 0
>
> This patch would essentially delay the vsetvl update to after the branch
> to prevent unnecessarily updating the vinfo at the end of a basic block.
>
>       PR 117974
>
> gcc/ChangeLog:
>
>       * config/riscv/riscv.cc (struct riscv_tune_param): Add tune
>         param.
>       (riscv_sched_can_speculate_insn): Implement.
>       (TARGET_SCHED_CAN_SPECULATE_INSN): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test.
>
> Signed-off-by: Edwin Lu <e...@rivosinc.com>
> ---
> V2: add testcase
> V3: add opt flag to test performance
> V4: change opt flag to tune param
> V5: adjust test to prevent failures
> ---
>  gcc/config/riscv/riscv.cc                     | 35 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/pr117974.c    | 17 +++++++++
>  2 files changed, 52 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 14ac2f3cdbc..866fde707ed 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -302,6 +302,7 @@ struct riscv_tune_param
>    bool vector_unaligned_access;
>    bool use_divmod_expansion;
>    bool overlap_op_by_pieces;
> +  bool speculative_sched_vsetvl;
>    unsigned int fusible_ops;
>    const struct cpu_vector_cost *vec_costs;
>    const char *function_align;
> @@ -464,6 +465,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    NULL,                                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -486,6 +488,7 @@ static const struct riscv_tune_param sifive_7_tune_info = 
> {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    NULL,                                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -508,6 +511,7 @@ static const struct riscv_tune_param 
> sifive_p400_tune_info = {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
>    &generic_vector_cost,                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -530,6 +534,7 @@ static const struct riscv_tune_param 
> sifive_p600_tune_info = {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
>    &generic_vector_cost,                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -552,6 +557,7 @@ static const struct riscv_tune_param thead_c906_tune_info 
> = {
>    false,                                     /* vector_unaligned_access */
>    false,     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    NULL,                                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -574,6 +580,7 @@ static const struct riscv_tune_param 
> xiangshan_nanhu_tune_info = {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
>    NULL,                                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -596,6 +603,7 @@ static const struct riscv_tune_param 
> generic_ooo_tune_info = {
>    true,                                              /* 
> vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    true,                                              /* overlap_op_by_pieces 
> */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    &generic_vector_cost,                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -618,6 +626,7 @@ static const struct riscv_tune_param 
> tt_ascalon_d8_tune_info = {
>    true,                                              /* 
> vector_unaligned_access */
>    true,                                              /* use_divmod_expansion 
> */
>    true,                                              /* overlap_op_by_pieces 
> */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    &generic_vector_cost,                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -640,6 +649,7 @@ static const struct riscv_tune_param 
> optimize_size_tune_info = {
>    false,                                     /* vector_unaligned_access */
>    false,                                     /* use_divmod_expansion */
>    false,                                     /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                           /* fusible_ops */
>    NULL,                                              /* vector cost */
>    NULL,                                              /* function_align */
> @@ -662,6 +672,7 @@ static const struct riscv_tune_param mips_p8700_tune_info 
> = {
>    false,        /* vector_unaligned_access */
>    true,         /* use_divmod_expansion */
>    false,        /* overlap_op_by_pieces */
> +  false,                                     /* speculative_sched_vsetvl */
>    RISCV_FUSE_NOTHING,                                /* fusible_ops */
>    NULL,         /* vector cost */
>    NULL,         /* function_align */
> @@ -10438,6 +10449,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn 
> *insn, int cost,
>    return new_cost;
>  }
>
> +/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook.  Return true if insn can
> +   can be scheduled for speculative execution.  Reject vsetvl instructions to
> +   prevent the scheduler from hoisting them out of basic blocks without
> +   checking for data dependencies PR117974.  */
> +static bool
> +riscv_sched_can_speculate_insn (rtx_insn *insn)
> +{
> +  /* Gate speculative scheduling of vsetvl instructions behind tune param.  
> */
> +  if (tune_param->speculative_sched_vsetvl)
> +    return true;
> +
> +  switch (get_attr_type (insn))
> +    {
> +      case TYPE_VSETVL:
> +      case TYPE_VSETVL_PRE:
> +     return false;
> +      default:
> +     return true;
> +    }
> +}
> +
>  /* Auxiliary function to emit RISC-V ELF attribute. */
>  static void
>  riscv_emit_attribute ()
> @@ -14742,6 +14774,9 @@ synthesize_and (rtx operands[3])
>  #undef  TARGET_SCHED_ADJUST_COST
>  #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
>
> +#undef TARGET_SCHED_CAN_SPECULATE_INSN
> +#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
> +
>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
> new file mode 100644
> index 00000000000..f19c3bffc69
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl 
> -Ofast" } */
> +
> +float g(float q[], int N){
> +    float dqnorm = 0.0;
> +
> +    #pragma GCC unroll 4
> +
> +    for (int i=0; i < N; i++) {
> +        dqnorm = dqnorm + q[i] * q[i];
> +    }
> +    return dqnorm;
> +}
> +
> +/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 
> { target { no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times 
> {beq\s+[a-x0-9]+,[a-x0-9]+,.L12\s+vsetvli} 3 { target { any-opts 
> "-funroll-loops" } } } } */

Maybe add a one-liner comment on the need for removing funroll-loops (...not
related to patch per se but just to keep the test output stable...)
No need to resping/resend.

Thx,
-Vineet

Reply via email to