The instruction scheduler appears to be speculatively hoisting vsetvl insns outside of their basic block without checking for data dependencies. This resulted in a situation where the following occurs
vsetvli a5,a1,e32,m1,tu,ma vle32.v v2,0(a0) sub a1,a1,a5 <-- a1 potentially set to 0 sh2add a0,a5,a0 vfmacc.vv v1,v2,v2 vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0 beq a1,zero,.L12 <-- check if avl is 0 This patch would essentially delay the vsetvl update to after the branch to prevent unnecessarily updating the vinfo at the end of a basic block. PR 117974 gcc/ChangeLog: * config/riscv/riscv.cc (struct riscv_tune_param): Add tune param. (riscv_sched_can_speculate_insn): Implement. (TARGET_SCHED_CAN_SPECULATE_INSN): Implement. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test. Signed-off-by: Edwin Lu <e...@rivosinc.com> --- V2: add testcase V3: add opt flag to test performance V4: change opt flag to tune param --- gcc/config/riscv/riscv.cc | 34 +++++++++++++++++++ .../gcc.target/riscv/rvv/vsetvl/pr117974.c | 16 +++++++++ 2 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 4c5bb02754d..2e5cd0dbcd2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -297,6 +297,7 @@ struct riscv_tune_param bool vector_unaligned_access; bool use_divmod_expansion; bool overlap_op_by_pieces; + bool speculative_sched_vsetvl; unsigned int fusible_ops; const struct cpu_vector_cost *vec_costs; const char *function_align; @@ -459,6 +460,7 @@ static const struct riscv_tune_param rocket_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -481,6 +483,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -503,6 +506,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -525,6 +529,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -547,6 +552,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -569,6 +575,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -591,6 +598,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { true, /* vector_unaligned_access */ false, /* use_divmod_expansion */ true, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -613,6 +621,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = { true, /* vector_unaligned_access */ true, /* use_divmod_expansion */ true, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ &generic_vector_cost, /* vector cost */ NULL, /* function_align */ @@ -635,6 +644,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ + false, /* speculative_sched_vsetvl */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ NULL, /* function_align */ @@ -10335,6 +10345,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, return new_cost; } +/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can + can be scheduled for speculative execution. Reject vsetvl instructions to + prevent the scheduler from hoisting them out of basic blocks without + checking for data dependencies PR117974. */ +static bool +riscv_sched_can_speculate_insn (rtx_insn *insn) +{ + /* Gate speculative scheduling of vsetvl instructions behind tune param. */ + if (tune_param->speculative_sched_vsetvl) + return true; + + switch (get_attr_type (insn)) + { + case TYPE_VSETVL: + case TYPE_VSETVL_PRE: + return false; + default: + return true; + } +} + /* Auxiliary function to emit RISC-V ELF attribute. */ static void riscv_emit_attribute () @@ -14519,6 +14550,9 @@ synthesize_ior_xor (rtx_code code, rtx operands[3]) #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost +#undef TARGET_SCHED_CAN_SPECULATE_INSN +#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c new file mode 100644 index 00000000000..22e2ec7337c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -Ofast" } */ + +float g(float q[], int N){ + float dqnorm = 0.0; + + #pragma GCC unroll 4 + + for (int i=0; i < N; i++) { + dqnorm = dqnorm + q[i] * q[i]; + } + return dqnorm; +} + +/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 } } */ + -- 2.43.0