On 2/24/25 16:07, Edwin Lu wrote: > See [1] thread for original patch which spawned this one. > > We are currently seeing the following code where we perform a vsetvl > before a branching instruction against the avl. > > vsetvli a5,a1,e32,m1,tu,ma > vle32.v v2,0(a0) > sub a1,a1,a5 <-- a1 potentially set to 0 > sh2add a0,a5,a0 > vfmacc.vv v1,v2,v2 > vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0 > beq a1,zero,.L12 <-- check if avl is 0 > > Since we are branching off of the avl, we don't need to update vl until > after the branch is taken. Search the ready queue for vsetvls scheduled > before branching instructions that branch off of the same regno and > promote the branches to execute first. This can improve performancy by > potentially avoiding setting VL=0 which may be expensive on some uarches. > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675622.html > > PR/117974 > > gcc/ChangeLog: > > * config/riscv/riscv.cc (vsetvl_avl_regno): New helper function. > (insn_increases_zeroness_p): Ditto. > (riscv_promote_ready): Ditto. > (riscv_sched_reorder): Implement hook. > (TARGET_SCHED_REORDER): Define Hook. > * config/riscv/riscv.opt: New flag. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test. > > Signed-off-by: Edwin Lu <e...@rivosinc.com> > Co-authored-by: Palmer Dabbelt <pal...@rivosinc.com> > --- > gcc/config/riscv/riscv.cc | 103 ++++++++++++++++++ > gcc/config/riscv/riscv.opt | 4 + > .../gcc.target/riscv/rvv/vsetvl/pr117974.c | 15 +++ > 3 files changed, 122 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 89aa25d5da9..cf0866fa3fb 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -14035,6 +14035,106 @@ bool need_shadow_stack_push_pop_p () > return is_zicfiss_p () && riscv_save_return_addr_reg_p (); > } > > +static int > +vsetvl_avl_regno(rtx_insn *insn) > +{ > + if (recog_memoized (insn) < 0) > + return -1; > + > + if (get_attr_type (insn) != TYPE_VSETVL > + && get_attr_type (insn) != TYPE_VSETVL_PRE) > + return -1; > + > + extract_insn (insn); > + /* From vector.md, vsetvl operands are as follows: > + ;; operands[0]: VL. > + ;; operands[1]: AVL. > + ;; operands[2]: SEW > + ;; operands[3]: LMUL > + ;; operands[4]: Tail policy 0 or 1 (undisturbed/agnostic) > + ;; operands[5]: Mask policy 0 or 1 (undisturbed/agnostic) > + Return regno of avl operand. */ > + return REGNO (recog_data.operand[1]); > +} > + > +static bool > +insn_increases_zeroness_p(rtx_insn *insn, int regno)
Mnir point, but this is just one case of introducing VL=0 zeroness specific to branch, not the general case. > +{ > + /* Check for branching against zero. */ > + if (JUMP_P (insn)) > + { > + extract_insn (insn); > + bool match_reg = false; > + bool comp_zero = false; > + for (int i = 0; i < recog_data.n_operands; i++) > + { > + if (REG_P (recog_data.operand[i]) > + && REGNO (recog_data.operand[i]) == regno) > + match_reg = true; > + if (CONST_INT_P (recog_data.operand[i]) > + && XINT (recog_data.operand[i], 0) == 0 > + && XWINT (recog_data.operand[i], 0) == 0) > + comp_zero = true; > + } > + return match_reg && comp_zero; > + } > + return false; > +} > + > +/* Copied from MIPS. Removes the instruction at index LOWER from ready > + queue READY and reinserts it in from of the instruction at index > + HIGHER. LOWER must be <= HIGHER. */ > +static void > +riscv_promote_ready (rtx_insn **ready, int lower, int higher) > +{ > + rtx_insn *new_head; > + int i; > + > + new_head = ready[lower]; > + for (i = lower; i < higher; i++) > + ready[i] = ready[i + 1]; > + ready[i] = new_head; > +} > + > +/* Attempt to avoid issuing VSETVL-type instructions before a branch that > + ensures they are non-zero, as setting VL=0 dynamically can be slow. */ > +static int > +riscv_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose > ATTRIBUTE_UNUSED, > + rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED) > +{ > + if (! TARGET_AVOID_VL_EQ_0) > + return riscv_issue_rate (); > + > + for (int i = *nreadyp - 1; i >= 0; i--) > + { > + /* Find the vsetvl. */ > + int avl_regno = vsetvl_avl_regno (ready[i]); > + if (avl_regno == -1 || i == 0) > + continue; > + for (int j = i - 1; j >= 0; j--) > + { > + /* Exit if another vsetvl is found before finding a branch insn > + in the ready queue. */ > + if (recog_memoized (ready[j]) >= 0 > + && get_attr_type (ready[j]) == TYPE_VSETVL > + && get_attr_type (ready[j]) == TYPE_VSETVL_PRE) > + break; > + /* Find branch. */ > + if (recog_memoized (ready[j]) >= 0 > + && insn_increases_zeroness_p (ready[j], avl_regno)) > + { > + /* Right now the only zeroness-increasing pattern we recognize > + is a branch-not-zero, so there's no sense in looking for any > + more zeroness at that point. */ > + riscv_promote_ready (ready, j, i); > + break; > + } > + } > + } > + > + return riscv_issue_rate (); > +} > + > /* Initialize the GCC target structure. */ > #undef TARGET_ASM_ALIGNED_HI_OP > #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" > @@ -14430,6 +14530,9 @@ bool need_shadow_stack_push_pop_p () > #undef TARGET_DOCUMENTATION_NAME > #define TARGET_DOCUMENTATION_NAME "RISC-V" > > +#undef TARGET_SCHED_REORDER > +#define TARGET_SCHED_REORDER riscv_sched_reorder > + > struct gcc_target targetm = TARGET_INITIALIZER; > > #include "gt-riscv.h" > diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt > index 7515c8ea13d..c6cab61fdc0 100644 > --- a/gcc/config/riscv/riscv.opt > +++ b/gcc/config/riscv/riscv.opt > @@ -681,3 +681,7 @@ Specifies whether the fence.tso instruction should be > used. > mautovec-segment > Target Integer Var(riscv_mautovec_segment) Init(1) > Enable (default) or disable generation of vector segment load/store > instructions. > + > +mavoid-vl0 > +Target Var(TARGET_AVOID_VL_EQ_0) Init(1) > +Avoid (default) code that dynamically sets VL=0 where possible. As stated above, this is not the general case of VL=0 avoidance but intersection of VL=0 and a branch, so better to phrase it that way, Also as others have said in earlier threads, this would be better as a cpu tune, specifically part of vector tuning, maybe default for OoO tune. Otherwise looks pretty cool ! -Vineet > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c > b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c > new file mode 100644 > index 00000000000..275922eb0bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl > -Ofast" } */ > + > +float g(float q[], int N){ > + float dqnorm = 0.0; > + > + #pragma GCC unroll 4 > + > + for (int i=0; i < N; i++) { > + dqnorm = dqnorm + q[i] * q[i]; > + } > + return dqnorm; > +} > + > +/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 > } } */ > -- > 2.43.0 >