On Sun, Jun 30, 2024 at 4:15 AM Ajit Agarwal <aagar...@linux.ibm.com> wrote: > > Hello All: > > This patch determines Unroll factor based on loop register pressure. > > Unroll factor is quotient of max of available registers in loop > by number of liveness. > > If available registers increases unroll factor increases. > Wherein unroll factor decreases if number of liveness increases.
Unrolling as implemented does not increase register lifetime unless -fsplit-ivs-in-unroller or -fvariable-expansion-in-unroller. But I do not see you looking at those transforms at all. Richard. > Loop unrolling is based on loop variables that determines unroll > factor. Loop variables of the loop are the variables that increases > register pressure and take advantage of existing register pressure > calculation. > > Available registers are determined by the number of hard registers > available for each register class minus max reg pressure of loop > for given register class. > > Bootstrapped and regtested on powerpc64-linux-gnu. > > Thanks & Regards > Ajit > > > rtl-optimization: Loop unroll factor based on register pressure > > Unroll factor is calculated based on loop register pressure. > > Unroll factor is quotient of max of available registers in loop > by number of liveness. > > If available registers increases unroll factor increases. > Wherein unroll factor decreases if number of liveness increases. > > Loop unrolling is based on loop variables that determines unroll > factor. Loop variables of the loop are the variables that increases > register pressure and take advantage of existing register pressure > calculation. > > Available registers are determined by the number of hard registers > available for each register class minus max reg pressure of loop > for given register class. > > 2024-06-29 Ajit Kumar Agarwal <aagar...@linux.ibm.com> > > gcc/ChangeLog: > > * loop-unroll.cc: Add calculation of register pressure of > the loop and use of that to calculate unroll factor. > --- > gcc/loop-unroll.cc | 331 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 328 insertions(+), 3 deletions(-) > > diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc > index bfdfe6c2bb7..6936ba7afb9 100644 > --- a/gcc/loop-unroll.cc > +++ b/gcc/loop-unroll.cc > @@ -35,6 +35,11 @@ along with GCC; see the file COPYING3. If not see > #include "dojump.h" > #include "expr.h" > #include "dumpfile.h" > +#include "regs.h" > +#include "ira.h" > +#include "rtl-iter.h" > +#include "regset.h" > +#include "df.h" > > /* This pass performs loop unrolling. We only perform this > optimization on innermost loops (with single exception) because > @@ -65,6 +70,38 @@ along with GCC; see the file COPYING3. If not see > showed that this choice may affect performance in order of several %. > */ > > +class loop_data > +{ > +public: > + class loop *outermost_exit; /* The outermost exit of the loop. */ > + bool has_call; /* True if the loop contains a call. */ > + /* Maximal register pressure inside loop for given register class > + (defined only for the pressure classes). */ > + int max_reg_pressure[N_REG_CLASSES]; > + /* Loop regs referenced and live pseudo-registers. */ > + bitmap_head regs_ref; > + bitmap_head regs_live; > +}; > + > +#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux) > + > +/* Record all regs that are set in any one insn. Communication from > + mark_reg_{store,clobber} and global_conflicts. Asm can refer to > + all hard-registers. */ > +static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS > + ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2]; > +/* Number of regs stored in the previous array. */ > +static int n_regs_set; > + > +/* Currently processed loop. */ > +static class loop *curr_loop; > + > +/* Registers currently living. */ > +static bitmap_head curr_regs_live; > + > +/* Current reg pressure for each pressure class. */ > +static int curr_reg_pressure[N_REG_CLASSES]; > + > /* Information about induction variables to split. */ > > struct iv_to_split > @@ -272,11 +309,262 @@ decide_unrolling (int flags) > } > } > > +/* Return pressure class and number of needed hard registers (through > + *NREGS) of register REGNO. */ > +static enum reg_class > +get_regno_pressure_class (int regno, int *nregs) > +{ > + if (regno >= FIRST_PSEUDO_REGISTER) > + { > + enum reg_class pressure_class; > + pressure_class = reg_allocno_class (regno); > + pressure_class = ira_pressure_class_translate[pressure_class]; > + *nregs > + = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)]; > + return pressure_class; > + } > + else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno) > + && ! TEST_HARD_REG_BIT (eliminable_regset, regno)) > + { > + *nregs = 1; > + return ira_pressure_class_translate[REGNO_REG_CLASS (regno)]; > + } > + else > + { > + *nregs = 0; > + return NO_REGS; > + } > +} > + > +/* Increase (if INCR_P) or decrease current register pressure for > + register REGNO. */ > +static void > +change_pressure (int regno, bool incr_p) > +{ > + int nregs; > + enum reg_class pressure_class; > + > + pressure_class = get_regno_pressure_class (regno, &nregs); > + if (! incr_p) > + curr_reg_pressure[pressure_class] -= nregs; > + else > + { > + curr_reg_pressure[pressure_class] += nregs; > + if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] > + < curr_reg_pressure[pressure_class]) > + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] > + = curr_reg_pressure[pressure_class]; > + } > +} > + > +/* Mark REGNO birth. */ > +static void > +mark_regno_live (int regno) > +{ > + class loop *loop; > + > + for (loop = curr_loop; > + loop != current_loops->tree_root; > + loop = loop_outer (loop)) > + bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno); > + if (!bitmap_set_bit (&curr_regs_live, regno)) > + return; > + change_pressure (regno, true); > +} > + > +/* Mark REGNO death. */ > +static void > +mark_regno_death (int regno) > +{ > + if (! bitmap_clear_bit (&curr_regs_live, regno)) > + return; > + change_pressure (regno, false); > +} > + > +/* Mark setting register REG. */ > +static void > +mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED, > + void *data ATTRIBUTE_UNUSED) > +{ > + if (GET_CODE (reg) == SUBREG) > + reg = SUBREG_REG (reg); > + > + if (! REG_P (reg)) > + return; > + > + regs_set[n_regs_set++] = reg; > + > + unsigned int end_regno = END_REGNO (reg); > + for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno) > + mark_regno_live (regno); > +} > + > +/* Mark clobbering register REG. */ > +static void > +mark_reg_clobber (rtx reg, const_rtx setter, void *data) > +{ > + if (GET_CODE (setter) == CLOBBER) > + mark_reg_store (reg, setter, data); > +} > + > +/* Mark register REG death. */ > +static void > +mark_reg_death (rtx reg) > +{ > + unsigned int end_regno = END_REGNO (reg); > + for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno) > + mark_regno_death (regno); > +} > + > +/* Mark occurrence of registers in X for the current loop. */ > +static void > +mark_ref_regs (rtx x) > +{ > + RTX_CODE code; > + int i; > + const char *fmt; > + > + if (!x) > + return; > + > + code = GET_CODE (x); > + if (code == REG) > + { > + class loop *loop; > + > + for (loop = curr_loop; > + loop != current_loops->tree_root; > + loop = loop_outer (loop)) > + bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x)); > + return; > + } > + > + fmt = GET_RTX_FORMAT (code); > + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) > + if (fmt[i] == 'e') > + mark_ref_regs (XEXP (x, i)); > + else if (fmt[i] == 'E') > + { > + int j; > + > + for (j = 0; j < XVECLEN (x, i); j++) > + mark_ref_regs (XVECEXP (x, i, j)); > + } > +} > + > +/* Calculate register pressure in the loops. */ > +static void > +calculate_loop_reg_pressure (void) > +{ > + int i; > + unsigned int j; > + bitmap_iterator bi; > + basic_block bb; > + rtx_insn *insn; > + rtx link; > + class loop *parent; > + > + for (auto loop : loops_list (cfun, 0)) > + if (loop->aux == NULL) > + { > + loop->aux = xcalloc (1, sizeof (class loop_data)); > + bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack); > + bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack); > + } > + ira_setup_eliminable_regset (); > + bitmap_initialize (&curr_regs_live, ®_obstack); > + FOR_EACH_BB_FN (bb, cfun) > + { > + curr_loop = bb->loop_father; > + if (curr_loop == current_loops->tree_root) > + continue; > + > + for (class loop *loop = curr_loop; > + loop != current_loops->tree_root; > + loop = loop_outer (loop)) > + bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb)); > + > + bitmap_copy (&curr_regs_live, DF_LR_IN (bb)); > + for (i = 0; i < ira_pressure_classes_num; i++) > + curr_reg_pressure[ira_pressure_classes[i]] = 0; > + EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi) > + change_pressure (j, true); > + > + FOR_BB_INSNS (bb, insn) > + { > + if (! NONDEBUG_INSN_P (insn)) > + continue; > + > + mark_ref_regs (PATTERN (insn)); > + n_regs_set = 0; > + note_stores (insn, mark_reg_clobber, NULL); > + > + /* Mark any registers dead after INSN as dead now. */ > + > + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) > + if (REG_NOTE_KIND (link) == REG_DEAD) > + mark_reg_death (XEXP (link, 0)); > + > + /* Mark any registers set in INSN as live, > + and mark them as conflicting with all other live regs. > + Clobbers are processed again, so they conflict with > + the registers that are set. */ > + > + note_stores (insn, mark_reg_store, NULL); > + > + if (AUTO_INC_DEC) > + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) > + if (REG_NOTE_KIND (link) == REG_INC) > + mark_reg_store (XEXP (link, 0), NULL_RTX, NULL); > + > + while (n_regs_set-- > 0) > + { > + rtx note = find_regno_note (insn, REG_UNUSED, > + REGNO (regs_set[n_regs_set])); > + if (! note) > + continue; > + > + mark_reg_death (XEXP (note, 0)); > + } > + } > + } > + bitmap_release (&curr_regs_live); > + if (dump_file == NULL) > + return; > + for (auto loop : loops_list (cfun, 0)) > + { > + parent = loop_outer (loop); > + fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n", > + loop->num, (parent == NULL ? -1 : parent->num), > + loop->header->index, loop_depth (loop)); > + fprintf (dump_file, "\n ref. regnos:"); > + EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi) > + fprintf (dump_file, " %d", j); > + fprintf (dump_file, "\n live regnos:"); > + EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi) > + fprintf (dump_file, " %d", j); > + fprintf (dump_file, "\n Pressure:"); > + for (i = 0; (int) i < ira_pressure_classes_num; i++) > + { > + enum reg_class pressure_class; > + > + pressure_class = ira_pressure_classes[i]; > + if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0) > + continue; > + fprintf (dump_file, " %s=%d", reg_class_names[pressure_class], > + LOOP_DATA (loop)->max_reg_pressure[pressure_class]); > + } > + fprintf (dump_file, "\n"); > + } > +} > + > /* Unroll LOOPS. */ > void > unroll_loops (int flags) > { > bool changed = false; > + df_analyze (); > + calculate_loop_reg_pressure (); > > /* Now decide rest of unrolling. */ > decide_unrolling (flags); > @@ -339,6 +627,40 @@ loop_exit_at_end_p (class loop *loop) > return true; > } > > +/* Determine unroll factor for a given LOOP. Unroll factor > + is calculated with quotient of maximum number of available registers > + with number of liveness. > + If available registers is more unrolling factor is more. And if > + number of liveness is more unroll factor is less. */ > +int > +determine_unroll_factor (class loop *loop) > +{ > + int nunroll = 0; > + int avail = 0; > + int max_pressure = 0; > + int num_live = 0; > + for (int i = 0; (int) i < ira_pressure_classes_num; i++) > + { > + enum reg_class pressure_class; > + > + pressure_class = ira_pressure_classes[i]; > + max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class]; > + if (max_pressure == 0) > + continue; > + > + if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail) > + || avail == 0) > + avail = ira_class_hard_regs_num[pressure_class] - max_pressure; > + > + num_live++; > + } > + > + if (avail > 0 && num_live > 0) > + nunroll = avail / num_live; > + > + return nunroll; > +} > + > /* Decide whether to unroll LOOP iterating constant number of times > and how much. */ > > @@ -357,10 +679,11 @@ decide_unroll_constant_iterations (class loop *loop, > int flags) > dump_printf (MSG_NOTE, > "considering unrolling loop with constant " > "number of iterations\n"); > - > /* nunroll = total number of copies of the original loop body in > unrolled loop (i.e. if it is 2, we have to duplicate loop body once). > */ > - nunroll = param_max_unrolled_insns / loop->ninsns; > + nunroll = determine_unroll_factor (loop); > + if (nunroll <= 0) > + nunroll = param_max_unrolled_insns / loop->ninsns; > nunroll_by_av > = param_max_average_unrolled_insns / loop->av_ninsns; > if (nunroll > nunroll_by_av) > @@ -678,7 +1001,9 @@ decide_unroll_runtime_iterations (class loop *loop, int > flags) > > /* nunroll = total number of copies of the original loop body in > unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ > - nunroll = param_max_unrolled_insns / loop->ninsns; > + nunroll = determine_unroll_factor (loop); > + if (nunroll <= 0) > + nunroll = param_max_unrolled_insns / loop->ninsns; > nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns; > if (nunroll > nunroll_by_av) > nunroll = nunroll_by_av; > -- > 2.43.5 >