On Sun, Jun 30, 2024 at 4:15 AM Ajit Agarwal <aagar...@linux.ibm.com> wrote:
>
> Hello All:
>
> This patch determines Unroll factor based on loop register pressure.
>
> Unroll factor is quotient of max of available registers in loop
> by number of liveness.
>
> If available registers increases unroll factor increases.
> Wherein unroll factor decreases if number of liveness increases.

Unrolling as implemented does not increase register lifetime unless
-fsplit-ivs-in-unroller or -fvariable-expansion-in-unroller.  But I do not
see you looking at those transforms at all.

Richard.

> Loop unrolling is based on loop variables that determines unroll
> factor. Loop variables of the loop are the variables that increases
> register pressure and take advantage of existing register pressure
> calculation.
>
> Available registers are determined by the number of hard registers
> available for each register class minus max reg pressure of loop
> for given register class.
>
> Bootstrapped and regtested on powerpc64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
>
> rtl-optimization: Loop unroll factor based on register pressure
>
> Unroll factor is calculated based on loop register pressure.
>
> Unroll factor is quotient of max of available registers in loop
> by number of liveness.
>
> If available registers increases unroll factor increases.
> Wherein unroll factor decreases if number of liveness increases.
>
> Loop unrolling is based on loop variables that determines unroll
> factor. Loop variables of the loop are the variables that increases
> register pressure and take advantage of existing register pressure
> calculation.
>
> Available registers are determined by the number of hard registers
> available for each register class minus max reg pressure of loop
> for given register class.
>
> 2024-06-29  Ajit Kumar Agarwal  <aagar...@linux.ibm.com>
>
> gcc/ChangeLog:
>
>         * loop-unroll.cc: Add calculation of register pressure of
>         the loop and use of that to calculate unroll factor.
> ---
>  gcc/loop-unroll.cc | 331 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 328 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
> index bfdfe6c2bb7..6936ba7afb9 100644
> --- a/gcc/loop-unroll.cc
> +++ b/gcc/loop-unroll.cc
> @@ -35,6 +35,11 @@ along with GCC; see the file COPYING3.  If not see
>  #include "dojump.h"
>  #include "expr.h"
>  #include "dumpfile.h"
> +#include "regs.h"
> +#include "ira.h"
> +#include "rtl-iter.h"
> +#include "regset.h"
> +#include "df.h"
>
>  /* This pass performs loop unrolling.  We only perform this
>     optimization on innermost loops (with single exception) because
> @@ -65,6 +70,38 @@ along with GCC; see the file COPYING3.  If not see
>     showed that this choice may affect performance in order of several %.
>     */
>
> +class loop_data
> +{
> +public:
> +  class loop *outermost_exit;  /* The outermost exit of the loop.  */
> +  bool has_call;               /* True if the loop contains a call.  */
> +  /* Maximal register pressure inside loop for given register class
> +     (defined only for the pressure classes).  */
> +  int max_reg_pressure[N_REG_CLASSES];
> +  /* Loop regs referenced and live pseudo-registers.  */
> +  bitmap_head regs_ref;
> +  bitmap_head regs_live;
> +};
> +
> +#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
> +
> +/* Record all regs that are set in any one insn.  Communication from
> +   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
> +   all hard-registers.  */
> +static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
> +                    ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
> +/* Number of regs stored in the previous array.  */
> +static int n_regs_set;
> +
> +/* Currently processed loop.  */
> +static class loop *curr_loop;
> +
> +/* Registers currently living.  */
> +static bitmap_head curr_regs_live;
> +
> +/* Current reg pressure for each pressure class.  */
> +static int curr_reg_pressure[N_REG_CLASSES];
> +
>  /* Information about induction variables to split.  */
>
>  struct iv_to_split
> @@ -272,11 +309,262 @@ decide_unrolling (int flags)
>      }
>  }
>
> +/* Return pressure class and number of needed hard registers (through
> +   *NREGS) of register REGNO.  */
> +static enum reg_class
> +get_regno_pressure_class (int regno, int *nregs)
> +{
> +  if (regno >= FIRST_PSEUDO_REGISTER)
> +    {
> +      enum reg_class pressure_class;
> +      pressure_class = reg_allocno_class (regno);
> +      pressure_class = ira_pressure_class_translate[pressure_class];
> +      *nregs
> +       = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
> +      return pressure_class;
> +    }
> +  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
> +          && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
> +    {
> +      *nregs = 1;
> +      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
> +    }
> +  else
> +    {
> +      *nregs = 0;
> +      return NO_REGS;
> +    }
> +}
> +
> +/* Increase (if INCR_P) or decrease current register pressure for
> +   register REGNO.  */
> +static void
> +change_pressure (int regno, bool incr_p)
> +{
> +  int nregs;
> +  enum reg_class pressure_class;
> +
> +  pressure_class = get_regno_pressure_class (regno, &nregs);
> +  if (! incr_p)
> +    curr_reg_pressure[pressure_class] -= nregs;
> +  else
> +    {
> +      curr_reg_pressure[pressure_class] += nregs;
> +      if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
> +         < curr_reg_pressure[pressure_class])
> +       LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
> +         = curr_reg_pressure[pressure_class];
> +    }
> +}
> +
> +/* Mark REGNO birth.  */
> +static void
> +mark_regno_live (int regno)
> +{
> +  class loop *loop;
> +
> +  for (loop = curr_loop;
> +       loop != current_loops->tree_root;
> +       loop = loop_outer (loop))
> +    bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
> +  if (!bitmap_set_bit (&curr_regs_live, regno))
> +    return;
> +  change_pressure (regno, true);
> +}
> +
> +/* Mark REGNO death.  */
> +static void
> +mark_regno_death (int regno)
> +{
> +  if (! bitmap_clear_bit (&curr_regs_live, regno))
> +    return;
> +  change_pressure (regno, false);
> +}
> +
> +/* Mark setting register REG.  */
> +static void
> +mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
> +               void *data ATTRIBUTE_UNUSED)
> +{
> +  if (GET_CODE (reg) == SUBREG)
> +    reg = SUBREG_REG (reg);
> +
> +  if (! REG_P (reg))
> +    return;
> +
> +  regs_set[n_regs_set++] = reg;
> +
> +  unsigned int end_regno = END_REGNO (reg);
> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
> +    mark_regno_live (regno);
> +}
> +
> +/* Mark clobbering register REG.  */
> +static void
> +mark_reg_clobber (rtx reg, const_rtx setter, void *data)
> +{
> +  if (GET_CODE (setter) == CLOBBER)
> +    mark_reg_store (reg, setter, data);
> +}
> +
> +/* Mark register REG death.  */
> +static void
> +mark_reg_death (rtx reg)
> +{
> +  unsigned int end_regno = END_REGNO (reg);
> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
> +    mark_regno_death (regno);
> +}
> +
> +/* Mark occurrence of registers in X for the current loop.  */
> +static void
> +mark_ref_regs (rtx x)
> +{
> +  RTX_CODE code;
> +  int i;
> +  const char *fmt;
> +
> +  if (!x)
> +    return;
> +
> +  code = GET_CODE (x);
> +  if (code == REG)
> +    {
> +      class loop *loop;
> +
> +      for (loop = curr_loop;
> +          loop != current_loops->tree_root;
> +          loop = loop_outer (loop))
> +       bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
> +      return;
> +    }
> +
> +  fmt = GET_RTX_FORMAT (code);
> +  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
> +    if (fmt[i] == 'e')
> +      mark_ref_regs (XEXP (x, i));
> +    else if (fmt[i] == 'E')
> +      {
> +       int j;
> +
> +       for (j = 0; j < XVECLEN (x, i); j++)
> +         mark_ref_regs (XVECEXP (x, i, j));
> +      }
> +}
> +
> +/* Calculate register pressure in the loops.  */
> +static void
> +calculate_loop_reg_pressure (void)
> +{
> +  int i;
> +  unsigned int j;
> +  bitmap_iterator bi;
> +  basic_block bb;
> +  rtx_insn *insn;
> +  rtx link;
> +  class loop *parent;
> +
> +  for (auto loop : loops_list (cfun, 0))
> +    if (loop->aux == NULL)
> +      {
> +       loop->aux = xcalloc (1, sizeof (class loop_data));
> +       bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
> +       bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
> +      }
> +  ira_setup_eliminable_regset ();
> +  bitmap_initialize (&curr_regs_live, &reg_obstack);
> +  FOR_EACH_BB_FN (bb, cfun)
> +    {
> +      curr_loop = bb->loop_father;
> +      if (curr_loop == current_loops->tree_root)
> +       continue;
> +
> +      for (class loop *loop = curr_loop;
> +          loop != current_loops->tree_root;
> +          loop = loop_outer (loop))
> +       bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
> +
> +      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
> +      for (i = 0; i < ira_pressure_classes_num; i++)
> +       curr_reg_pressure[ira_pressure_classes[i]] = 0;
> +      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
> +       change_pressure (j, true);
> +
> +      FOR_BB_INSNS (bb, insn)
> +       {
> +         if (! NONDEBUG_INSN_P (insn))
> +           continue;
> +
> +         mark_ref_regs (PATTERN (insn));
> +         n_regs_set = 0;
> +         note_stores (insn, mark_reg_clobber, NULL);
> +
> +         /* Mark any registers dead after INSN as dead now.  */
> +
> +         for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
> +           if (REG_NOTE_KIND (link) == REG_DEAD)
> +             mark_reg_death (XEXP (link, 0));
> +
> +         /* Mark any registers set in INSN as live,
> +            and mark them as conflicting with all other live regs.
> +            Clobbers are processed again, so they conflict with
> +            the registers that are set.  */
> +
> +         note_stores (insn, mark_reg_store, NULL);
> +
> +         if (AUTO_INC_DEC)
> +           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
> +             if (REG_NOTE_KIND (link) == REG_INC)
> +               mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
> +
> +         while (n_regs_set-- > 0)
> +           {
> +             rtx note = find_regno_note (insn, REG_UNUSED,
> +                                         REGNO (regs_set[n_regs_set]));
> +             if (! note)
> +               continue;
> +
> +             mark_reg_death (XEXP (note, 0));
> +           }
> +       }
> +    }
> +  bitmap_release (&curr_regs_live);
> +  if (dump_file == NULL)
> +    return;
> +  for (auto loop : loops_list (cfun, 0))
> +    {
> +      parent = loop_outer (loop);
> +      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
> +              loop->num, (parent == NULL ? -1 : parent->num),
> +              loop->header->index, loop_depth (loop));
> +      fprintf (dump_file, "\n    ref. regnos:");
> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
> +       fprintf (dump_file, " %d", j);
> +      fprintf (dump_file, "\n    live regnos:");
> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
> +       fprintf (dump_file, " %d", j);
> +      fprintf (dump_file, "\n    Pressure:");
> +      for (i = 0; (int) i < ira_pressure_classes_num; i++)
> +       {
> +         enum reg_class pressure_class;
> +
> +         pressure_class = ira_pressure_classes[i];
> +         if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
> +           continue;
> +         fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
> +                  LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
> +       }
> +      fprintf (dump_file, "\n");
> +    }
> +}
> +
>  /* Unroll LOOPS.  */
>  void
>  unroll_loops (int flags)
>  {
>    bool changed = false;
> +  df_analyze ();
> +  calculate_loop_reg_pressure ();
>
>    /* Now decide rest of unrolling.  */
>    decide_unrolling (flags);
> @@ -339,6 +627,40 @@ loop_exit_at_end_p (class loop *loop)
>    return true;
>  }
>
> +/* Determine unroll factor for a given LOOP.  Unroll factor
> +   is calculated with quotient of maximum number of available registers
> +   with number of liveness.
> +   If available registers is more unrolling factor is more.  And if
> +   number of liveness is more unroll factor is less.  */
> +int
> +determine_unroll_factor (class loop  *loop)
> +{
> +  int nunroll = 0;
> +  int avail = 0;
> +  int max_pressure = 0;
> +  int num_live = 0;
> +  for (int i = 0; (int) i < ira_pressure_classes_num; i++)
> +    {
> +      enum reg_class pressure_class;
> +
> +      pressure_class = ira_pressure_classes[i];
> +      max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class];
> +      if (max_pressure == 0)
> +       continue;
> +
> +      if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail)
> +         || avail == 0)
> +       avail = ira_class_hard_regs_num[pressure_class] - max_pressure;
> +
> +      num_live++;
> +    }
> +
> +  if (avail > 0 && num_live > 0)
> +    nunroll = avail / num_live;
> +
> +  return nunroll;
> +}
> +
>  /* Decide whether to unroll LOOP iterating constant number of times
>     and how much.  */
>
> @@ -357,10 +679,11 @@ decide_unroll_constant_iterations (class loop *loop, 
> int flags)
>      dump_printf (MSG_NOTE,
>                  "considering unrolling loop with constant "
>                  "number of iterations\n");
> -
>    /* nunroll = total number of copies of the original loop body in
>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  
> */
> -  nunroll = param_max_unrolled_insns / loop->ninsns;
> +  nunroll = determine_unroll_factor (loop);
> +  if (nunroll <= 0)
> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>    nunroll_by_av
>      = param_max_average_unrolled_insns / loop->av_ninsns;
>    if (nunroll > nunroll_by_av)
> @@ -678,7 +1001,9 @@ decide_unroll_runtime_iterations (class loop *loop, int 
> flags)
>
>    /* nunroll = total number of copies of the original loop body in
>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
> -  nunroll = param_max_unrolled_insns / loop->ninsns;
> +  nunroll = determine_unroll_factor (loop);
> +  if (nunroll <= 0)
> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>    nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
>    if (nunroll > nunroll_by_av)
>      nunroll = nunroll_by_av;
> --
> 2.43.5
>

Reply via email to