Re: Initial shrink-wrapping patch

Richard Sandiford Thu, 01 Sep 2011 06:57:36 -0700

Bernd Schmidt <[email protected]> writes:
> +/* A for_each_rtx subroutine of record_hard_reg_sets.  */
> +static int
> +record_hard_reg_uses_1 (rtx *px, void *data)
> +{
> +  rtx x = *px;
> +  HARD_REG_SET *pused = (HARD_REG_SET *)data;
> +
> +  if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
> +    {
> +      int nregs = hard_regno_nregs[REGNO (x)][GET_MODE (x)];
> +      while (nregs-- > 0)
> +     SET_HARD_REG_BIT (*pused, REGNO (x) + nregs);
> +    }


add_to_hard_reg_set (pused, GET_MODE (x), REGNO (x));

> +/* A subroutine of requires_stack_frame_p, called via for_each_rtx.
> +   If any change is made, set CHANGED
> +   to true.  */

Strange line break, and comment doesn't match code (no "changed" variable).
Probably moot though because:

> +/* Return true if INSN requires the stack frame to be set up.
> +   PROLOGUE_USED contains the hard registers used in the function
> +   prologue.  */
> +static bool
> +requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used)
> +{
>[...]
> +  if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL))
> +    return true;
> +  CLEAR_HARD_REG_SET (hardregs);
> +  note_stores (PATTERN (insn), record_hard_reg_sets, &hardregs);
> +  if (hard_reg_set_intersect_p (hardregs, prologue_used))
> +    return true;
> +  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
> +  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +    if (TEST_HARD_REG_BIT (hardregs, regno)
> +     && df_regs_ever_live_p (regno))
> +      return true;

...I suppose this ought to use DF instead.

Does something guarantee that non-local gotos are marked as
needing a frame?

Why do we need to check specifically for pic_offset_table_rtx?  Isn't it
handled by the generic "live registers set by the prologue" test?
Reason for asking is that pic_offset_table_rtx can be a global value,
such as for mips*-elf.

> -/* Insert gen_return at the end of block BB.  This also means updating
> -   block_for_insn appropriately.  */
> +
> +static rtx
> +gen_return_pattern (bool simple_p)
> +{
> +#ifdef HAVE_simple_return
> +  return simple_p ? gen_simple_return () : gen_return ();
> +#else
> +  gcc_assert (!simple_p);
> +  return gen_return ();
> +#endif
> +}

Missing function comment.

> +
> +/* Insert an appropriate return pattern at the end of block BB.  This
> +   also means updating block_for_insn appropriately.  */
>  
>  static void
> -emit_return_into_block (basic_block bb)
> +emit_return_into_block (bool simple_p, basic_block bb)

Pedantic, but the comment should reference SIMPLE_P.

> +#ifdef HAVE_simple_return
> +  /* Try to perform a kind of shrink-wrapping, making sure the
> +     prologue/epilogue is emitted only around those parts of the
> +     function that require it.  */
> +
> +  if (flag_shrink_wrap && HAVE_simple_return && !flag_non_call_exceptions
> +      && HAVE_prologue && !crtl->calls_eh_return)
> +    {

Maybe it should be obvious, but could you add a comment explaining why
flag_non_call_exceptions and crtl->calls_eh_return need to be checked
explicitly?  I can see why we don't want to optimise functions that
call eh_return, but I was curious why it wasn't handled by the general
insn-level analysis.

Would checking prologue_seq != NULL be better than HAVE_prologue?

> +      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
> +      rtx p_insn;
> +
> +      VEC(basic_block, heap) *vec;
> +      basic_block bb;
> +      bitmap_head bb_antic_flags;
> +      bitmap_head bb_on_list;
> +
> +      /* Compute the registers set and used in the prologue.  */
> +      CLEAR_HARD_REG_SET (prologue_clobbered);
> +      CLEAR_HARD_REG_SET (prologue_used);
> +      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> +     {
> +       HARD_REG_SET this_used;
> +       if (!NONDEBUG_INSN_P (p_insn))
> +         continue;
> +
> +       CLEAR_HARD_REG_SET (this_used);
> +       note_uses (&PATTERN (p_insn), record_hard_reg_uses,
> +                  &this_used);
> +       AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
> +       IOR_HARD_REG_SET (prologue_used, this_used);
> +       note_stores (PATTERN (p_insn), record_hard_reg_sets,
> +                    &prologue_clobbered);
> +     }

Should this iterate over split_prologue_seq as well?

Could we combine prologue_seq and split_prologue_seq into a single sequence?

> +      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
> +      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
> +
> +      vec = VEC_alloc (basic_block, heap, n_basic_blocks);
> +
> +      FOR_EACH_BB (bb)
> +     {
> +       rtx insn;
> +       FOR_BB_INSNS (bb, insn)
> +         {
> +           if (requires_stack_frame_p (insn, prologue_used))
> +             {
> +               bitmap_set_bit (&bb_flags, bb->index);
> +               VEC_quick_push (basic_block, vec, bb);
> +               break;
> +             }
> +         }

Excess { and } in for loop.

> +     }
> +
> +      /* For every basic block that needs a prologue, mark all blocks
> +      reachable from it, so as to ensure they are also seen as
> +      requiring a prologue.  */
> +      while (!VEC_empty (basic_block, vec))
> +     {
> +       basic_block tmp_bb = VEC_pop (basic_block, vec);
> +       edge e;
> +       edge_iterator ei;
> +       FOR_EACH_EDGE (e, ei, tmp_bb->succs)
> +         {
> +           if (e->dest == EXIT_BLOCK_PTR
> +               || bitmap_bit_p (&bb_flags, e->dest->index))
> +             continue;
> +           bitmap_set_bit (&bb_flags, e->dest->index);
> +           VEC_quick_push (basic_block, vec, e->dest);
> +         }

Don't know which is the preferred style, but:

    if (e->dest != EXIT_BLOCK_PTR
        && bitmap_set_bit (&bb_flags, e->dest->index))
      VEC_quick_push (basic_block, vec, e->dest);

should be a little more efficient.

> +      /* Now walk backwards from every block that is marked as needing
> +      a prologue to compute the bb_antic_flags bitmap.  */
> +      bitmap_copy (&bb_antic_flags, &bb_flags);
> +      FOR_EACH_BB (bb)
> +     {
> +       edge e;
> +       edge_iterator ei;
> +       if (!bitmap_bit_p (&bb_flags, bb->index))
> +         continue;
> +       FOR_EACH_EDGE (e, ei, bb->preds)
> +         if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
> +           {
> +             VEC_quick_push (basic_block, vec, e->src);
> +             bitmap_set_bit (&bb_on_list, e->src->index);
> +           }

Here too I think we want:

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                && bitmap_set_bit (&bb_on_list, e->src->index))
              VEC_quick_push (basic_block, vec, e->src);

in this case to avoid pushing the same thing twice.

> +       bitmap_clear_bit (&bb_on_list, tmp_bb->index);
> +       FOR_EACH_EDGE (e, ei, tmp_bb->succs)
> +         {
> +           if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
> +             {
> +               all_set = false;
> +               break;
> +             }
> +         }

Excess { and } in for loop.

> +       if (all_set)
> +         {
> +           bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
> +           FOR_EACH_EDGE (e, ei, tmp_bb->preds)
> +             if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
> +               {
> +                 VEC_quick_push (basic_block, vec, e->src);
> +                 bitmap_set_bit (&bb_on_list, e->src->index);
> +               }
> +         }

Same:

          FOR_EACH_EDGE (e, ei, tmp_bb->preds)
            if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
                && bitmap_set_bit (&bb_on_list, e->src->index))
              VEC_quick_push (basic_block, vec, e->src);

comment as above.

> +      /* Test whether the prologue is known to clobber any register
> +      (other than FP or SP) which are live on the edge.  */
> +      CLEAR_HARD_REG_SET (prologue_clobbered);
> +      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> +     if (NONDEBUG_INSN_P (p_insn))
> +       note_stores (PATTERN (p_insn), record_hard_reg_sets,
> +                    &prologue_clobbered);
> +      for (p_insn = split_prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> +     if (NONDEBUG_INSN_P (p_insn))
> +       note_stores (PATTERN (p_insn), record_hard_reg_sets,
> +                    &prologue_clobbered);
> +      CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
> +      if (frame_pointer_needed)
> +     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);

Seems like we should be able to reuse the insn walk from the beginning
of the enclosing if statement.

> +#ifdef HAVE_simple_return
> +       simple_p = (entry_edge != orig_entry_edge
> +                   ? !bitmap_bit_p (&bb_flags, bb->index) : false);
> +#else
> +       simple_p = false;
> +#endif

Why is this bb_flags rather than bb_antic_flags?

          simple_p = (entry_edge != orig_entry_edge
                      && !bitmap_bit_p (&bb_flags, bb->index));

seems more readable, but I suppose that's personal taste.

> +           gcc_assert (simple_p);
> +           new_bb = split_edge (e);
> +           emit_barrier_after (BB_END (new_bb));
> +           emit_return_into_block (simple_p, new_bb);
> +#ifdef HAVE_simple_return
> +           if (simple_p)

Check is redundant given the gcc_assert.


> +           /* If this block has only one successor, it both jumps
> +              and falls through to the fallthru block, so we can't
> +              delete the edge.  */
> +           if (single_succ_p (bb))
> +             continue;

Seems odd that this could happen in optimised code, but if it did,
wouldn't it invalidate the simple_p transformation?  Seems like
the non-fallthrough edge would use a simple return but the
fallthrough one wouldn't.

> -      start_sequence ();
> -      emit_note (NOTE_INSN_EPILOGUE_BEG);
> -      emit_insn (gen_sibcall_epilogue ());
> -      seq = get_insns ();
> -      end_sequence ();
> +      ep_seq = gen_sibcall_epilogue ();
> +      if (ep_seq)
> +     {

Why the new null check?

Richard

Re: Initial shrink-wrapping patch

Reply via email to