Bernd Schmidt <[email protected]> writes:
> +/* A for_each_rtx subroutine of record_hard_reg_sets. */
> +static int
> +record_hard_reg_uses_1 (rtx *px, void *data)
> +{
> + rtx x = *px;
> + HARD_REG_SET *pused = (HARD_REG_SET *)data;
> +
> + if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
> + {
> + int nregs = hard_regno_nregs[REGNO (x)][GET_MODE (x)];
> + while (nregs-- > 0)
> + SET_HARD_REG_BIT (*pused, REGNO (x) + nregs);
> + }
add_to_hard_reg_set (pused, GET_MODE (x), REGNO (x));
> +/* A subroutine of requires_stack_frame_p, called via for_each_rtx.
> + If any change is made, set CHANGED
> + to true. */
Strange line break, and comment doesn't match code (no "changed" variable).
Probably moot though because:
> +/* Return true if INSN requires the stack frame to be set up.
> + PROLOGUE_USED contains the hard registers used in the function
> + prologue. */
> +static bool
> +requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used)
> +{
>[...]
> + if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL))
> + return true;
> + CLEAR_HARD_REG_SET (hardregs);
> + note_stores (PATTERN (insn), record_hard_reg_sets, &hardregs);
> + if (hard_reg_set_intersect_p (hardregs, prologue_used))
> + return true;
> + AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
> + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> + if (TEST_HARD_REG_BIT (hardregs, regno)
> + && df_regs_ever_live_p (regno))
> + return true;
...I suppose this ought to use DF instead.
Does something guarantee that non-local gotos are marked as
needing a frame?
Why do we need to check specifically for pic_offset_table_rtx? Isn't it
handled by the generic "live registers set by the prologue" test?
Reason for asking is that pic_offset_table_rtx can be a global value,
such as for mips*-elf.
> -/* Insert gen_return at the end of block BB. This also means updating
> - block_for_insn appropriately. */
> +
> +static rtx
> +gen_return_pattern (bool simple_p)
> +{
> +#ifdef HAVE_simple_return
> + return simple_p ? gen_simple_return () : gen_return ();
> +#else
> + gcc_assert (!simple_p);
> + return gen_return ();
> +#endif
> +}
Missing function comment.
> +
> +/* Insert an appropriate return pattern at the end of block BB. This
> + also means updating block_for_insn appropriately. */
>
> static void
> -emit_return_into_block (basic_block bb)
> +emit_return_into_block (bool simple_p, basic_block bb)
Pedantic, but the comment should reference SIMPLE_P.
> +#ifdef HAVE_simple_return
> + /* Try to perform a kind of shrink-wrapping, making sure the
> + prologue/epilogue is emitted only around those parts of the
> + function that require it. */
> +
> + if (flag_shrink_wrap && HAVE_simple_return && !flag_non_call_exceptions
> + && HAVE_prologue && !crtl->calls_eh_return)
> + {
Maybe it should be obvious, but could you add a comment explaining why
flag_non_call_exceptions and crtl->calls_eh_return need to be checked
explicitly? I can see why we don't want to optimise functions that
call eh_return, but I was curious why it wasn't handled by the general
insn-level analysis.
Would checking prologue_seq != NULL be better than HAVE_prologue?
> + HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
> + rtx p_insn;
> +
> + VEC(basic_block, heap) *vec;
> + basic_block bb;
> + bitmap_head bb_antic_flags;
> + bitmap_head bb_on_list;
> +
> + /* Compute the registers set and used in the prologue. */
> + CLEAR_HARD_REG_SET (prologue_clobbered);
> + CLEAR_HARD_REG_SET (prologue_used);
> + for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> + {
> + HARD_REG_SET this_used;
> + if (!NONDEBUG_INSN_P (p_insn))
> + continue;
> +
> + CLEAR_HARD_REG_SET (this_used);
> + note_uses (&PATTERN (p_insn), record_hard_reg_uses,
> + &this_used);
> + AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
> + IOR_HARD_REG_SET (prologue_used, this_used);
> + note_stores (PATTERN (p_insn), record_hard_reg_sets,
> + &prologue_clobbered);
> + }
Should this iterate over split_prologue_seq as well?
Could we combine prologue_seq and split_prologue_seq into a single sequence?
> + bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
> + bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
> +
> + vec = VEC_alloc (basic_block, heap, n_basic_blocks);
> +
> + FOR_EACH_BB (bb)
> + {
> + rtx insn;
> + FOR_BB_INSNS (bb, insn)
> + {
> + if (requires_stack_frame_p (insn, prologue_used))
> + {
> + bitmap_set_bit (&bb_flags, bb->index);
> + VEC_quick_push (basic_block, vec, bb);
> + break;
> + }
> + }
Excess { and } in for loop.
> + }
> +
> + /* For every basic block that needs a prologue, mark all blocks
> + reachable from it, so as to ensure they are also seen as
> + requiring a prologue. */
> + while (!VEC_empty (basic_block, vec))
> + {
> + basic_block tmp_bb = VEC_pop (basic_block, vec);
> + edge e;
> + edge_iterator ei;
> + FOR_EACH_EDGE (e, ei, tmp_bb->succs)
> + {
> + if (e->dest == EXIT_BLOCK_PTR
> + || bitmap_bit_p (&bb_flags, e->dest->index))
> + continue;
> + bitmap_set_bit (&bb_flags, e->dest->index);
> + VEC_quick_push (basic_block, vec, e->dest);
> + }
Don't know which is the preferred style, but:
if (e->dest != EXIT_BLOCK_PTR
&& bitmap_set_bit (&bb_flags, e->dest->index))
VEC_quick_push (basic_block, vec, e->dest);
should be a little more efficient.
> + /* Now walk backwards from every block that is marked as needing
> + a prologue to compute the bb_antic_flags bitmap. */
> + bitmap_copy (&bb_antic_flags, &bb_flags);
> + FOR_EACH_BB (bb)
> + {
> + edge e;
> + edge_iterator ei;
> + if (!bitmap_bit_p (&bb_flags, bb->index))
> + continue;
> + FOR_EACH_EDGE (e, ei, bb->preds)
> + if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
> + {
> + VEC_quick_push (basic_block, vec, e->src);
> + bitmap_set_bit (&bb_on_list, e->src->index);
> + }
Here too I think we want:
FOR_EACH_EDGE (e, ei, bb->preds)
if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
&& bitmap_set_bit (&bb_on_list, e->src->index))
VEC_quick_push (basic_block, vec, e->src);
in this case to avoid pushing the same thing twice.
> + bitmap_clear_bit (&bb_on_list, tmp_bb->index);
> + FOR_EACH_EDGE (e, ei, tmp_bb->succs)
> + {
> + if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
> + {
> + all_set = false;
> + break;
> + }
> + }
Excess { and } in for loop.
> + if (all_set)
> + {
> + bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
> + FOR_EACH_EDGE (e, ei, tmp_bb->preds)
> + if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
> + {
> + VEC_quick_push (basic_block, vec, e->src);
> + bitmap_set_bit (&bb_on_list, e->src->index);
> + }
> + }
Same:
FOR_EACH_EDGE (e, ei, tmp_bb->preds)
if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
&& bitmap_set_bit (&bb_on_list, e->src->index))
VEC_quick_push (basic_block, vec, e->src);
comment as above.
> + /* Test whether the prologue is known to clobber any register
> + (other than FP or SP) which are live on the edge. */
> + CLEAR_HARD_REG_SET (prologue_clobbered);
> + for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> + if (NONDEBUG_INSN_P (p_insn))
> + note_stores (PATTERN (p_insn), record_hard_reg_sets,
> + &prologue_clobbered);
> + for (p_insn = split_prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
> + if (NONDEBUG_INSN_P (p_insn))
> + note_stores (PATTERN (p_insn), record_hard_reg_sets,
> + &prologue_clobbered);
> + CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
> + if (frame_pointer_needed)
> + CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
Seems like we should be able to reuse the insn walk from the beginning
of the enclosing if statement.
> +#ifdef HAVE_simple_return
> + simple_p = (entry_edge != orig_entry_edge
> + ? !bitmap_bit_p (&bb_flags, bb->index) : false);
> +#else
> + simple_p = false;
> +#endif
Why is this bb_flags rather than bb_antic_flags?
simple_p = (entry_edge != orig_entry_edge
&& !bitmap_bit_p (&bb_flags, bb->index));
seems more readable, but I suppose that's personal taste.
> + gcc_assert (simple_p);
> + new_bb = split_edge (e);
> + emit_barrier_after (BB_END (new_bb));
> + emit_return_into_block (simple_p, new_bb);
> +#ifdef HAVE_simple_return
> + if (simple_p)
Check is redundant given the gcc_assert.
> + /* If this block has only one successor, it both jumps
> + and falls through to the fallthru block, so we can't
> + delete the edge. */
> + if (single_succ_p (bb))
> + continue;
Seems odd that this could happen in optimised code, but if it did,
wouldn't it invalidate the simple_p transformation? Seems like
the non-fallthrough edge would use a simple return but the
fallthrough one wouldn't.
> - start_sequence ();
> - emit_note (NOTE_INSN_EPILOGUE_BEG);
> - emit_insn (gen_sibcall_epilogue ());
> - seq = get_insns ();
> - end_sequence ();
> + ep_seq = gen_sibcall_epilogue ();
> + if (ep_seq)
> + {
Why the new null check?
Richard