Hi! As I mentioned over the weekend, currently all functions that use 32-byte vectors end up with a frame pointer and stack realignment (except when they have some 32-byte aligned argument, but that means 9+ arguments, so quite unlikely). The stack realignment is there just in case reload wants to spill anything.
This patch attempts to improve the easy case, where the function doesn't touch the stack at all and the only reason for frame pointer was the conservative guess on stack realignment need. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? PS: It would be nice if stack_alignment_needed was only set on actual needs rather than estimations and stack_alignment_estimated used for the latter (e.g. currently expand_one_var does adjust stack_alignment_needed always, I'd hope it could do it only when actually putting the var into stack), then while we would still need to assume that perhaps stack realignment might be needed during reload, if we don't spill anything requiring 32-byte alignment during reload we could perhaps keep the frame pointer, but at least optimize away the stack anding in even more cases. 2011-11-07 Jakub Jelinek <ja...@redhat.com> * function.h (requires_stack_frame_p): New prototype. * function.c (requires_stack_frame_p): No longer static. * config/i386/i386.c (ix86_finalize_stack_realign_flags): If stack_realign_fp was just a conservative guess for a function which doesn't use sp/fp/argp at all, clear frame_pointer_needed and stack realignment. --- gcc/function.h.jj 2011-11-07 12:40:36.000000000 +0100 +++ gcc/function.h 2011-11-07 18:18:57.000000000 +0100 @@ -753,6 +753,10 @@ extern void used_types_insert (tree); extern int get_next_funcdef_no (void); extern int get_last_funcdef_no (void); +#ifdef HAVE_simple_return +extern bool requires_stack_frame_p (rtx, HARD_REG_SET, HARD_REG_SET); +#endif + /* In predict.c */ extern bool optimize_function_for_size_p (struct function *); extern bool optimize_function_for_speed_p (struct function *); --- gcc/function.c.jj 2011-11-07 12:40:36.000000000 +0100 +++ gcc/function.c 2011-11-07 18:18:57.000000000 +0100 @@ -5282,7 +5282,7 @@ prologue_epilogue_contains (const_rtx in PROLOGUE_USED contains the hard registers used in the function prologue. 
SET_UP_BY_PROLOGUE is the set of registers we expect the prologue to set up for the function. */ -static bool +bool requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used, HARD_REG_SET set_up_by_prologue) { --- gcc/config/i386/i386.c.jj 2011-11-07 18:19:49.000000000 +0100 +++ gcc/config/i386/i386.c 2011-11-07 19:02:06.000000000 +0100 @@ -9894,12 +9894,68 @@ ix86_finalize_stack_realign_flags (void) /* After stack_realign_needed is finalized, we can't no longer change it. */ gcc_assert (crtl->stack_realign_needed == stack_realign); + return; } - else - { - crtl->stack_realign_needed = stack_realign; - crtl->stack_realign_finalized = true; + + /* If the only reason for frame_pointer_needed is that we conservatively + assumed stack realignment might be needed, but in the end nothing that + needed the stack alignment had been spilled, clear frame_pointer_needed + and say we don't need stack realignment. */ + if (stack_realign + && !crtl->need_drap + && frame_pointer_needed + && current_function_is_leaf + && flag_omit_frame_pointer + && current_function_sp_is_unchanging + && !ix86_current_function_calls_tls_descriptor + && !crtl->accesses_prior_frames + && !cfun->calls_alloca + && !crtl->calls_eh_return + && !(flag_stack_check && STACK_CHECK_MOVING_SP) + && !ix86_frame_pointer_required () + && get_frame_size () == 0 + && ix86_nsaved_sseregs () == 0 + && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) + { + HARD_REG_SET set_up_by_prologue, prologue_used; + basic_block bb; + + CLEAR_HARD_REG_SET (prologue_used); + CLEAR_HARD_REG_SET (set_up_by_prologue); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); + add_to_hard_reg_set (&set_up_by_prologue, Pmode, + HARD_FRAME_POINTER_REGNUM); + FOR_EACH_BB (bb) + { + rtx insn; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) + && requires_stack_frame_p (insn, prologue_used, + set_up_by_prologue)) + { + 
crtl->stack_realign_needed = stack_realign; + crtl->stack_realign_finalized = true; + return; + } + } + + frame_pointer_needed = false; + stack_realign = false; + crtl->max_used_stack_slot_alignment = incoming_stack_boundary; + crtl->stack_alignment_needed = incoming_stack_boundary; + crtl->stack_alignment_estimated = incoming_stack_boundary; + if (crtl->preferred_stack_boundary > incoming_stack_boundary) + crtl->preferred_stack_boundary = incoming_stack_boundary; + df_finish_pass (true); + df_scan_alloc (NULL); + df_scan_blocks (); + df_compute_regs_ever_live (true); + df_analyze (); } + + crtl->stack_realign_needed = stack_realign; + crtl->stack_realign_finalized = true; } /* Expand the prologue into a bunch of separate insns. */ Jakub