On Mon, Oct 31, 2016 at 06:29:21PM +0000, Wilco Dijkstra wrote: > This patch cleans up all code related to the frame pointer. On AArch64 we > emit a frame chain even in cases where the frame pointer is not required. > So make this explicit by introducing a boolean emit_frame_chain in > aarch64_frame record. > > When the frame pointer is enabled but not strictly required (eg. no use of > alloca), we emit a frame chain in non-leaf functions, but continue to use the > stack pointer to access locals. This results in smaller code and unwind info. > > Also simplify the complex logic in aarch64_override_options_after_change_1 > and compute whether the frame chain is required in aarch64_layout_frame > instead. As a result aarch64_frame_pointer_required is now redundant and > aarch64_can_eliminate can be greatly simplified. > > Finally convert all callee save/restore functions to use gen_frame_mem. > > Bootstrap OK. Any comments?
I'd really appreciate a second pair of eyes on this, as I'm not comfortable that I know the area well enough to review the code. Jiong, if I remember correctly, you've got a good understanding of the AArch64 frame layout, would you mind taking a look at this patch if you have time? https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02515.html I note this is still marked as an RFC, are you now proposing it as a patch to be merged to trunk? Thanks, James > > ChangeLog: > 2016-10-31 Wilco Dijkstra <wdijk...@arm.com> > > gcc/ > * config/aarch64/aarch64.h (aarch64_frame): > Add emit_frame_chain boolean. > * config/aarch64/aarch64.c (aarch64_frame_pointer_required) > Remove. > (aarch64_layout_frame): Initialise emit_frame_chain. > (aarch64_pushwb_single_reg): Use gen_frame_mem. > (aarch64_pop_regs): Likewise. > (aarch64_gen_load_pair): Likewise. > (aarch64_save_callee_saves): Likewise. > (aarch64_restore_callee_saves): Likewise. > (aarch64_expand_prologue): Use emit_frame_chain. > (aarch64_can_eliminate): Simplify. When FP needed or outgoing > arguments are large, eliminate to FP, otherwise SP. > (aarch64_override_options_after_change_1): Simplify. > (TARGET_FRAME_POINTER_REQUIRED): Remove define. > > -- > > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index > fa81e4b853dafcccc08842955288861ec7e7acca..6e32dc9f6f171dde0c182fdd7857230251f71712 > 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -583,6 +583,9 @@ struct GTY (()) aarch64_frame > /* The size of the stack adjustment after saving callee-saves. */ > HOST_WIDE_INT final_adjust; > > + /* Store FP,LR and setup a frame pointer. */ > + bool emit_frame_chain; > + > unsigned wb_candidate1; > unsigned wb_candidate2; > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > f07d771ea343803e054e03f59c8c1efb698bf474..6c06ac18d16f8afa7ee1cc5e8530e285a60e2b0f > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -2728,24 +2728,6 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2) > return ""; > } > > -static bool > -aarch64_frame_pointer_required (void) > -{ > - /* In aarch64_override_options_after_change > - flag_omit_leaf_frame_pointer turns off the frame pointer by > - default. Turn it back on now if we've not got a leaf > - function. */ > - if (flag_omit_leaf_frame_pointer > - && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM))) > - return true; > - > - /* Force a frame pointer for EH returns so the return address is at FP+8. > */ > - if (crtl->calls_eh_return) > - return true; > - > - return false; > -} > - > /* Mark the registers that need to be saved by the callee and calculate > the size of the callee-saved registers area and frame record (both FP > and LR may be omitted). */ > @@ -2758,6 +2740,18 @@ aarch64_layout_frame (void) > if (reload_completed && cfun->machine->frame.laid_out) > return; > > + /* Force a frame chain for EH returns so the return address is at FP+8. */ > + cfun->machine->frame.emit_frame_chain > + = frame_pointer_needed || crtl->calls_eh_return; > + > + /* Emit a frame chain if the frame pointer is enabled. > + If -momit-leaf-frame-pointer is used, do not use a frame chain > + in leaf functions which do not use LR. */ > + if (flag_omit_frame_pointer == 2 > + && !(flag_omit_leaf_frame_pointer && crtl->is_leaf > + && !df_regs_ever_live_p (LR_REGNUM))) > + cfun->machine->frame.emit_frame_chain = true; > + > #define SLOT_NOT_REQUIRED (-2) > #define SLOT_REQUIRED (-1) > > @@ -2789,7 +2783,7 @@ aarch64_layout_frame (void) > && !call_used_regs[regno]) > cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; > > - if (frame_pointer_needed) > + if (cfun->machine->frame.emit_frame_chain) > { > /* FP and LR are placed in the linkage record. */ > cfun->machine->frame.reg_offset[R29_REGNUM] = 0; > @@ -2937,7 +2931,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned > regno, > reg = gen_rtx_REG (mode, regno); > mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, > plus_constant (Pmode, base_rtx, -adjustment)); > - mem = gen_rtx_MEM (mode, mem); > + mem = gen_frame_mem (mode, mem); > > insn = emit_move_insn (mem, reg); > RTX_FRAME_RELATED_P (insn) = 1; > @@ -3011,7 +3005,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, > HOST_WIDE_INT adjustment, > { > rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment); > mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); > - emit_move_insn (reg1, gen_rtx_MEM (mode, mem)); > + emit_move_insn (reg1, gen_frame_mem (mode, mem)); > } > else > { > @@ -3062,8 +3056,6 @@ aarch64_save_callee_saves (machine_mode mode, > HOST_WIDE_INT start_offset, > unsigned start, unsigned limit, bool skip_wb) > { > rtx_insn *insn; > - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed > - ? gen_frame_mem : gen_rtx_MEM); > unsigned regno; > unsigned regno2; > > @@ -3081,8 +3073,8 @@ aarch64_save_callee_saves (machine_mode mode, > HOST_WIDE_INT start_offset, > > reg = gen_rtx_REG (mode, regno); > offset = start_offset + cfun->machine->frame.reg_offset[regno]; > - mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, > - offset)); > + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, > + offset)); > > regno2 = aarch64_next_callee_save (regno + 1, limit); > > @@ -3095,8 +3087,8 @@ aarch64_save_callee_saves (machine_mode mode, > HOST_WIDE_INT start_offset, > rtx mem2; > > offset = start_offset + cfun->machine->frame.reg_offset[regno2]; > - mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, > - offset)); > + mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, > + offset)); > insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, > reg2)); > > @@ -3120,8 +3112,6 @@ aarch64_restore_callee_saves (machine_mode mode, > unsigned limit, bool skip_wb, rtx *cfi_ops) > { > rtx base_rtx = stack_pointer_rtx; > - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed > - ? gen_frame_mem : gen_rtx_MEM); > unsigned regno; > unsigned regno2; > HOST_WIDE_INT offset; > @@ -3139,7 +3129,7 @@ aarch64_restore_callee_saves (machine_mode mode, > > reg = gen_rtx_REG (mode, regno); > offset = start_offset + cfun->machine->frame.reg_offset[regno]; > - mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); > + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); > > regno2 = aarch64_next_callee_save (regno + 1, limit); > > @@ -3151,7 +3141,7 @@ aarch64_restore_callee_saves (machine_mode mode, > rtx mem2; > > offset = start_offset + cfun->machine->frame.reg_offset[regno2]; > - mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); > + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); > emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); > > *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); > @@ -3217,6 +3207,7 @@ aarch64_expand_prologue (void) > HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; > unsigned reg1 = cfun->machine->frame.wb_candidate1; > unsigned reg2 = cfun->machine->frame.wb_candidate2; > + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; > rtx_insn *insn; > > if (flag_stack_usage_info) > @@ -3239,7 +3230,7 @@ aarch64_expand_prologue (void) > if (callee_adjust != 0) > aarch64_push_regs (reg1, reg2, callee_adjust); > > - if (frame_pointer_needed) > + if (emit_frame_chain) > { > if (callee_adjust == 0) > aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, > @@ -3247,12 +3238,12 @@ aarch64_expand_prologue (void) > insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, > stack_pointer_rtx, > GEN_INT (callee_offset))); > - RTX_FRAME_RELATED_P (insn) = 1; > + RTX_FRAME_RELATED_P (insn) = frame_pointer_needed; > emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); > } > > aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, > - callee_adjust != 0 || frame_pointer_needed); > + callee_adjust != 0 || emit_frame_chain); > aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, > callee_adjust != 0 || frame_pointer_needed); > aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); > @@ -5157,36 +5148,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, > rtx x, > } > > static bool > -aarch64_can_eliminate (const int from, const int to) > +aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) > { > - /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into > - HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ > - > - if (frame_pointer_needed) > - { > - if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) > - return true; > - if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) > - return false; > - if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM > - && !cfun->calls_alloca) > - return true; > - if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) > - return true; > - > - return false; > - } > - else > - { > - /* If we decided that we didn't need a leaf frame pointer but then used > - LR in the function, then we'll want a frame pointer after all, so > - prevent this elimination to ensure a frame pointer is used. */ > - if (to == STACK_POINTER_REGNUM > - && flag_omit_leaf_frame_pointer > - && df_regs_ever_live_p (LR_REGNUM)) > - return false; > - } > - > + /* If we need a frame pointer, eliminate to HARD_FRAME_POINTER_REGNUM. > + Use FP as well as with a large number of outgoing arguments so > + that stack offsets are smaller - this may generate better code. */ > + if (frame_pointer_needed || crtl->outgoing_args_size > 64) > + return to == HARD_FRAME_POINTER_REGNUM; > return true; > } > > @@ -8112,24 +8080,13 @@ aarch64_parse_override_string (const char* > input_string, > static void > aarch64_override_options_after_change_1 (struct gcc_options *opts) > { > - /* The logic here is that if we are disabling all frame pointer generation > - then we do not need to disable leaf frame pointer generation as a > - separate operation. But if we are *only* disabling leaf frame pointer > - generation then we set flag_omit_frame_pointer to true, but in > - aarch64_frame_pointer_required we return false only for leaf functions. > - > - PR 70044: We have to be careful about being called multiple times for > the > - same function. Once we have decided to set flag_omit_frame_pointer just > - so that we can omit leaf frame pointers, we must then not interpret a > - second call as meaning that all frame pointer generation should be > - omitted. We do this by setting flag_omit_frame_pointer to a special, > - non-zero value. */ > - if (opts->x_flag_omit_frame_pointer == 2) > - opts->x_flag_omit_frame_pointer = 0; > - > - if (opts->x_flag_omit_frame_pointer) > - opts->x_flag_omit_leaf_frame_pointer = false; > - else if (opts->x_flag_omit_leaf_frame_pointer) > + /* PR 70044: We have to be careful about being called multiple times for > the > + same function. This means all changes should be repeatable. */ > + > + /* If the frame pointer is enabled, set the flag to a special value. > + To implement -momit-leaf-frame-pointer this special value is checked in > + aarch64_layout_frame. The frame chain is emitted only when required. > */ > + if (opts->x_flag_omit_frame_pointer == 0) > opts->x_flag_omit_frame_pointer = 2; > > /* If not optimizing for size, set the default > @@ -14126,9 +14083,6 @@ aarch64_optab_supported_p (int op, machine_mode > mode1, machine_mode, > #undef TARGET_FUNCTION_VALUE_REGNO_P > #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p > > -#undef TARGET_FRAME_POINTER_REQUIRED > -#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required > - > #undef TARGET_GIMPLE_FOLD_BUILTIN > #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin >