On Mon, Oct 31, 2016 at 06:29:21PM +0000, Wilco Dijkstra wrote:
> This patch cleans up all code related to the frame pointer.  On AArch64 we
> emit a frame chain even in cases where the frame pointer is not required.
> So make this explicit by introducing a boolean emit_frame_chain in
> aarch64_frame record.
> 
> When the frame pointer is enabled but not strictly required (eg. no use of
> alloca), we emit a frame chain in non-leaf functions, but continue to use the
> stack pointer to access locals.  This results in smaller code and unwind info.
> 
> Also simplify the complex logic in aarch64_override_options_after_change_1
> and compute whether the frame chain is required in aarch64_layout_frame
> instead.  As a result aarch64_frame_pointer_required is now redundant and
> aarch64_can_eliminate can be greatly simplified.
> 
> Finally convert all callee save/restore functions to use gen_frame_mem.
> 
> Bootstrap OK. Any comments?

I'd really appreciate a second pair of eyes on this, as I'm not
comfortable that I know the area well enough to review the code.

Jiong, if I remember correctly, you've got a good understanding of the
AArch64 frame layout, would you mind taking a look at this patch if you
have time?

https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02515.html

I note this is still marked as an RFC, are you now proposing it as a
patch to be merged to trunk?

Thanks,
James

> 
> ChangeLog:
> 2016-10-31  Wilco Dijkstra  <wdijk...@arm.com>
> 
>     gcc/
>       * config/aarch64/aarch64.h (aarch64_frame):
>       Add emit_frame_chain boolean.
>       * config/aarch64/aarch64.c (aarch64_frame_pointer_required)
>       Remove.
>       (aarch64_layout_frame): Initialise emit_frame_chain.
>       (aarch64_pushwb_single_reg): Use gen_frame_mem.
>       (aarch64_pop_regs): Likewise.
>       (aarch64_gen_load_pair): Likewise.
>       (aarch64_save_callee_saves): Likewise.
>       (aarch64_restore_callee_saves): Likewise.
>       (aarch64_expand_prologue): Use emit_frame_chain.
>       (aarch64_can_eliminate): Simplify. When FP needed or outgoing
>       arguments are large, eliminate to FP, otherwise SP.
>       (aarch64_override_options_after_change_1): Simplify.
>       (TARGET_FRAME_POINTER_REQUIRED): Remove define.
> 
> --
> 
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 
> fa81e4b853dafcccc08842955288861ec7e7acca..6e32dc9f6f171dde0c182fdd7857230251f71712
>  100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -583,6 +583,9 @@ struct GTY (()) aarch64_frame
>    /* The size of the stack adjustment after saving callee-saves.  */
>    HOST_WIDE_INT final_adjust;
>  
> +  /* Store FP,LR and setup a frame pointer.  */
> +  bool emit_frame_chain;
> +
>    unsigned wb_candidate1;
>    unsigned wb_candidate2;
>  
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> f07d771ea343803e054e03f59c8c1efb698bf474..6c06ac18d16f8afa7ee1cc5e8530e285a60e2b0f
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2728,24 +2728,6 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
>    return "";
>  }
>  
> -static bool
> -aarch64_frame_pointer_required (void)
> -{
> -  /* In aarch64_override_options_after_change
> -     flag_omit_leaf_frame_pointer turns off the frame pointer by
> -     default.  Turn it back on now if we've not got a leaf
> -     function.  */
> -  if (flag_omit_leaf_frame_pointer
> -      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
> -    return true;
> -
> -  /* Force a frame pointer for EH returns so the return address is at FP+8.  
> */
> -  if (crtl->calls_eh_return)
> -    return true;
> -
> -  return false;
> -}
> -
>  /* Mark the registers that need to be saved by the callee and calculate
>     the size of the callee-saved registers area and frame record (both FP
>     and LR may be omitted).  */
> @@ -2758,6 +2740,18 @@ aarch64_layout_frame (void)
>    if (reload_completed && cfun->machine->frame.laid_out)
>      return;
>  
> +  /* Force a frame chain for EH returns so the return address is at FP+8.  */
> +  cfun->machine->frame.emit_frame_chain
> +    = frame_pointer_needed || crtl->calls_eh_return;
> +
> +  /* Emit a frame chain if the frame pointer is enabled.
> +     If -momit-leaf-frame-pointer is used, do not use a frame chain
> +     in leaf functions which do not use LR.  */
> +  if (flag_omit_frame_pointer == 2
> +      && !(flag_omit_leaf_frame_pointer && crtl->is_leaf
> +        && !df_regs_ever_live_p (LR_REGNUM)))
> +    cfun->machine->frame.emit_frame_chain = true;
> +
>  #define SLOT_NOT_REQUIRED (-2)
>  #define SLOT_REQUIRED     (-1)
>  
> @@ -2789,7 +2783,7 @@ aarch64_layout_frame (void)
>       && !call_used_regs[regno])
>        cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
>  
> -  if (frame_pointer_needed)
> +  if (cfun->machine->frame.emit_frame_chain)
>      {
>        /* FP and LR are placed in the linkage record.  */
>        cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
> @@ -2937,7 +2931,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned 
> regno,
>    reg = gen_rtx_REG (mode, regno);
>    mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
>                           plus_constant (Pmode, base_rtx, -adjustment));
> -  mem = gen_rtx_MEM (mode, mem);
> +  mem = gen_frame_mem (mode, mem);
>  
>    insn = emit_move_insn (mem, reg);
>    RTX_FRAME_RELATED_P (insn) = 1;
> @@ -3011,7 +3005,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, 
> HOST_WIDE_INT adjustment,
>      {
>        rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
>        mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
> -      emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
> +      emit_move_insn (reg1, gen_frame_mem (mode, mem));
>      }
>    else
>      {
> @@ -3062,8 +3056,6 @@ aarch64_save_callee_saves (machine_mode mode, 
> HOST_WIDE_INT start_offset,
>                          unsigned start, unsigned limit, bool skip_wb)
>  {
>    rtx_insn *insn;
> -  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
> -                                              ? gen_frame_mem : gen_rtx_MEM);
>    unsigned regno;
>    unsigned regno2;
>  
> @@ -3081,8 +3073,8 @@ aarch64_save_callee_saves (machine_mode mode, 
> HOST_WIDE_INT start_offset,
>  
>        reg = gen_rtx_REG (mode, regno);
>        offset = start_offset + cfun->machine->frame.reg_offset[regno];
> -      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
> -                                           offset));
> +      mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
> +                                             offset));
>  
>        regno2 = aarch64_next_callee_save (regno + 1, limit);
>  
> @@ -3095,8 +3087,8 @@ aarch64_save_callee_saves (machine_mode mode, 
> HOST_WIDE_INT start_offset,
>         rtx mem2;
>  
>         offset = start_offset + cfun->machine->frame.reg_offset[regno2];
> -       mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
> -                                                offset));
> +       mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
> +                                                  offset));
>         insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
>                                                   reg2));
>  
> @@ -3120,8 +3112,6 @@ aarch64_restore_callee_saves (machine_mode mode,
>                             unsigned limit, bool skip_wb, rtx *cfi_ops)
>  {
>    rtx base_rtx = stack_pointer_rtx;
> -  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
> -                                              ? gen_frame_mem : gen_rtx_MEM);
>    unsigned regno;
>    unsigned regno2;
>    HOST_WIDE_INT offset;
> @@ -3139,7 +3129,7 @@ aarch64_restore_callee_saves (machine_mode mode,
>  
>        reg = gen_rtx_REG (mode, regno);
>        offset = start_offset + cfun->machine->frame.reg_offset[regno];
> -      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
> +      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
>  
>        regno2 = aarch64_next_callee_save (regno + 1, limit);
>  
> @@ -3151,7 +3141,7 @@ aarch64_restore_callee_saves (machine_mode mode,
>         rtx mem2;
>  
>         offset = start_offset + cfun->machine->frame.reg_offset[regno2];
> -       mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
> +       mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
>         emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
>  
>         *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
> @@ -3217,6 +3207,7 @@ aarch64_expand_prologue (void)
>    HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
>    unsigned reg1 = cfun->machine->frame.wb_candidate1;
>    unsigned reg2 = cfun->machine->frame.wb_candidate2;
> +  bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
>    rtx_insn *insn;
>  
>    if (flag_stack_usage_info)
> @@ -3239,7 +3230,7 @@ aarch64_expand_prologue (void)
>    if (callee_adjust != 0)
>      aarch64_push_regs (reg1, reg2, callee_adjust);
>  
> -  if (frame_pointer_needed)
> +  if (emit_frame_chain)
>      {
>        if (callee_adjust == 0)
>       aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
> @@ -3247,12 +3238,12 @@ aarch64_expand_prologue (void)
>        insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
>                                      stack_pointer_rtx,
>                                      GEN_INT (callee_offset)));
> -      RTX_FRAME_RELATED_P (insn) = 1;
> +      RTX_FRAME_RELATED_P (insn) = frame_pointer_needed;
>        emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
>      }
>  
>    aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
> -                          callee_adjust != 0 || frame_pointer_needed);
> +                          callee_adjust != 0 || emit_frame_chain);
>    aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
>                            callee_adjust != 0 || frame_pointer_needed);
>    aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
> @@ -5157,36 +5148,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, 
> rtx x,
>  }
>  
>  static bool
> -aarch64_can_eliminate (const int from, const int to)
> +aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
>  {
> -  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
> -     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
> -
> -  if (frame_pointer_needed)
> -    {
> -      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
> -     return true;
> -      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
> -     return false;
> -      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
> -       && !cfun->calls_alloca)
> -     return true;
> -      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
> -     return true;
> -
> -      return false;
> -    }
> -  else
> -    {
> -      /* If we decided that we didn't need a leaf frame pointer but then used
> -      LR in the function, then we'll want a frame pointer after all, so
> -      prevent this elimination to ensure a frame pointer is used.  */
> -      if (to == STACK_POINTER_REGNUM
> -       && flag_omit_leaf_frame_pointer
> -       && df_regs_ever_live_p (LR_REGNUM))
> -     return false;
> -    }
> -
> +  /* If we need a frame pointer, eliminate to HARD_FRAME_POINTER_REGNUM.
> +     Use FP as well as with a large number of outgoing arguments so
> +     that stack offsets are smaller - this may generate better code.  */
> +  if (frame_pointer_needed || crtl->outgoing_args_size > 64)
> +    return to == HARD_FRAME_POINTER_REGNUM;
>    return true;
>  }
>  
> @@ -8112,24 +8080,13 @@ aarch64_parse_override_string (const char* 
> input_string,
>  static void
>  aarch64_override_options_after_change_1 (struct gcc_options *opts)
>  {
> -  /* The logic here is that if we are disabling all frame pointer generation
> -     then we do not need to disable leaf frame pointer generation as a
> -     separate operation.  But if we are *only* disabling leaf frame pointer
> -     generation then we set flag_omit_frame_pointer to true, but in
> -     aarch64_frame_pointer_required we return false only for leaf functions.
> -
> -     PR 70044: We have to be careful about being called multiple times for 
> the
> -     same function.  Once we have decided to set flag_omit_frame_pointer just
> -     so that we can omit leaf frame pointers, we must then not interpret a
> -     second call as meaning that all frame pointer generation should be
> -     omitted.  We do this by setting flag_omit_frame_pointer to a special,
> -     non-zero value.  */
> -  if (opts->x_flag_omit_frame_pointer == 2)
> -    opts->x_flag_omit_frame_pointer = 0;
> -
> -  if (opts->x_flag_omit_frame_pointer)
> -    opts->x_flag_omit_leaf_frame_pointer = false;
> -  else if (opts->x_flag_omit_leaf_frame_pointer)
> +  /* PR 70044: We have to be careful about being called multiple times for 
> the
> +     same function.  This means all changes should be repeatable.  */
> +
> +  /* If the frame pointer is enabled, set the flag to a special value.
> +     To implement -momit-leaf-frame-pointer this special value is checked in
> +     aarch64_layout_frame.  The frame chain is emitted only when required.  
> */
> +  if (opts->x_flag_omit_frame_pointer == 0)
>      opts->x_flag_omit_frame_pointer = 2;
>  
>    /* If not optimizing for size, set the default
> @@ -14126,9 +14083,6 @@ aarch64_optab_supported_p (int op, machine_mode 
> mode1, machine_mode,
>  #undef TARGET_FUNCTION_VALUE_REGNO_P
>  #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
>  
> -#undef TARGET_FRAME_POINTER_REQUIRED
> -#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
> -
>  #undef TARGET_GIMPLE_FOLD_BUILTIN
>  #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
>  

Reply via email to