On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> get_frame_size () returns used stack slots during compilation, which
> may be optimized out later.  This patch does the followings:
>
> 1. Add no_stack_frame to machine_function to indicate that the function
> doesn't need a stack frame.

Can you please add "stack_frame_required" to machine_function with
inverted meaning instead? Every single usage of no_stack_frame is
inverted, and it is hard to follow this negative logic through the
code.

> 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> 3. Always call ix86_find_max_used_stack_alignment to check if stack
> frame is needed.
>
> Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> Tested on AVX512 machine configured with
>
> --with-arch=native --with-cpu=native
>
> gcc/
>
>         PR target/88483
>         * config/i386/i386.c (ix86_get_frame_size): New function.
>         (ix86_frame_pointer_required): Replace get_frame_size with
>         ix86_get_frame_size.
>         (ix86_compute_frame_layout): Likewise.
>         (ix86_find_max_used_stack_alignment): Changed to void.  Set
>         no_stack_frame.
>         (ix86_finalize_stack_frame_flags): Always call
>         ix86_find_max_used_stack_alignment.  Replace get_frame_size with
>         ix86_get_frame_size.
>         * config/i386/i386.h (machine_function): Add no_stack_frame.
>
> gcc/testsuite/
>
>         PR target/88483
>         * gcc.target/i386/stackalign/pr88483-1.c: New test.
>         * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> ---
>  gcc/config/i386/i386.c                        | 53 ++++++++++++-------
>  gcc/config/i386/i386.h                        |  3 ++
>  .../gcc.target/i386/stackalign/pr88483-1.c    | 18 +++++++
>  .../gcc.target/i386/stackalign/pr88483-2.c    | 18 +++++++
>  4 files changed, 74 insertions(+), 18 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 54607748b0b..d0b2a4f8b70 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
>           && (frame.nregs + frame.nsseregs) == 0);
>  }
>
> +/* Return stack frame size.  get_frame_size () returns used stack slots
> +   during compilation, which may be optimized out later.  no_stack_frame
> +   is set to true if stack frame isn't needed.  */
> +
> +static HOST_WIDE_INT
> +ix86_get_frame_size (void)
> +{
> +  if (cfun->machine->no_stack_frame)
> +    return 0;
> +  else
> +    return get_frame_size ();
> +}
> +
>  /* Value should be nonzero if functions must have frame pointers.
>     Zero means the frame pointer need not be set up (and parms may
>     be accessed via the stack pointer) in functions that seem suitable.  */
> @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
>
>    /* Win64 SEH, very large frames need a frame-pointer as maximum stack
>       allocation is 4GB.  */
> -  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> +  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
>      return true;
>
>    /* SSE saves require frame-pointer when stack is misaligned.  */
> @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
>    unsigned HOST_WIDE_INT stack_alignment_needed;
>    HOST_WIDE_INT offset;
>    unsigned HOST_WIDE_INT preferred_alignment;
> -  HOST_WIDE_INT size = get_frame_size ();
> +  HOST_WIDE_INT size = ix86_get_frame_size ();
>    HOST_WIDE_INT to_allocate;
>
>    /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
> @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
>    return "";
>  }
>
> -/* Return true if stack frame is required.  Update STACK_ALIGNMENT
> -   to the largest alignment, in bits, of stack slot used if stack
> -   frame is required and CHECK_STACK_SLOT is true.  */
> +/* Set no_stack_frame to true if stack frame isn't required.  Update
> +   STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> +   used if stack frame is required and CHECK_STACK_SLOT is true.  */

>From the above comment, you can see the confusion caused by using
negative logic for no_stack_frame.

> -static bool
> +static void
>  ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
>                                     bool check_stack_slot)
>  {
> @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int 
> &stack_alignment,
>           }
>      }
>
> -  return require_stack_frame;
> +  cfun->machine->no_stack_frame = !require_stack_frame;
>  }
>
>  /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
>        return;
>      }
>
> +  /* It is always safe to compute max_used_stack_alignment.  We
> +     compute it only if 128-bit aligned load/store may be generated
> +     on misaligned stack slot which will lead to segfault. */
> +  bool check_stack_slot
> +    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> +  ix86_find_max_used_stack_alignment (stack_alignment,
> +                                     check_stack_slot);
> +
>    /* If the only reason for frame_pointer_needed is that we conservatively
>       assumed stack realignment might be needed or -fno-omit-frame-pointer
>       is used, but in the end nothing that needed the stack alignment had
> @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
>            && flag_exceptions
>            && cfun->can_throw_non_call_exceptions)
>        && !ix86_frame_pointer_required ()
> -      && get_frame_size () == 0
> +      && ix86_get_frame_size () == 0
>        && ix86_nsaved_sseregs () == 0
>        && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
>      {
> -      if (ix86_find_max_used_stack_alignment (stack_alignment,
> -                                             stack_realign))
> +      if (!cfun->machine->no_stack_frame)
>         {
>           /* Stack frame is required.  If stack alignment needed is less
>              than incoming stack boundary, don't realign stack.  */
> @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
>           recompute_frame_layout_p = true;
>         }
>      }
> -  else if (crtl->max_used_stack_slot_alignment >= 128)
> +  else if (crtl->max_used_stack_slot_alignment >= 128
> +          && !cfun->machine->no_stack_frame)
>      {
>        /* We don't need to realign stack.  max_used_stack_alignment is
>          used to decide how stack frame should be aligned.  This is
> -        independent of any psABIs nor 32-bit vs 64-bit.  It is always
> -        safe to compute max_used_stack_alignment.  We compute it only
> -        if 128-bit aligned load/store may be generated on misaligned
> -        stack slot which will lead to segfault.   */
> -      if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> -       cfun->machine->max_used_stack_alignment
> -         = stack_alignment / BITS_PER_UNIT;
> +        independent of any psABIs nor 32-bit vs 64-bit.  */
> +      cfun->machine->max_used_stack_alignment
> +       = stack_alignment / BITS_PER_UNIT;;

Double semicolon.

>      }
>
>    if (crtl->stack_realign_needed != stack_realign)
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index be1480fdcf8..6b4186d3dac 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2754,6 +2754,9 @@ struct GTY(()) machine_function {
>    /* If true, ENDBR is queued at function entrance.  */
>    BOOL_BITFIELD endbr_queued_at_entrance : 1;
>
> +  /* Nonzero if the function doesn't need a stack frame.  */
> +  BOOL_BITFIELD no_stack_frame : 1;
> +
>    /* The largest alignment, in bytes, of stack slot actually used.  */
>    unsigned int max_used_stack_alignment;
>
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c 
> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> new file mode 100644
> index 00000000000..c8bb0832fe2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +
> +struct B
> +{
> +  char a[12];
> +  int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> +  struct B x = {};
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t 
> \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not 
> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c 
> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> new file mode 100644
> index 00000000000..e94fa1d18fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +
> +struct B
> +{
> +  char a[12];
> +  int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> +  struct B x = {};
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t 
> \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not 
> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> --
> 2.20.1
>

Reply via email to