On Tue, May 21, 2019 at 5:01 PM H.J. Lu <[email protected]> wrote:
>
> get_frame_size () returns used stack slots during compilation, which
> may be optimized out later. This patch does the followings:
>
> 1. Add no_stack_frame to machine_function to indicate that the function
> doesn't need a stack frame.
Can you please add "stack_frame_required" to machine_function with
inverted meaning instead? Every single usage of no_stack_frame is
inverted, and it is hard to follow this negative logic through the
code.
> 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame.
> 3. Always call ix86_find_max_used_stack_alignment to check if stack
> frame is needed.
>
> Tested on i686 and x86-64 with
>
> --with-arch=native --with-cpu=native
>
> Tested on AVX512 machine configured with
>
> --with-arch=native --with-cpu=native
>
> gcc/
>
> PR target/88483
> * config/i386/i386.c (ix86_get_frame_size): New function.
> (ix86_frame_pointer_required): Replace get_frame_size with
> ix86_get_frame_size.
> (ix86_compute_frame_layout): Likewise.
> (ix86_find_max_used_stack_alignment): Changed to void. Set
> no_stack_frame.
> (ix86_finalize_stack_frame_flags): Always call
> ix86_find_max_used_stack_alignment. Replace get_frame_size with
> ix86_get_frame_size.
> * config/i386/i386.h (machine_function): Add no_stack_frame.
>
> gcc/testsuite/
>
> PR target/88483
> * gcc.target/i386/stackalign/pr88483-1.c: New test.
> * gcc.target/i386/stackalign/pr88483-2.c: Likewise.
> ---
> gcc/config/i386/i386.c | 53 ++++++++++++-------
> gcc/config/i386/i386.h | 3 ++
> .../gcc.target/i386/stackalign/pr88483-1.c | 18 +++++++
> .../gcc.target/i386/stackalign/pr88483-2.c | 18 +++++++
> 4 files changed, 74 insertions(+), 18 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 54607748b0b..d0b2a4f8b70 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void)
> && (frame.nregs + frame.nsseregs) == 0);
> }
>
> +/* Return stack frame size. get_frame_size () returns used stack slots
> + during compilation, which may be optimized out later. no_stack_frame
> + is set to true if stack frame isn't needed. */
> +
> +static HOST_WIDE_INT
> +ix86_get_frame_size (void)
> +{
> + if (cfun->machine->no_stack_frame)
> + return 0;
> + else
> + return get_frame_size ();
> +}
> +
> /* Value should be nonzero if functions must have frame pointers.
> Zero means the frame pointer need not be set up (and parms may
> be accessed via the stack pointer) in functions that seem suitable. */
> @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void)
>
> /* Win64 SEH, very large frames need a frame-pointer as maximum stack
> allocation is 4GB. */
> - if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
> + if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
> return true;
>
> /* SSE saves require frame-pointer when stack is misaligned. */
> @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void)
> unsigned HOST_WIDE_INT stack_alignment_needed;
> HOST_WIDE_INT offset;
> unsigned HOST_WIDE_INT preferred_alignment;
> - HOST_WIDE_INT size = get_frame_size ();
> + HOST_WIDE_INT size = ix86_get_frame_size ();
> HOST_WIDE_INT to_allocate;
>
> /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
> @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end)
> return "";
> }
>
> -/* Return true if stack frame is required. Update STACK_ALIGNMENT
> - to the largest alignment, in bits, of stack slot used if stack
> - frame is required and CHECK_STACK_SLOT is true. */
> +/* Set no_stack_frame to true if stack frame isn't required. Update
> + STACK_ALIGNMENT to the largest alignment, in bits, of stack slot
> + used if stack frame is required and CHECK_STACK_SLOT is true. */
>From the above comment, you can see the confusion caused by using
negative logic for no_stack_frame.
> -static bool
> +static void
> ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
> bool check_stack_slot)
> {
> @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int
> &stack_alignment,
> }
> }
>
> - return require_stack_frame;
> + cfun->machine->no_stack_frame = !require_stack_frame;
> }
>
> /* Finalize stack_realign_needed and frame_pointer_needed flags, which
> @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void)
> return;
> }
>
> + /* It is always safe to compute max_used_stack_alignment. We
> + compute it only if 128-bit aligned load/store may be generated
> + on misaligned stack slot which will lead to segfault. */
> + bool check_stack_slot
> + = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
> + ix86_find_max_used_stack_alignment (stack_alignment,
> + check_stack_slot);
> +
> /* If the only reason for frame_pointer_needed is that we conservatively
> assumed stack realignment might be needed or -fno-omit-frame-pointer
> is used, but in the end nothing that needed the stack alignment had
> @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void)
> && flag_exceptions
> && cfun->can_throw_non_call_exceptions)
> && !ix86_frame_pointer_required ()
> - && get_frame_size () == 0
> + && ix86_get_frame_size () == 0
> && ix86_nsaved_sseregs () == 0
> && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
> {
> - if (ix86_find_max_used_stack_alignment (stack_alignment,
> - stack_realign))
> + if (!cfun->machine->no_stack_frame)
> {
> /* Stack frame is required. If stack alignment needed is less
> than incoming stack boundary, don't realign stack. */
> @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void)
> recompute_frame_layout_p = true;
> }
> }
> - else if (crtl->max_used_stack_slot_alignment >= 128)
> + else if (crtl->max_used_stack_slot_alignment >= 128
> + && !cfun->machine->no_stack_frame)
> {
> /* We don't need to realign stack. max_used_stack_alignment is
> used to decide how stack frame should be aligned. This is
> - independent of any psABIs nor 32-bit vs 64-bit. It is always
> - safe to compute max_used_stack_alignment. We compute it only
> - if 128-bit aligned load/store may be generated on misaligned
> - stack slot which will lead to segfault. */
> - if (ix86_find_max_used_stack_alignment (stack_alignment, true))
> - cfun->machine->max_used_stack_alignment
> - = stack_alignment / BITS_PER_UNIT;
> + independent of any psABIs nor 32-bit vs 64-bit. */
> + cfun->machine->max_used_stack_alignment
> + = stack_alignment / BITS_PER_UNIT;;
Double semicolon.
> }
>
> if (crtl->stack_realign_needed != stack_realign)
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index be1480fdcf8..6b4186d3dac 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2754,6 +2754,9 @@ struct GTY(()) machine_function {
> /* If true, ENDBR is queued at function entrance. */
> BOOL_BITFIELD endbr_queued_at_entrance : 1;
>
> + /* Nonzero if the function doesn't need a stack frame. */
> + BOOL_BITFIELD no_stack_frame : 1;
> +
> /* The largest alignment, in bytes, of stack slot actually used. */
> unsigned int max_used_stack_alignment;
>
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> new file mode 100644
> index 00000000000..c8bb0832fe2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2" } */
> +
> +struct B
> +{
> + char a[12];
> + int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> + struct B x = {};
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t
> \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not
> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> new file mode 100644
> index 00000000000..e94fa1d18fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +
> +struct B
> +{
> + char a[12];
> + int b;
> +};
> +
> +struct B
> +f2 (void)
> +{
> + struct B x = {};
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t
> \]*%\[re\]?sp" } } */
> +/* { dg-final { scan-assembler-not
> "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */
> --
> 2.20.1
>