On Tue, May 21, 2019 at 5:01 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > get_frame_size () returns used stack slots during compilation, which > may be optimized out later. This patch does the followings: > > 1. Add no_stack_frame to machine_function to indicate that the function > doesn't need a stack frame.
Can you please add "stack_frame_required" to machine_function with inverted meaning instead? Every single usage of no_stack_frame is inverted, and it is hard to follow this negative logic through the code. > 2. Change ix86_find_max_used_stack_alignment to set no_stack_frame. > 3. Always call ix86_find_max_used_stack_alignment to check if stack > frame is needed. > > Tested on i686 and x86-64 with > > --with-arch=native --with-cpu=native > > Tested on AVX512 machine configured with > > --with-arch=native --with-cpu=native > > gcc/ > > PR target/88483 > * config/i386/i386.c (ix86_get_frame_size): New function. > (ix86_frame_pointer_required): Replace get_frame_size with > ix86_get_frame_size. > (ix86_compute_frame_layout): Likewise. > (ix86_find_max_used_stack_alignment): Changed to void. Set > no_stack_frame. > (ix86_finalize_stack_frame_flags): Always call > ix86_find_max_used_stack_alignment. Replace get_frame_size with > ix86_get_frame_size. > * config/i386/i386.h (machine_function): Add no_stack_frame. > > gcc/testsuite/ > > PR target/88483 > * gcc.target/i386/stackalign/pr88483-1.c: New test. > * gcc.target/i386/stackalign/pr88483-2.c: Likewise. > --- > gcc/config/i386/i386.c | 53 ++++++++++++------- > gcc/config/i386/i386.h | 3 ++ > .../gcc.target/i386/stackalign/pr88483-1.c | 18 +++++++ > .../gcc.target/i386/stackalign/pr88483-2.c | 18 +++++++ > 4 files changed, 74 insertions(+), 18 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 54607748b0b..d0b2a4f8b70 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -5012,6 +5012,19 @@ ix86_can_use_return_insn_p (void) > && (frame.nregs + frame.nsseregs) == 0); > } > > +/* Return stack frame size. get_frame_size () returns used stack slots > + during compilation, which may be optimized out later. no_stack_frame > + is set to true if stack frame isn't needed. */ > + > +static HOST_WIDE_INT > +ix86_get_frame_size (void) > +{ > + if (cfun->machine->no_stack_frame) > + return 0; > + else > + return get_frame_size (); > +} > + > /* Value should be nonzero if functions must have frame pointers. > Zero means the frame pointer need not be set up (and parms may > be accessed via the stack pointer) in functions that seem suitable. */ > @@ -5035,7 +5048,7 @@ ix86_frame_pointer_required (void) > > /* Win64 SEH, very large frames need a frame-pointer as maximum stack > allocation is 4GB. */ > - if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) > + if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) > return true; > > /* SSE saves require frame-pointer when stack is misaligned. */ > @@ -5842,7 +5855,7 @@ ix86_compute_frame_layout (void) > unsigned HOST_WIDE_INT stack_alignment_needed; > HOST_WIDE_INT offset; > unsigned HOST_WIDE_INT preferred_alignment; > - HOST_WIDE_INT size = get_frame_size (); > + HOST_WIDE_INT size = ix86_get_frame_size (); > HOST_WIDE_INT to_allocate; > > /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit > @@ -7436,11 +7449,11 @@ output_probe_stack_range (rtx reg, rtx end) > return ""; > } > > -/* Return true if stack frame is required. Update STACK_ALIGNMENT > - to the largest alignment, in bits, of stack slot used if stack > - frame is required and CHECK_STACK_SLOT is true. */ > +/* Set no_stack_frame to true if stack frame isn't required. Update > + STACK_ALIGNMENT to the largest alignment, in bits, of stack slot > + used if stack frame is required and CHECK_STACK_SLOT is true. */ >From the above comment, you can see the confusion caused by using negative logic for no_stack_frame. > -static bool > +static void > ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, > bool check_stack_slot) > { > @@ -7489,7 +7502,7 @@ ix86_find_max_used_stack_alignment (unsigned int > &stack_alignment, > } > } > > - return require_stack_frame; > + cfun->machine->no_stack_frame = !require_stack_frame; > } > > /* Finalize stack_realign_needed and frame_pointer_needed flags, which > @@ -7519,6 +7532,14 @@ ix86_finalize_stack_frame_flags (void) > return; > } > > + /* It is always safe to compute max_used_stack_alignment. We > + compute it only if 128-bit aligned load/store may be generated > + on misaligned stack slot which will lead to segfault. */ > + bool check_stack_slot > + = (stack_realign || crtl->max_used_stack_slot_alignment >= 128); > + ix86_find_max_used_stack_alignment (stack_alignment, > + check_stack_slot); > + > /* If the only reason for frame_pointer_needed is that we conservatively > assumed stack realignment might be needed or -fno-omit-frame-pointer > is used, but in the end nothing that needed the stack alignment had > @@ -7538,12 +7559,11 @@ ix86_finalize_stack_frame_flags (void) > && flag_exceptions > && cfun->can_throw_non_call_exceptions) > && !ix86_frame_pointer_required () > - && get_frame_size () == 0 > + && ix86_get_frame_size () == 0 > && ix86_nsaved_sseregs () == 0 > && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) > { > - if (ix86_find_max_used_stack_alignment (stack_alignment, > - stack_realign)) > + if (!cfun->machine->no_stack_frame) > { > /* Stack frame is required. If stack alignment needed is less > than incoming stack boundary, don't realign stack. */ > @@ -7631,17 +7651,14 @@ ix86_finalize_stack_frame_flags (void) > recompute_frame_layout_p = true; > } > } > - else if (crtl->max_used_stack_slot_alignment >= 128) > + else if (crtl->max_used_stack_slot_alignment >= 128 > + && !cfun->machine->no_stack_frame) > { > /* We don't need to realign stack. max_used_stack_alignment is > used to decide how stack frame should be aligned. This is > - independent of any psABIs nor 32-bit vs 64-bit. It is always > - safe to compute max_used_stack_alignment. We compute it only > - if 128-bit aligned load/store may be generated on misaligned > - stack slot which will lead to segfault. */ > - if (ix86_find_max_used_stack_alignment (stack_alignment, true)) > - cfun->machine->max_used_stack_alignment > - = stack_alignment / BITS_PER_UNIT; > + independent of any psABIs nor 32-bit vs 64-bit. */ > + cfun->machine->max_used_stack_alignment > + = stack_alignment / BITS_PER_UNIT;; Double semicolon. > } > > if (crtl->stack_realign_needed != stack_realign) > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index be1480fdcf8..6b4186d3dac 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -2754,6 +2754,9 @@ struct GTY(()) machine_function { > /* If true, ENDBR is queued at function entrance. */ > BOOL_BITFIELD endbr_queued_at_entrance : 1; > > + /* Nonzero if the function doesn't need a stack frame. */ > + BOOL_BITFIELD no_stack_frame : 1; > + > /* The largest alignment, in bytes, of stack slot actually used. */ > unsigned int max_used_stack_alignment; > > diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c > b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c > new file mode 100644 > index 00000000000..c8bb0832fe2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx2" } */ > + > +struct B > +{ > + char a[12]; > + int b; > +}; > + > +struct B > +f2 (void) > +{ > + struct B x = {}; > + return x; > +} > + > +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t > \]*%\[re\]?sp" } } */ > +/* { dg-final { scan-assembler-not > "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c > b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c > new file mode 100644 > index 00000000000..e94fa1d18fa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/stackalign/pr88483-2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512f" } */ > + > +struct B > +{ > + char a[12]; > + int b; > +}; > + > +struct B > +f2 (void) > +{ > + struct B x = {}; > + return x; > +} > + > +/* { dg-final { scan-assembler-not "(sub|add)(l|q)\[\\t \]*\\$\[0-9\]*,\[\\t > \]*%\[re\]?sp" } } */ > +/* { dg-final { scan-assembler-not > "and\[lq\]?\[^\\n\]*-\[0-9\]+,\[^\\n\]*sp" } } */ > -- > 2.20.1 >