On Tue, Jan 30, 2018 at 5:48 AM, Jeff Law <l...@redhat.com> wrote: > > > > stack-clash-protection will sometimes force a prologue to use pushes to > save registers. We try to limit how often that happens as it constrains > options for generating efficient prologue sequences for common cases. > > We check the value of TO_ALLOCATE in ix86_compute_frame_layout and if > it's greater than the probing interval, then we force the prologue to > use pushes to save integer registers. That is conservatively correct > and allows freedom in the most common cases. This is good. > > In expand_prologue we assert that the integer registers were saved via a > push anytime we *might* generate a probe, even if the size of the > allocated frame is too small to require explicit probing. Naturally, > this conflicts with the code in ix86_compute_frame_layout that tries to > avoid forcing pushes instead of moves. > > This patch moves the assertion inside expand_prologue down to the points > where it actually needs to be true. Specifically when we're generating > probes in a loop for -fstack-check or -fstack-clash-protection. > > [ Probing via calls to chkstk_ms is not affected by this change. ] > > Sorry to have to change this stuff for the 3rd time!
Now you see how many paths stack frame formation code has ;) > Bootstrapped and regression tested on x86_64 and i686. Also verified > that if stack-clash checking is enabled by default that both compilers > will bootstrap. > > OK for the trunk? LGTM. Thanks, Uros. > Thanks, > Jeff > > * i386.c (ix86_adjust_stack_and_probe_stack_clash): New argument > INT_REGISTERS_SAVED. Check it prior to calling > get_scratch_register_on_entry. > (ix86_adjust_stack_and_probe): Similarly. > (ix86_emit_probe_stack_range): Similarly. > (ix86_expand_prologue): Corresponding changes. > > * gcc.target/i386/pr84064.c: New test. > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 3653ddd..fef34a1 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -12591,10 +12591,14 @@ release_scratch_register_on_entry (struct > scratch_reg *sr) > This differs from the next routine in that it tries hard to prevent > attacks that jump the stack guard. Thus it is never allowed to allocate > more than PROBE_INTERVAL bytes of stack space without a suitable > - probe. */ > + probe. > + > + INT_REGISTERS_SAVED is true if integer registers have already been > + pushed on the stack. */ > > static void > -ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size) > +ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, > + const bool int_registers_saved) > { > struct machine_function *m = cfun->machine; > > @@ -12700,6 +12704,12 @@ ix86_adjust_stack_and_probe_stack_clash (const > HOST_WIDE_INT size) > } > else > { > + /* We expect the GP registers to be saved when probes are used > + as the probing sequences might need a scratch register and > + the routine to allocate one assumes the integer registers > + have already been saved. 
*/ > + gcc_assert (int_registers_saved); > + > struct scratch_reg sr; > get_scratch_register_on_entry (&sr); > > @@ -12758,10 +12768,14 @@ ix86_adjust_stack_and_probe_stack_clash (const > HOST_WIDE_INT size) > emit_insn (gen_blockage ()); > } > > -/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */ > +/* Emit code to adjust the stack pointer by SIZE bytes while probing it. > + > + INT_REGISTERS_SAVED is true if integer registers have already been > + pushed on the stack. */ > > static void > -ix86_adjust_stack_and_probe (const HOST_WIDE_INT size) > +ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, > + const bool int_registers_saved) > { > /* We skip the probe for the first interval + a small dope of 4 words and > probe that many bytes past the specified size to maintain a protection > @@ -12822,6 +12836,12 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT > size) > equality test for the loop condition. */ > else > { > + /* We expect the GP registers to be saved when probes are used > + as the probing sequences might need a scratch register and > + the routine to allocate one assumes the integer registers > + have already been saved. */ > + gcc_assert (int_registers_saved); > + > HOST_WIDE_INT rounded_size; > struct scratch_reg sr; > > @@ -12949,10 +12969,14 @@ output_adjust_stack_and_probe (rtx reg) > } > > /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, > - inclusive. These are offsets from the current stack pointer. */ > + inclusive. These are offsets from the current stack pointer. > + > + INT_REGISTERS_SAVED is true if integer registers have already been > + pushed on the stack. */ > > static void > -ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) > +ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, > + const bool int_registers_saved) > { > /* See if we have a constant small number of probes to generate. If so, > that's the easy case. 
The run-time loop is made up of 6 insns in the > @@ -12980,6 +13004,12 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, > HOST_WIDE_INT size) > equality test for the loop condition. */ > else > { > + /* We expect the GP registers to be saved when probes are used > + as the probing sequences might need a scratch register and > + the routine to allocate one assumes the integer registers > + have already been saved. */ > + gcc_assert (int_registers_saved); > + > HOST_WIDE_INT rounded_size, last; > struct scratch_reg sr; > > @@ -13733,15 +13763,10 @@ ix86_expand_prologue (void) > && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK > || flag_stack_clash_protection)) > { > - /* We expect the GP registers to be saved when probes are used > - as the probing sequences might need a scratch register and > - the routine to allocate one assumes the integer registers > - have already been saved. */ > - gcc_assert (int_registers_saved); > - > if (flag_stack_clash_protection) > { > - ix86_adjust_stack_and_probe_stack_clash (allocate); > + ix86_adjust_stack_and_probe_stack_clash (allocate, > + int_registers_saved); > allocate = 0; > } > else if (STACK_CHECK_MOVING_SP) > @@ -13749,7 +13774,7 @@ ix86_expand_prologue (void) > if (!(crtl->is_leaf && !cfun->calls_alloca > && allocate <= get_probe_interval ())) > { > - ix86_adjust_stack_and_probe (allocate); > + ix86_adjust_stack_and_probe (allocate, int_registers_saved); > allocate = 0; > } > } > @@ -13765,11 +13790,12 @@ ix86_expand_prologue (void) > if (crtl->is_leaf && !cfun->calls_alloca) > { > if (size > get_probe_interval ()) > - ix86_emit_probe_stack_range (0, size); > + ix86_emit_probe_stack_range (0, size, > int_registers_saved); > } > else > ix86_emit_probe_stack_range (0, > - size + get_stack_check_protect > ()); > + size + get_stack_check_protect > (), > + int_registers_saved); > } > else > { > @@ -13778,10 +13804,13 @@ ix86_expand_prologue (void) > if (size > get_probe_interval () > && size > get_stack_check_protect ()) 
> ix86_emit_probe_stack_range (get_stack_check_protect (), > - size - > get_stack_check_protect ()); > + (size > + - get_stack_check_protect > ()), > + int_registers_saved); > } > else > - ix86_emit_probe_stack_range (get_stack_check_protect (), > size); > + ix86_emit_probe_stack_range (get_stack_check_protect (), size, > + int_registers_saved); > } > } > } > diff --git a/gcc/testsuite/gcc.target/i386/pr84064.c > b/gcc/testsuite/gcc.target/i386/pr84064.c > new file mode 100644 > index 0000000..01f8d9e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr84064.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=i686 -fstack-clash-protection" } */ > +/* { dg-require-effective-target ia32 } */ > + > +void > +f (void *p1, void *p2) > +{ > + __builtin_memcpy (p1, p2, 1000); > +} > + >