On Thu, Feb 1, 2018 at 2:44 AM, Jeff Law <l...@redhat.com> wrote: > > This is V2 of the patch to fix 84128. > > What's change since V1. The -fstack-check path for non-linux systems > has been restored to its previous (correct) behavior. Only the behavior > for -fstack-check on linux systems and -fstack-clash-protection has been > changed. > > release_scratch_regsiter_on_entry now has two paths. The original path > that is used by -fstack-check on non-linux systems (RESTORE_VIA_POP) and > the new path used by -fstack-clash-protection and -fstack-check on linux > systems. > > The original path restores the scratch register using a push. The new > path accepts an offset where the register was saved and uses a reg+d > memory reference to restore it, the space allocated by the push in this > case becomes part of the local frame and is deallocated by the epilogue. > > THe original path is used by ix86_emit_probe_stack_range which just > needs to pass in the new RESTORE_VIA_POP argument as true. > > The new path is used by ix86_adjust_stack_and_probe_stack_clash and > ix86_adjust_stack_and_probe where the space used by the push becomes > part of the local frame and is deallocated by the epilogue. It passes > false for RESTORE_VIA_POP. > > Per my invesitgations we have never tried to describe the restore of > %eax in the CFI info. So that's a non-issue for this change.
Agreed. > I've done testing of all three paths paths now. > -fstack-clash-protection, -fstack-check with moving sp, and > -fstack-check without moving sp. The details of that testing can be > found in the original thread (it's not automated and requires a fair > amount of hackery and hand verification of the test results). > > OK for the trunk now? > > THanks, > Jeff > > > > > > PR target/84128 > * config/i386/i386.c (release_scratch_register_on_entry): Add new > OFFSET and RELEASE_VIA_POP arguments. Use SP+OFFSET to restore > the scratch if RELEASE_VIA_POP is false. > (ix86_adjust_stack_and_probe_stack_clash): Un-constify SIZE. > If we have to save a temporary register, decrement SIZE appropriately. > Pass new arguments to release_scratch_register_on_entry. > (ix86_adjust_stack_and_probe): Likewise. > (ix86_emit_probe_stack_range): Pass new arguments to > release_scratch_register_on_entry. > > PR target/84128 > * gcc.target/i386/pr84128.c: New test. OK. Thanks, Uros. > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index fef34a1..3196ac4 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -12567,22 +12567,39 @@ get_scratch_register_on_entry (struct scratch_reg > *sr) > } > } > > -/* Release a scratch register obtained from the preceding function. */ > +/* Release a scratch register obtained from the preceding function. > + > + If RELEASE_VIA_POP is true, we just pop the register off the stack > + to release it. This is what non-Linux systems use with -fstack-check. > + > + Otherwise we use OFFSET to locate the saved register and the > + allocated stack space becomes part of the local frame and is > + deallocated by the epilogue. */ > > static void > -release_scratch_register_on_entry (struct scratch_reg *sr) > +release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT > offset, > + bool release_via_pop) > { > if (sr->saved) > { > - struct machine_function *m = cfun->machine; > - rtx x, insn = emit_insn (gen_pop (sr->reg)); > + if (release_via_pop) > + { > + struct machine_function *m = cfun->machine; > + rtx x, insn = emit_insn (gen_pop (sr->reg)); > > - /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */ > - RTX_FRAME_RELATED_P (insn) = 1; > - x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); > - x = gen_rtx_SET (stack_pointer_rtx, x); > - add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); > - m->fs.sp_offset -= UNITS_PER_WORD; > + /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ > + RTX_FRAME_RELATED_P (insn) = 1; > + x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT > (UNITS_PER_WORD)); > + x = gen_rtx_SET (stack_pointer_rtx, x); > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); > + m->fs.sp_offset -= UNITS_PER_WORD; > + } > + else > + { > + rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); > + x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); > + emit_insn (x); > + } > } > } > > @@ -12597,7 +12614,7 @@ release_scratch_register_on_entry (struct scratch_reg > *sr) > pushed on the stack. */ > > static void > -ix86_adjust_stack_and_probe_stack_clash (const HOST_WIDE_INT size, > +ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, > const bool int_registers_saved) > { > struct machine_function *m = cfun->machine; > @@ -12713,6 +12730,12 @@ ix86_adjust_stack_and_probe_stack_clash (const > HOST_WIDE_INT size, > struct scratch_reg sr; > get_scratch_register_on_entry (&sr); > > + /* If we needed to save a register, then account for any space > + that was pushed (we are not going to pop the register when > + we do the restore). */ > + if (sr.saved) > + size -= UNITS_PER_WORD; > + > /* Step 1: round SIZE down to a multiple of the interval. */ > HOST_WIDE_INT rounded_size = size & -probe_interval; > > @@ -12761,7 +12784,9 @@ ix86_adjust_stack_and_probe_stack_clash (const > HOST_WIDE_INT size, > m->fs.cfa_reg == stack_pointer_rtx); > dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); > > - release_scratch_register_on_entry (&sr); > + /* This does not deallocate the space reserved for the scratch > + register. That will be deallocated in the epilogue. */ > + release_scratch_register_on_entry (&sr, size, false); > } > > /* Make sure nothing is scheduled before we are done. */ > @@ -12774,7 +12799,7 @@ ix86_adjust_stack_and_probe_stack_clash (const > HOST_WIDE_INT size, > pushed on the stack. */ > > static void > -ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, > +ix86_adjust_stack_and_probe (HOST_WIDE_INT size, > const bool int_registers_saved) > { > /* We skip the probe for the first interval + a small dope of 4 words and > @@ -12847,6 +12872,11 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT > size, > > get_scratch_register_on_entry (&sr); > > + /* If we needed to save a register, then account for any space > + that was pushed (we are not going to pop the register when > + we do the restore). */ > + if (sr.saved) > + size -= UNITS_PER_WORD; > > /* Step 1: round SIZE to the previous multiple of the interval. */ > > @@ -12906,7 +12936,9 @@ ix86_adjust_stack_and_probe (const HOST_WIDE_INT size, > (get_probe_interval () > + dope)))); > > - release_scratch_register_on_entry (&sr); > + /* This does not deallocate the space reserved for the scratch > + register. That will be deallocated in the epilogue. */ > + release_scratch_register_on_entry (&sr, size, false); > } > > /* Even if the stack pointer isn't the CFA register, we need to correctly > @@ -13055,7 +13087,7 @@ ix86_emit_probe_stack_range (HOST_WIDE_INT first, > HOST_WIDE_INT size, > sr.reg), > rounded_size - size)); > > - release_scratch_register_on_entry (&sr); > + release_scratch_register_on_entry (&sr, size, true); > } > > /* Make sure nothing is scheduled before we are done. */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr84128.c > b/gcc/testsuite/gcc.target/i386/pr84128.c > new file mode 100644 > index 0000000..a8323fd6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr84128.c > @@ -0,0 +1,30 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -march=i686 -mtune=generic -fstack-clash-protection" } > */ > +/* { dg-require-effective-target ia32 } */ > + > +__attribute__ ((noinline, noclone, weak, regparm (3))) > +int > +f1 (long arg0, int (*pf) (long, void *)) > +{ > + unsigned char buf[32768]; > + return pf (arg0, buf); > +} > + > +__attribute__ ((noinline, noclone, weak)) > +int > +f2 (long arg0, void *ignored) > +{ > + if (arg0 != 17) > + __builtin_abort (); > + return 19; > +} > + > +int > +main (void) > +{ > + if (f1 (17, f2) != 19) > + __builtin_abort (); > + return 0; > +} > + > + >