On Thu, 11 Apr 2019, Jakub Jelinek wrote:

> Hi!
>
> The following testcase is miscompiled because the result of a DImode
> (doubleword) right shift is used in a QImode subreg as well as later on
> pushed into a stack slot as an argument to a const function whose result
> isn't used.  Because of the weirdo target tuning, the RA reuses the
> REG_EQUIV argument slot (as it wants the value in a non-Q_REGS register),
> so it first pushes the value to the stack slot and then loads it back from
> the stack slot (according to Vlad, this can happen with both LRA and old
> reload).  Later on, during DCE, we determine the call is not needed, try
> to find all the argument pushes and don't mark those as needed either, so
> we effectively DCE the right shift, the pushes to the argument slot and
> the other slots, and the call, and end up keeping just the load from the
> uninitialized argument slot.
>
> The following patch just punts if we find loads from stack slots in
> between where they are pushed and the const/pure call.  In addition to
> that, I've outlined the same largish sequence that already had 3 copies
> in the function and would have needed a 4th one, so in the end the dce.c
> changes remove more than they add:
>  1 file changed, 118 insertions(+), 135 deletions(-)
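To illustrate the scenario described above, the shape of source that can
trigger it is roughly the following.  This is a simplified sketch only: the
names here are made up, and the actual reduced testcase is part of the
patch further down.

/* Hypothetical illustration, not the reduced testcase from the patch.
   ull_const_fn is a const function whose result is ignored; the
   doubleword (DImode on ia32) right shift feeds both the pushed
   argument and a QImode (low byte) use.  */
extern unsigned long long ull_const_fn (unsigned long long)
  __attribute__ ((const));

unsigned char
use_low_byte (unsigned long long x)
{
  unsigned long long shifted = x >> 7;   /* DImode right shift */
  ull_const_fn (shifted);                /* call is dead, result unused */
  return (unsigned char) shifted;        /* QImode use of the same value */
}

If DCE later deletes the call together with its argument pushes after the
register allocator has decided to reload shifted from the argument slot,
the low-byte use ends up reading uninitialized stack, which is the
miscompilation described above.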
The refactoring itself is probably OK (and, if done separately, it makes
reviewing easier).

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> During those 2 bootstraps/regtests, data.load_found has been set just
> on the new testcase on ia32.

Hmm, I wonder whether we really need to DCE calls after reload?

That said, I'm not familiar enough with the code to check whether the
patch makes sense (can there ever be uses of the argument slots _after_
the call?).

Richard.

> 2019-04-11  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR rtl-optimization/89965
> 	* dce.c: Include rtl-iter.h.
> 	(sp_based_mem_offset): New function.
> 	(struct check_argument_load_data): New type.
> 	(check_argument_load): New function.
> 	(find_call_stack_args): Use sp_based_mem_offset, check for loads
> 	from stack slots still tracked in sp_bytes and punt if any is
> 	found.
>
> 	* gcc.target/i386/pr89965.c: New test.
>
> --- gcc/dce.c.jj	2019-02-15 00:11:13.209111382 +0100
> +++ gcc/dce.c	2019-04-10 14:18:21.111763533 +0200
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
>  #include "valtrack.h"
>  #include "tree-pass.h"
>  #include "dbgcnt.h"
> +#include "rtl-iter.h"
>
>
>  /* -------------------------------------------------------------------------
> @@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size
>    return true;
>  }
>
> +/* If MEM has an sp address, return 0; if it has an sp + const address,
> +   return that const; if it has a reg address where reg is set to
> +   sp + const and FAST is false, return the const; otherwise return
> +   INTTYPE_MINIMUM (HOST_WIDE_INT).  */
> +
> +static HOST_WIDE_INT
> +sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
> +{
> +  HOST_WIDE_INT off = 0;
> +  rtx addr = XEXP (mem, 0);
> +  if (GET_CODE (addr) == PLUS
> +      && REG_P (XEXP (addr, 0))
> +      && CONST_INT_P (XEXP (addr, 1)))
> +    {
> +      off = INTVAL (XEXP (addr, 1));
> +      addr = XEXP (addr, 0);
> +    }
> +  if (addr == stack_pointer_rtx)
> +    return off;
> +
> +  if (!REG_P (addr) || fast)
> +    return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
> +  df_ref use;
> +  FOR_EACH_INSN_USE (use, call_insn)
> +    if (rtx_equal_p (addr, DF_REF_REG (use)))
> +      break;
> +
> +  if (use == NULL)
> +    return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  struct df_link *defs;
> +  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> +    if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> +      break;
> +
> +  if (defs == NULL)
> +    return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  rtx set = single_set (DF_REF_INSN (defs->ref));
> +  if (!set)
> +    return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  if (GET_CODE (SET_SRC (set)) != PLUS
> +      || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> +      || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> +    return INTTYPE_MINIMUM (HOST_WIDE_INT);
> +
> +  off += INTVAL (XEXP (SET_SRC (set), 1));
> +  return off;
> +}
> +
> +/* Data for check_argument_load called via note_uses.  */
> +struct check_argument_load_data {
> +  bitmap sp_bytes;
> +  HOST_WIDE_INT min_sp_off, max_sp_off;
> +  rtx_call_insn *call_insn;
> +  bool fast;
> +  bool load_found;
> +};
> +
> +/* Helper function for find_call_stack_args.  Check if there are
> +   any loads from the argument slots in between the const/pure call
> +   and the store to the argument slot; set LOAD_FOUND if any is found.
> +   */
> +static void
> +check_argument_load (rtx *loc, void *data)
> +{
> +  struct check_argument_load_data *d
> +    = (struct check_argument_load_data *) data;
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
> +    {
> +      const_rtx mem = *iter;
> +      HOST_WIDE_INT size;
> +      if (MEM_P (mem)
> +          && MEM_SIZE_KNOWN_P (mem)
> +          && MEM_SIZE (mem).is_constant (&size))
> +        {
> +          HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
> +          if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
> +              && off < d->max_sp_off
> +              && off + size > d->min_sp_off)
> +            for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
> +                 byte < MIN (off + size, d->max_sp_off); byte++)
> +              if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
> +                {
> +                  d->load_found = true;
> +                  return;
> +                }
> +        }
> +    }
> +}
>
>  /* Try to find all stack stores of CALL_INSN arguments if
>     ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
> @@ -302,58 +397,13 @@ find_call_stack_args (rtx_call_insn *cal
>        if (GET_CODE (XEXP (p, 0)) == USE
>            && MEM_P (XEXP (XEXP (p, 0), 0)))
>          {
> -          rtx mem = XEXP (XEXP (p, 0), 0), addr;
> -          HOST_WIDE_INT off = 0, size;
> +          rtx mem = XEXP (XEXP (p, 0), 0);
> +          HOST_WIDE_INT size;
>            if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size))
>              return false;
> -          addr = XEXP (mem, 0);
> -          if (GET_CODE (addr) == PLUS
> -              && REG_P (XEXP (addr, 0))
> -              && CONST_INT_P (XEXP (addr, 1)))
> -            {
> -              off = INTVAL (XEXP (addr, 1));
> -              addr = XEXP (addr, 0);
> -            }
> -          if (addr != stack_pointer_rtx)
> -            {
> -              if (!REG_P (addr))
> -                return false;
> -              /* If not fast, use chains to see if addr wasn't set to
> -                 sp + offset.  */
> -              if (!fast)
> -                {
> -                  df_ref use;
> -                  struct df_link *defs;
> -                  rtx set;
> -
> -                  FOR_EACH_INSN_USE (use, call_insn)
> -                    if (rtx_equal_p (addr, DF_REF_REG (use)))
> -                      break;
> -
> -                  if (use == NULL)
> -                    return false;
> -
> -                  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> -                    if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> -                      break;
> -
> -                  if (defs == NULL)
> -                    return false;
> -
> -                  set = single_set (DF_REF_INSN (defs->ref));
> -                  if (!set)
> -                    return false;
> -
> -                  if (GET_CODE (SET_SRC (set)) != PLUS
> -                      || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> -                      || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> -                    return false;
> -
> -                  off += INTVAL (XEXP (SET_SRC (set), 1));
> -                }
> -              else
> -                return false;
> -            }
> +          HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
> +          if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
> +            return false;
>            min_sp_off = MIN (min_sp_off, off);
>            max_sp_off = MAX (max_sp_off, off + size);
>          }
> @@ -369,51 +419,24 @@ find_call_stack_args (rtx_call_insn *cal
>        if (GET_CODE (XEXP (p, 0)) == USE
>            && MEM_P (XEXP (XEXP (p, 0), 0)))
>          {
> -          rtx mem = XEXP (XEXP (p, 0), 0), addr;
> -          HOST_WIDE_INT off = 0, byte, size;
> +          rtx mem = XEXP (XEXP (p, 0), 0);
>            /* Checked in the previous iteration.  */
> -          size = MEM_SIZE (mem).to_constant ();
> -          addr = XEXP (mem, 0);
> -          if (GET_CODE (addr) == PLUS
> -              && REG_P (XEXP (addr, 0))
> -              && CONST_INT_P (XEXP (addr, 1)))
> -            {
> -              off = INTVAL (XEXP (addr, 1));
> -              addr = XEXP (addr, 0);
> -            }
> -          if (addr != stack_pointer_rtx)
> -            {
> -              df_ref use;
> -              struct df_link *defs;
> -              rtx set;
> -
> -              FOR_EACH_INSN_USE (use, call_insn)
> -                if (rtx_equal_p (addr, DF_REF_REG (use)))
> -                  break;
> -
> -              for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> -                if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> -                  break;
> -
> -              set = single_set (DF_REF_INSN (defs->ref));
> -              off += INTVAL (XEXP (SET_SRC (set), 1));
> -            }
> -          for (byte = off; byte < off + size; byte++)
> -            {
> -              if (!bitmap_set_bit (sp_bytes, byte - min_sp_off))
> -                gcc_unreachable ();
> -            }
> +          HOST_WIDE_INT size = MEM_SIZE (mem).to_constant ();
> +          HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
> +          gcc_checking_assert (off != INTTYPE_MINIMUM (HOST_WIDE_INT));
> +          for (HOST_WIDE_INT byte = off; byte < off + size; byte++)
> +            if (!bitmap_set_bit (sp_bytes, byte - min_sp_off))
> +              gcc_unreachable ();
>          }
>
>    /* Walk backwards, looking for argument stores.  The search stops
>       when seeing another call, sp adjustment or memory store other than
>       argument store.  */
> +  struct check_argument_load_data data
> +    = { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false };
>    ret = false;
>    for (insn = PREV_INSN (call_insn); insn; insn = prev_insn)
>      {
> -      rtx set, mem, addr;
> -      HOST_WIDE_INT off;
> -
>        if (insn == BB_HEAD (BLOCK_FOR_INSN (call_insn)))
>          prev_insn = NULL;
>        else
> @@ -425,61 +448,21 @@ find_call_stack_args (rtx_call_insn *cal
>        if (!NONDEBUG_INSN_P (insn))
>          continue;
>
> -      set = single_set (insn);
> +      rtx set = single_set (insn);
>        if (!set || SET_DEST (set) == stack_pointer_rtx)
>          break;
>
> +      note_uses (&PATTERN (insn), check_argument_load, &data);
> +      if (data.load_found)
> +        break;
> +
>        if (!MEM_P (SET_DEST (set)))
>          continue;
>
> -      mem = SET_DEST (set);
> -      addr = XEXP (mem, 0);
> -      off = 0;
> -      if (GET_CODE (addr) == PLUS
> -          && REG_P (XEXP (addr, 0))
> -          && CONST_INT_P (XEXP (addr, 1)))
> -        {
> -          off = INTVAL (XEXP (addr, 1));
> -          addr = XEXP (addr, 0);
> -        }
> -      if (addr != stack_pointer_rtx)
> -        {
> -          if (!REG_P (addr))
> -            break;
> -          if (!fast)
> -            {
> -              df_ref use;
> -              struct df_link *defs;
> -              rtx set;
> -
> -              FOR_EACH_INSN_USE (use, insn)
> -                if (rtx_equal_p (addr, DF_REF_REG (use)))
> -                  break;
> -
> -              if (use == NULL)
> -                break;
> -
> -              for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
> -                if (! DF_REF_IS_ARTIFICIAL (defs->ref))
> -                  break;
> -
> -              if (defs == NULL)
> -                break;
> -
> -              set = single_set (DF_REF_INSN (defs->ref));
> -              if (!set)
> -                break;
> -
> -              if (GET_CODE (SET_SRC (set)) != PLUS
> -                  || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
> -                  || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
> -                break;
> -
> -              off += INTVAL (XEXP (SET_SRC (set), 1));
> -            }
> -          else
> -            break;
> -        }
> +      rtx mem = SET_DEST (set);
> +      HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
> +      if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
> +        break;
>
>        HOST_WIDE_INT size;
>        if (!MEM_SIZE_KNOWN_P (mem)
> --- gcc/testsuite/gcc.target/i386/pr89965.c.jj	2019-04-10 14:04:07.708517723 +0200
> +++ gcc/testsuite/gcc.target/i386/pr89965.c	2019-04-10 14:09:55.300911221 +0200
> @@ -0,0 +1,39 @@
> +/* PR rtl-optimization/89965 */
> +/* { dg-do run } */
> +/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations -fno-tree-dce -fno-tree-ter" } */
> +/* { dg-additional-options "-march=i386" { target ia32 } } */
> +
> +int a;
> +
> +__attribute__ ((noipa)) unsigned long long
> +foo (unsigned char c, unsigned d, unsigned e, unsigned long long f,
> +     unsigned char g, unsigned h, unsigned long long i)
> +{
> +  (void) d;
> +  unsigned short j = __builtin_mul_overflow_p (~0, h, c);
> +  e <<= e;
> +  i >>= 7;
> +  c *= i;
> +  i /= 12;
> +  a = __builtin_popcount (c);
> +  __builtin_add_overflow (e, a, &f);
> +  return c + f + g + j + h;
> +}
> +
> +__attribute__ ((noipa)) void
> +bar (void)
> +{
> +  char buf[64];
> +  __builtin_memset (buf, 0x55, sizeof buf);
> +  asm volatile ("" : : "r" (&buf[0]) : "memory");
> +}
> +
> +int
> +main (void)
> +{
> +  bar ();
> +  unsigned long long x = foo (2, 0, 0, 0, 0, 0, 0);
> +  if (x != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
>
> 	Jakub

-- 
Richard Biener <rguent...@suse.de>
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)
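As an aside, for readers who have not looked at find_call_stack_args
before: the overlap test that the new check_argument_load helper performs
can be modelled in isolation roughly as follows.  This is a standalone
sketch, not GCC code; a plain bool array stands in for the sp_bytes
bitmap, long stands in for HOST_WIDE_INT, and all the numbers are
invented.

#include <stdbool.h>
#include <stdio.h>

#define NBYTES 32

/* Element i set means byte min_sp_off + i is a still-tracked argument
   byte (the role of the sp_bytes bitmap in dce.c).  */
static bool sp_bytes[NBYTES];

/* Return true if a load of SIZE bytes at sp + OFF reads any tracked
   argument byte in [MIN_SP_OFF, MAX_SP_OFF).  Mirrors the range check
   and the clamped loop in check_argument_load.  */
static bool
load_overlaps_argument_bytes (long off, long size,
                              long min_sp_off, long max_sp_off)
{
  if (off >= max_sp_off || off + size <= min_sp_off)
    return false;
  long lo = off > min_sp_off ? off : min_sp_off;                /* MAX */
  long hi = off + size < max_sp_off ? off + size : max_sp_off;  /* MIN */
  for (long byte = lo; byte < hi; byte++)
    if (sp_bytes[byte - min_sp_off])
      return true;
  return false;
}

int
main (void)
{
  /* Pretend the call's arguments occupy bytes sp+0 .. sp+15.  */
  for (int i = 0; i < 16; i++)
    sp_bytes[i] = true;

  printf ("%d\n", load_overlaps_argument_bytes (8, 4, 0, 16));   /* 1 */
  printf ("%d\n", load_overlaps_argument_bytes (16, 4, 0, 16));  /* 0 */
  return 0;
}

In the patch, finding such an overlapping load makes find_call_stack_args
punt (return false), so DCE does not try to delete the argument stores for
that call.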