Hi!

The following testcase is miscompiled, because the result of
a DImode (doubleword) right shift is used in a QImode subreg as well as
later on pushed into a stack slot as an argument to a const function
whose result isn't used.  The RA because of the weirdo target tuning
reuses the REG_EQUIV argument slot (as it wants the value in a non-Q_REGS
register), so it first pushes it to the stack slot and then loads from the
stack slot again (according to Vlad, this can happen with both LRA and old
reload).  Later on during DCE we determine the call is not needed and try to
find all the argument pushes and don't mark those as needed, so we
effectively DCE the right shift, push to the argument slot as well as
other slots and the call, and end up keeping just the load from the
uninitialized argument slot.

The following patch just punts if we find loads from stack slots in between
where they are pushed and the const/pure call.  In addition to that, I've
outlined the same largish sequence that had 3 copies in the function already
and I'd need to add 4th copy, so in the end the dce.c changes are removing
more than adding:
 1 file changed, 118 insertions(+), 135 deletions(-)

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

During those 2 bootstraps/regtests, data.load_found has been set just
on the new testcase on ia32.

2019-04-11  Jakub Jelinek  <ja...@redhat.com>
        
        PR rtl-optimization/89965
        * dce.c: Include rtl-iter.h.
        (sp_based_mem_offset): New function.
        (struct check_argument_load_data): New type.
        (check_argument_load): New function.
        (find_call_stack_args): Use sp_based_mem_offset, check for loads
        from stack slots still tracked in sp_bytes and punt if any is
        found.

        * gcc.target/i386/pr89965.c: New test.

--- gcc/dce.c.jj        2019-02-15 00:11:13.209111382 +0100
+++ gcc/dce.c   2019-04-10 14:18:21.111763533 +0200
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.
 #include "valtrack.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "rtl-iter.h"
 
 
 /* -------------------------------------------------------------------------
@@ -265,6 +266,100 @@ check_argument_store (HOST_WIDE_INT size
   return true;
 }
 
+/* If MEM has sp address, return 0, if it has sp + const address,
+   return that const, if it has reg address where reg is set to sp + const
+   and FAST is false, return const, otherwise return
+   INTTYPE_MINUMUM (HOST_WIDE_INT).  */
+
+static HOST_WIDE_INT
+sp_based_mem_offset (rtx_call_insn *call_insn, const_rtx mem, bool fast)
+{
+  HOST_WIDE_INT off = 0;
+  rtx addr = XEXP (mem, 0);
+  if (GET_CODE (addr) == PLUS
+      && REG_P (XEXP (addr, 0))
+      && CONST_INT_P (XEXP (addr, 1)))
+    {
+      off = INTVAL (XEXP (addr, 1));
+      addr = XEXP (addr, 0);
+    }
+  if (addr == stack_pointer_rtx)
+    return off;
+
+  if (!REG_P (addr) || fast)
+    return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  /* If not fast, use chains to see if addr wasn't set to sp + offset.  */
+  df_ref use;
+  FOR_EACH_INSN_USE (use, call_insn)
+  if (rtx_equal_p (addr, DF_REF_REG (use)))
+    break;
+
+  if (use == NULL)
+    return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  struct df_link *defs;
+  for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
+    if (! DF_REF_IS_ARTIFICIAL (defs->ref))
+      break;
+
+  if (defs == NULL)
+    return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  rtx set = single_set (DF_REF_INSN (defs->ref));
+  if (!set)
+    return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  if (GET_CODE (SET_SRC (set)) != PLUS
+      || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
+      || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
+    return INTTYPE_MINIMUM (HOST_WIDE_INT);
+
+  off += INTVAL (XEXP (SET_SRC (set), 1));
+  return off;
+}
+
+/* Data for check_argument_load called via note_uses.  */
+struct check_argument_load_data {
+  bitmap sp_bytes;
+  HOST_WIDE_INT min_sp_off, max_sp_off;
+  rtx_call_insn *call_insn;
+  bool fast;
+  bool load_found;
+};
+
+/* Helper function for find_call_stack_args.  Check if there are
+   any loads from the argument slots in between the const/pure call
+   and store to the argument slot, set LOAD_FOUND if any is found.  */
+
+static void
+check_argument_load (rtx *loc, void *data)
+{
+  struct check_argument_load_data *d
+    = (struct check_argument_load_data *) data;
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
+    {
+      const_rtx mem = *iter;
+      HOST_WIDE_INT size;
+      if (MEM_P (mem)
+         && MEM_SIZE_KNOWN_P (mem)
+         && MEM_SIZE (mem).is_constant (&size))
+       {
+         HOST_WIDE_INT off = sp_based_mem_offset (d->call_insn, mem, d->fast);
+         if (off != INTTYPE_MINIMUM (HOST_WIDE_INT)
+             && off < d->max_sp_off
+             && off + size > d->min_sp_off)
+           for (HOST_WIDE_INT byte = MAX (off, d->min_sp_off);
+                byte < MIN (off + size, d->max_sp_off); byte++)
+             if (bitmap_bit_p (d->sp_bytes, byte - d->min_sp_off))
+               {
+                 d->load_found = true;
+                 return;
+               }
+        }
+    }
+}
 
 /* Try to find all stack stores of CALL_INSN arguments if
    ACCUMULATE_OUTGOING_ARGS.  If all stack stores have been found
@@ -302,58 +397,13 @@ find_call_stack_args (rtx_call_insn *cal
     if (GET_CODE (XEXP (p, 0)) == USE
        && MEM_P (XEXP (XEXP (p, 0), 0)))
       {
-       rtx mem = XEXP (XEXP (p, 0), 0), addr;
-       HOST_WIDE_INT off = 0, size;
+       rtx mem = XEXP (XEXP (p, 0), 0);
+       HOST_WIDE_INT size;
        if (!MEM_SIZE_KNOWN_P (mem) || !MEM_SIZE (mem).is_constant (&size))
          return false;
-       addr = XEXP (mem, 0);
-       if (GET_CODE (addr) == PLUS
-           && REG_P (XEXP (addr, 0))
-           && CONST_INT_P (XEXP (addr, 1)))
-         {
-           off = INTVAL (XEXP (addr, 1));
-           addr = XEXP (addr, 0);
-         }
-       if (addr != stack_pointer_rtx)
-         {
-           if (!REG_P (addr))
-             return false;
-           /* If not fast, use chains to see if addr wasn't set to
-              sp + offset.  */
-           if (!fast)
-             {
-               df_ref use;
-               struct df_link *defs;
-               rtx set;
-
-               FOR_EACH_INSN_USE (use, call_insn)
-                 if (rtx_equal_p (addr, DF_REF_REG (use)))
-                   break;
-
-               if (use == NULL)
-                 return false;
-
-               for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
-                 if (! DF_REF_IS_ARTIFICIAL (defs->ref))
-                   break;
-
-               if (defs == NULL)
-                 return false;
-
-               set = single_set (DF_REF_INSN (defs->ref));
-               if (!set)
-                 return false;
-
-               if (GET_CODE (SET_SRC (set)) != PLUS
-                   || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
-                   || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
-                 return false;
-
-               off += INTVAL (XEXP (SET_SRC (set), 1));
-             }
-           else
-             return false;
-         }
+        HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
+        if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
+         return false;
        min_sp_off = MIN (min_sp_off, off);
        max_sp_off = MAX (max_sp_off, off + size);
       }
@@ -369,51 +419,24 @@ find_call_stack_args (rtx_call_insn *cal
     if (GET_CODE (XEXP (p, 0)) == USE
        && MEM_P (XEXP (XEXP (p, 0), 0)))
       {
-       rtx mem = XEXP (XEXP (p, 0), 0), addr;
-       HOST_WIDE_INT off = 0, byte, size;
+       rtx mem = XEXP (XEXP (p, 0), 0);
        /* Checked in the previous iteration.  */
-       size = MEM_SIZE (mem).to_constant ();
-       addr = XEXP (mem, 0);
-       if (GET_CODE (addr) == PLUS
-           && REG_P (XEXP (addr, 0))
-           && CONST_INT_P (XEXP (addr, 1)))
-         {
-           off = INTVAL (XEXP (addr, 1));
-           addr = XEXP (addr, 0);
-         }
-       if (addr != stack_pointer_rtx)
-         {
-           df_ref use;
-           struct df_link *defs;
-           rtx set;
-
-           FOR_EACH_INSN_USE (use, call_insn)
-             if (rtx_equal_p (addr, DF_REF_REG (use)))
-               break;
-
-           for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
-             if (! DF_REF_IS_ARTIFICIAL (defs->ref))
-               break;
-
-           set = single_set (DF_REF_INSN (defs->ref));
-           off += INTVAL (XEXP (SET_SRC (set), 1));
-         }
-       for (byte = off; byte < off + size; byte++)
-         {
-           if (!bitmap_set_bit (sp_bytes, byte - min_sp_off))
-             gcc_unreachable ();
-         }
+       HOST_WIDE_INT size = MEM_SIZE (mem).to_constant ();
+       HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
+       gcc_checking_assert (off != INTTYPE_MINIMUM (HOST_WIDE_INT));
+       for (HOST_WIDE_INT byte = off; byte < off + size; byte++)
+         if (!bitmap_set_bit (sp_bytes, byte - min_sp_off))
+           gcc_unreachable ();
       }
 
   /* Walk backwards, looking for argument stores.  The search stops
      when seeing another call, sp adjustment or memory store other than
      argument store.  */
+  struct check_argument_load_data data
+    = { sp_bytes, min_sp_off, max_sp_off, call_insn, fast, false };
   ret = false;
   for (insn = PREV_INSN (call_insn); insn; insn = prev_insn)
     {
-      rtx set, mem, addr;
-      HOST_WIDE_INT off;
-
       if (insn == BB_HEAD (BLOCK_FOR_INSN (call_insn)))
        prev_insn = NULL;
       else
@@ -425,61 +448,21 @@ find_call_stack_args (rtx_call_insn *cal
       if (!NONDEBUG_INSN_P (insn))
        continue;
 
-      set = single_set (insn);
+      rtx set = single_set (insn);
       if (!set || SET_DEST (set) == stack_pointer_rtx)
        break;
 
+      note_uses (&PATTERN (insn), check_argument_load, &data);
+      if (data.load_found)
+       break;
+
       if (!MEM_P (SET_DEST (set)))
        continue;
 
-      mem = SET_DEST (set);
-      addr = XEXP (mem, 0);
-      off = 0;
-      if (GET_CODE (addr) == PLUS
-         && REG_P (XEXP (addr, 0))
-         && CONST_INT_P (XEXP (addr, 1)))
-       {
-         off = INTVAL (XEXP (addr, 1));
-         addr = XEXP (addr, 0);
-       }
-      if (addr != stack_pointer_rtx)
-       {
-         if (!REG_P (addr))
-           break;
-         if (!fast)
-           {
-             df_ref use;
-             struct df_link *defs;
-             rtx set;
-
-             FOR_EACH_INSN_USE (use, insn)
-               if (rtx_equal_p (addr, DF_REF_REG (use)))
-                 break;
-
-             if (use == NULL)
-               break;
-
-             for (defs = DF_REF_CHAIN (use); defs; defs = defs->next)
-               if (! DF_REF_IS_ARTIFICIAL (defs->ref))
-                 break;
-
-             if (defs == NULL)
-               break;
-
-             set = single_set (DF_REF_INSN (defs->ref));
-             if (!set)
-               break;
-
-             if (GET_CODE (SET_SRC (set)) != PLUS
-                 || XEXP (SET_SRC (set), 0) != stack_pointer_rtx
-                 || !CONST_INT_P (XEXP (SET_SRC (set), 1)))
-               break;
-
-             off += INTVAL (XEXP (SET_SRC (set), 1));
-           }
-         else
-           break;
-       }
+      rtx mem = SET_DEST (set);
+      HOST_WIDE_INT off = sp_based_mem_offset (call_insn, mem, fast);
+      if (off == INTTYPE_MINIMUM (HOST_WIDE_INT))
+       break;
 
       HOST_WIDE_INT size;
       if (!MEM_SIZE_KNOWN_P (mem)
--- gcc/testsuite/gcc.target/i386/pr89965.c.jj  2019-04-10 14:04:07.708517723 
+0200
+++ gcc/testsuite/gcc.target/i386/pr89965.c     2019-04-10 14:09:55.300911221 
+0200
@@ -0,0 +1,39 @@
+/* PR rtl-optimization/89965 */
+/* { dg-do run } */
+/* { dg-options "-O -mtune=nano-x2 -fcaller-saves -fexpensive-optimizations 
-fno-tree-dce -fno-tree-ter" } */
+/* { dg-additional-options "-march=i386" { target ia32 } } */
+
+int a;
+
+__attribute__ ((noipa)) unsigned long long
+foo (unsigned char c, unsigned d, unsigned e, unsigned long long f,
+     unsigned char g, unsigned h, unsigned long long i)
+{
+  (void) d;
+  unsigned short j = __builtin_mul_overflow_p (~0, h, c);
+  e <<= e;
+  i >>= 7;
+  c *= i;
+  i /= 12;
+  a = __builtin_popcount (c);
+  __builtin_add_overflow (e, a, &f);
+  return c + f + g + j + h;
+}
+
+__attribute__ ((noipa)) void
+bar (void)
+{
+  char buf[64];
+  __builtin_memset (buf, 0x55, sizeof buf);
+  asm volatile ("" : : "r" (&buf[0]) : "memory");
+}
+
+int
+main (void)
+{
+  bar ();
+  unsigned long long x = foo (2, 0, 0, 0, 0, 0, 0);
+  if (x != 0)
+    __builtin_abort ();
+  return 0;
+}

        Jakub

Reply via email to