PR 59137 is another case where dbr_schedule gets confused about liveness.
We start out with:

A:     $2 = x
B:     if $4 == $2 goto L1  [REG_DEAD: $2]
C:     if $4 < 0 goto L2
       ...
    L1:
D:     $2 = y
E:     goto L3
    L2:
F:     $2 = x
G:     goto L3
       ...
    L3:
       ...
       return $2

We fill G's delay slot in the obvious way:

    L2:
G:     goto L3
F:       $2 = x

Then we try to "steal" G's delay slot for C.  F is obviously redundant
with A in this context, so we drop it and end with a simple threaded
branch to L3:

A:     $2 = x
B:     if $4 == $2 goto L1  [REG_DEAD: $2]
C:     if $4 < 0 goto L3

The problem is that the REG_DEAD note is no longer accurate, so when
we go on to fill B's delay slot we mistakenly think that we can use D:

A:     $2 = x
B:     if $4 == $2 goto L3
D:       $2 = y
C:     if $4 < 0 goto L3

and so the return value for $4 < 0 changes from x to y.

reorg's mechanism for handling deleted redundant instructions seems
to be update_block, which adds a USE containing the redundant instruction
just before the place that it was supposed to occur.  The patch therefore
uses update_block in steal_delay_list_from_target.

I went through the other calls to redundant_insn and a few of them
also seem to be missing an update_block.  I don't have testcases
for these though, so it's going to be be a matter of opinion whether
adding them or leaving them out is the defensive thing to do.  I'm happy
either way.

(redundant_insn is pretty conservative, so the branch whose delay slot
we're trying to fill can never be the one that makes a delay slot redundant.
It must always be an instruction from before the branch.  So inserting the
(use ...) immediately before the branch should be correct.)

Tested on mips64-linux-gnu.  OK for trunk?  OK for 4.8?

Thanks,
Richard


gcc/
        PR rtl-optimization/59137
        * reorg.c (steal_delay_list_from_target): Call update_block for
        elided insns.
        (steal_delay_list_from_fallthrough, relax_delay_slots): Likewise.

gcc/testsuite/
        PR rtl-optimization/59137
        * gcc.target/mips/pr59137.c: New test.

Index: gcc/reorg.c
===================================================================
--- gcc/reorg.c 2014-01-08 18:04:23.420954812 +0000
+++ gcc/reorg.c 2014-01-08 19:17:12.005446964 +0000
@@ -1093,6 +1093,7 @@ steal_delay_list_from_target (rtx insn,
   int used_annul = 0;
   int i;
   struct resources cc_set;
+  bool *redundant;
 
   /* We can't do anything if there are more delay slots in SEQ than we
      can handle, or if we don't know that it will be a taken branch.
@@ -1133,6 +1134,7 @@ steal_delay_list_from_target (rtx insn,
     return delay_list;
 #endif
 
+  redundant = XALLOCAVEC (bool, XVECLEN (seq, 0));
   for (i = 1; i < XVECLEN (seq, 0); i++)
     {
       rtx trial = XVECEXP (seq, 0, i);
@@ -1154,7 +1156,8 @@ steal_delay_list_from_target (rtx insn,
 
       /* If this insn was already done (usually in a previous delay slot),
         pretend we put it in our delay slot.  */
-      if (redundant_insn (trial, insn, new_delay_list))
+      redundant[i] = redundant_insn (trial, insn, new_delay_list);
+      if (redundant[i])
        continue;
 
       /* We will end up re-vectoring this branch, so compute flags
@@ -1187,6 +1190,12 @@ steal_delay_list_from_target (rtx insn,
        return delay_list;
     }
 
+  /* Record the effect of the instructions that were redundant and which
+     we therefore decided not to copy.  */
+  for (i = 1; i < XVECLEN (seq, 0); i++)
+    if (redundant[i])
+      update_block (XVECEXP (seq, 0, i), insn);
+
   /* Show the place to which we will be branching.  */
   *pnew_thread = first_active_target_insn (JUMP_LABEL (XVECEXP (seq, 0, 0)));
 
@@ -1250,6 +1259,7 @@ steal_delay_list_from_fallthrough (rtx i
       /* If this insn was already done, we don't need it.  */
       if (redundant_insn (trial, insn, delay_list))
        {
+         update_block (trial, insn);
          delete_from_delay_slot (trial);
          continue;
        }
@@ -3236,6 +3246,7 @@ relax_delay_slots (rtx first)
         to reprocess this insn.  */
       if (redundant_insn (XVECEXP (pat, 0, 1), delay_insn, 0))
        {
+         update_block (XVECEXP (pat, 0, 1), insn);
          delete_from_delay_slot (XVECEXP (pat, 0, 1));
          next = prev_active_insn (next);
          continue;
@@ -3355,6 +3366,7 @@ relax_delay_slots (rtx first)
              && redirect_with_delay_slots_safe_p (delay_insn, target_label,
                                                   insn))
            {
+             update_block (XVECEXP (PATTERN (trial), 0, 1), insn);
              reorg_redirect_jump (delay_insn, target_label);
              next = insn;
              continue;
Index: gcc/testsuite/gcc.target/mips/pr59137.c
===================================================================
--- /dev/null   2013-12-26 20:29:50.272541227 +0000
+++ gcc/testsuite/gcc.target/mips/pr59137.c     2014-01-08 19:17:12.006448250 
+0000
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-mno-plt" { target mips*-*-* } } */
+
+extern void abort (void);
+
+struct lispstruct
+{
+  int e;
+  int t;
+};
+
+struct lispstruct Cnil_body;
+struct lispstruct Ct_body;
+int nvalues;
+
+struct lispstruct * __attribute__ ((noinline))
+fLlistp (struct lispstruct *x0)
+{
+  if (x0 == &Cnil_body
+      || (((unsigned long) x0 >= 0x80000000) ? 0
+         : (!x0->e ? (x0 != &Cnil_body) : x0->t)))
+    x0 = &Ct_body;
+  else
+    x0 = &Cnil_body;
+  nvalues = 1;
+  return x0;
+}
+
+int main ()
+{
+  if (fLlistp ((struct lispstruct *) 0xa0000001) != &Cnil_body)
+    abort ();
+  return 0;
+}

Reply via email to