This is a regression, present on all active branches, caused by the RTL
enhanced load motion optimization (-fgcse-lm), which performs PRE (partial
redundancy elimination) on "simple" MEMs and discards all other memory
references.  The problem is that it fails to discard a particular memory
reference present in a REG_EQUAL note:

(insn 7 6 8 3 (parallel [
            (set (reg:DI 88 [ _4 ])
                (rotatert:DI (reg:DI 117 [ v32u64_1+24 ])
                    (const_int 19 [0x13])))
            (clobber (reg:CC 17 flags))
        ]) pr70007.c:10 607 {*rotrdi3_1}
     (expr_list:REG_DEAD (reg:DI 117 [ v32u64_1+24 ])
        (expr_list:REG_UNUSED (reg:CC 17 flags)
            (expr_list:REG_EQUAL (rotatert:DI (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
                            (const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
                    (const_int 19 [0x13]))
                (nil)))))

because the pass only considers REG_EQUAL notes attached to insns whose
pattern is a plain SET.  Here the pattern is a PARALLEL containing a single
SET, and set_for_reg_notes allows a REG_EQUAL note on such an insn.

Tested on x86_64-suse-linux, applied on all active branches.


2016-03-01  Eric Botcazou  <ebotca...@adacore.com>

        PR rtl-optimization/70007
        * gcse.c (compute_ld_motion_mems): Tidy up and also invalidate memory
        references present in REG_EQUAL notes attached to non-SET patterns.


2016-03-01  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.target/i386/pr70007.c: New test.

-- 
Eric Botcazou
/* PR rtl-optimization/70007 */
/* { dg-do run } */
/* { dg-options "-O -fgcse -mbmi2" } */
/* { dg-require-effective-target bmi2 } */

typedef unsigned short v32u16 __attribute__ ((vector_size (32)));
typedef unsigned long long v32u64 __attribute__ ((vector_size (32)));
typedef unsigned __int128 u128;
typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));

/* Reduced reproducer for PR rtl-optimization/70007.

   Each iteration rotates element 3 of V32U64_1 right by 19 bits, stores
   the result back and ORs it into element 13 of V32U16_0, then updates
   both 64-bit vectors.  The loop repeats until element 0 of V32U64_0 is
   zero.  Returns element 3 of V32U64_1 converted to u128.

   The expressions are deliberately left in this exact form: the rotate
   of v32u64_1[3] is what yields the rotatert insn carrying the REG_EQUAL
   note described in the PR.  */
u128
foo (v32u16 v32u16_0, v32u64 v32u64_0, v32u64 v32u64_1)
{
  do {
    /* Rotate right by 19 spelled as two shifts (19 + 45 == 64).  The
       value assigned to v32u64_1[3] is also OR-ed into v32u16_0[13].  */
    v32u16_0[13] |= v32u64_1[3] = (v32u64_1[3] >> 19) | (v32u64_1[3] << 45);
    /* Element-wise remainder of each lane by its own bitwise complement.  */
    v32u64_1 %= ~v32u64_1;
    /* Both vector types are 32 bytes; with GCC vector extensions this
       cast reinterprets the bits rather than converting lane values.  */
    v32u64_0 *= (v32u64) v32u16_0;
  } while (v32u64_0[0]);
  return v32u64_1[3];
}

/* Test driver: call foo once with fixed vector arguments and abort if
   the returned value is not the expected constant.  Aborting is how
   this dg-do run test reports a failure.  */
int
main (void)
{
  u128 x = foo((v32u16){~0xba31, 0x47c6}, (v32u64){64}, (v32u64){0, 0x8b217e2514d23242, 0xac569b6dff9f82, 0x9d4cffe03c139c});
  /* Expected result of foo for the inputs above.  */
  if (x != 0x3c74da5ca328d09)
    __builtin_abort();
  return 0;
}
Index: gcse.c
===================================================================
--- gcse.c	(revision 233840)
+++ gcse.c	(working copy)
@@ -3796,10 +3796,8 @@ compute_ld_motion_mems (void)
 		{
 		  rtx src = SET_SRC (PATTERN (insn));
 		  rtx dest = SET_DEST (PATTERN (insn));
-		  rtx note = find_reg_equal_equiv_note (insn);
-		  rtx src_eq;
 
-		  /* Check for a simple LOAD...  */
+		  /* Check for a simple load.  */
 		  if (MEM_P (src) && simple_mem (src))
 		    {
 		      ptr = ldst_entry (src);
@@ -3814,12 +3812,11 @@ compute_ld_motion_mems (void)
 		      invalidate_any_buried_refs (src);
 		    }
 
-		  if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
-		    src_eq = XEXP (note, 0);
-		  else
-		    src_eq = NULL_RTX;
-
-		  if (src_eq != NULL_RTX
+		  /* Check for a simple load through a REG_EQUAL note.  */
+		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
+		  if (note
+		      && REG_NOTE_KIND (note) == REG_EQUAL
+		      && (src_eq = XEXP (note, 0))
 		      && !(MEM_P (src_eq) && simple_mem (src_eq)))
 		    invalidate_any_buried_refs (src_eq);
 
@@ -3843,7 +3840,17 @@ compute_ld_motion_mems (void)
 		    }
 		}
 	      else
-		invalidate_any_buried_refs (PATTERN (insn));
+		{
+		  /* Invalidate all MEMs in the pattern and...  */
+		  invalidate_any_buried_refs (PATTERN (insn));
+
+		  /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
+		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
+		  if (note
+		      && REG_NOTE_KIND (note) == REG_EQUAL
+		      && (src_eq = XEXP (note, 0)))
+		    invalidate_any_buried_refs (src_eq);
+		}
 	    }
 	}
     }

Reply via email to