It's a regression present on all active branches caused by the RTL enhanced
load motion optimization (-fgcse-lm), which performs PRE on "simple" MEMs and
discards all other memory references. The problem is that it fails to discard
a particular memory reference present in a REG_EQUAL note:
(insn 7 6 8 3 (parallel [
(set (reg:DI 88 [ _4 ])
(rotatert:DI (reg:DI 117 [ v32u64_1+24 ])
(const_int 19 [0x13])))
(clobber (reg:CC 17 flags))
]) pr70007.c:10 607 {*rotrdi3_1}
(expr_list:REG_DEAD (reg:DI 117 [ v32u64_1+24 ])
(expr_list:REG_UNUSED (reg:CC 17 flags)
(expr_list:REG_EQUAL (rotatert:DI (mem/j/c:DI (plus:DI (reg/f:DI 16 argp)
(const_int 88 [0x58])) [1 v32u64_1+24 S8 A64])
(const_int 19 [0x13]))
(nil)))))
because it only considers REG_EQUAL notes attached to SET patterns. Here it's
a PARALLEL with a single SET and that's allowed by set_for_reg_notes.
Tested on x86_64-suse-linux, applied on all active branches.
2016-03-01 Eric Botcazou <ebotca...@adacore.com>
PR rtl-optimization/70007
* gcse.c (compute_ld_motion_mems): Tidy up and also invalidate memory
references present in REG_EQUAL notes attached to non-SET patterns.
2016-03-01 Eric Botcazou <ebotca...@adacore.com>
* gcc.target/i386/pr70007.c: New test.
--
Eric Botcazou
/* PR rtl-optimization/70007 */
/* { dg-do run } */
/* { dg-options "-O -fgcse -mbmi2" } */
/* { dg-require-effective-target bmi2 } */
/* 32-byte GCC vector types (vector_size (32)) and a 128-bit scalar used by
   the test.  v32u128 is declared but not referenced by foo or main.  */
typedef unsigned short v32u16 __attribute__ ((vector_size (32)));
typedef unsigned long long v32u64 __attribute__ ((vector_size (32)));
typedef unsigned __int128 u128;
typedef unsigned __int128 v32u128 __attribute__ ((vector_size (32)));
/* Reproducer body for PR rtl-optimization/70007.  Takes three 32-byte
   vectors by value, repeatedly updates them in a loop, and returns element 3
   of v32u64_1 converted to u128.  The exact shape of the expressions matters
   for reproducing the bug, so the code must not be simplified.  */
u128
foo (v32u16 v32u16_0, v32u64 v32u64_0, v32u64 v32u64_1)
{
do {
/* Rotate v32u64_1[3] right by 19 bits (19 + 45 == 64, assuming 64-bit
   unsigned long long as on the x86_64 target), store the result back
   into v32u64_1[3], then OR the stored value into v32u16_0[13].  */
v32u16_0[13] |= v32u64_1[3] = (v32u64_1[3] >> 19) | (v32u64_1[3] << 45);
/* Vector modulo of v32u64_1 by its own bitwise complement.  */
v32u64_1 %= ~v32u64_1;
/* Multiply v32u64_0 by v32u16_0 cast to the v32u64 vector type (both
   vector types are 32 bytes wide).  */
v32u64_0 *= (v32u64) v32u16_0;
/* Keep iterating until element 0 of v32u64_0 becomes zero.  */
} while (v32u64_0[0]);
return v32u64_1[3];
}
/* Test driver: calls foo with fixed vector inputs (elements not listed in
   the initializers are zero) and aborts unless the result equals the
   expected constant.  */
int
main (void)
{
u128 x = foo((v32u16){~0xba31, 0x47c6}, (v32u64){64}, (v32u64){0, 0x8b217e2514d23242, 0xac569b6dff9f82, 0x9d4cffe03c139c});
/* A different value means foo produced a wrong result; the test fails
   via abort.  */
if (x != 0x3c74da5ca328d09)
__builtin_abort();
return 0;
}
Index: gcse.c
===================================================================
--- gcse.c (revision 233840)
+++ gcse.c (working copy)
@@ -3796,10 +3796,8 @@ compute_ld_motion_mems (void)
{
rtx src = SET_SRC (PATTERN (insn));
rtx dest = SET_DEST (PATTERN (insn));
- rtx note = find_reg_equal_equiv_note (insn);
- rtx src_eq;
- /* Check for a simple LOAD... */
+ /* Check for a simple load. */
if (MEM_P (src) && simple_mem (src))
{
ptr = ldst_entry (src);
@@ -3814,12 +3812,11 @@ compute_ld_motion_mems (void)
invalidate_any_buried_refs (src);
}
- if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
- src_eq = XEXP (note, 0);
- else
- src_eq = NULL_RTX;
-
- if (src_eq != NULL_RTX
+ /* Check for a simple load through a REG_EQUAL note. */
+ rtx note = find_reg_equal_equiv_note (insn), src_eq;
+ if (note
+ && REG_NOTE_KIND (note) == REG_EQUAL
+ && (src_eq = XEXP (note, 0))
&& !(MEM_P (src_eq) && simple_mem (src_eq)))
invalidate_any_buried_refs (src_eq);
@@ -3843,7 +3840,17 @@ compute_ld_motion_mems (void)
}
}
else
- invalidate_any_buried_refs (PATTERN (insn));
+ {
+ /* Invalidate all MEMs in the pattern and... */
+ invalidate_any_buried_refs (PATTERN (insn));
+
+ /* ...in REG_EQUAL notes for PARALLELs with single SET. */
+ rtx note = find_reg_equal_equiv_note (insn), src_eq;
+ if (note
+ && REG_NOTE_KIND (note) == REG_EQUAL
+ && (src_eq = XEXP (note, 0)))
+ invalidate_any_buried_refs (src_eq);
+ }
}
}
}