https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93946
--- Comment #20 from Richard Biener <rguenth at gcc dot gnu.org> ---
So for the CSE issue, we go through the equivalence chain and find:
(gdb) p debug_rtx (p->exp)
(mem/j:SI (reg/v/f:SI 48 [ ptr ]) [1 MEM[(struct aa *)ptr_1(D)].a.u.i+0 S4 A32])
5076 if (GET_CODE (dest) == code && rtx_equal_p (p->exp, dest))
5077 src_related = dest;
...
5121 if (rtx_equal_p (src_related, dest))
5122 src_related_cost = src_related_regcost = -1;
The first issue is that we're recording dest as src_related here, losing the
opportunity to do a validity check later on. If we fix that, we can do the
usual validity check, copied from DSE:
diff --git a/gcc/cse.c b/gcc/cse.c
index 3e8724b3fed..f07bbdbebad 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -5074,7 +5074,7 @@ cse_insn (rtx_insn *insn)
to prefer it. Copy it to src_related. The code below will
then give it a negative cost. */
if (GET_CODE (dest) == code && rtx_equal_p (p->exp, dest))
- src_related = dest;
+ src_related = p->exp;
}
/* Find the cheapest valid equivalent, trying all the available
@@ -5332,7 +5332,16 @@ cse_insn (rtx_insn *insn)
&& rtx_equal_p (trial, dest)
&& !side_effects_p (dest)
&& (cfun->can_delete_dead_exceptions
- || insn_nothrow_p (insn)))
+ || insn_nothrow_p (insn))
+ /* We can only remove the later store if the earlier aliases
+ at least all accesses the later one. */
+ && (!MEM_P (trial)
+ || ((MEM_ALIAS_SET (dest) == MEM_ALIAS_SET (trial)
+ || alias_set_subset_of (MEM_ALIAS_SET (dest),
+ MEM_ALIAS_SET (trial)))
+ && (!MEM_EXPR (trial)
+ || refs_same_for_tbaa_p (MEM_EXPR (trial),
+ MEM_EXPR (dest))))))
{
SET_SRC (sets[i].rtl) = trial;
noop_insn = true;
That gives us, for the testcase:
foo:
movi r2, 1
stw r2, 0(r4)
stw zero, 0(r5)
ldw r2, 0(r4)
stw zero, 4(r5)
ret
which looks correct to me. I'm going to test the above on x86_64-linux.