It turns out that solving this long-standing optimization regression is now easy by exploiting implementation details of how we canonicalize refs in LIM. This allows us to properly identify MEM[(integer(kind=4)[64] *)&a][0] and MEM[(c_char * {ref-all})&a] as the same reference, so that store-motion is applied to an initialization (non-)loop, thereby eliminating it.
Bootstrap & regtest running on x86_64-unknown-linux-gnu. I didn't go further trying to exploit alias subset relationship instead but the alias-set zero case is obvious enough to be correct. Richard. 2019-04-15 Richard Biener <rguent...@suse.de> PR tree-optimization/56049 * tree-ssa-loop-im.c (mem_ref_hasher::equal): Elide alias-set equality check if alias-set zero will prevail. * gfortran.dg/pr56049.f90: New testcase. Index: gcc/tree-ssa-loop-im.c =================================================================== --- gcc/tree-ssa-loop-im.c (revision 270366) +++ gcc/tree-ssa-loop-im.c (working copy) @@ -178,7 +178,17 @@ mem_ref_hasher::equal (const im_mem_ref && known_eq (mem1->mem.size, obj2->size) && known_eq (mem1->mem.max_size, obj2->max_size) && mem1->mem.volatile_p == obj2->volatile_p - && mem1->mem.ref_alias_set == obj2->ref_alias_set + && (mem1->mem.ref_alias_set == obj2->ref_alias_set + /* We are not canonicalizing alias-sets but for the + special-case we didn't canonicalize yet and the + incoming ref is a alias-set zero MEM we pick + the correct one already. */ + || (!mem1->ref_canonical + && (TREE_CODE (obj2->ref) == MEM_REF + || TREE_CODE (obj2->ref) == TARGET_MEM_REF) + && obj2->ref_alias_set == 0) + /* Likewise if there's a canonical ref with alias-set zero. */ + || (mem1->ref_canonical && mem1->mem.ref_alias_set == 0)) && types_compatible_p (TREE_TYPE (mem1->mem.ref), TREE_TYPE (obj2->ref))); else Index: gcc/testsuite/gfortran.dg/pr56049.f90 =================================================================== --- gcc/testsuite/gfortran.dg/pr56049.f90 (nonexistent) +++ gcc/testsuite/gfortran.dg/pr56049.f90 (working copy) @@ -0,0 +1,29 @@ +! { dg-do compile } +! 
{ dg-options "-O3 -fdump-tree-optimized" } + +program inline + + integer i + integer a(8,8), b(8,8) + + a = 0 + do i = 1, 10000000 + call add(b, a, 1) + a = b + end do + + print *, a + +contains + + subroutine add(b, a, o) + integer, intent(inout) :: b(8,8) + integer, intent(in) :: a(8,8), o + b = a + o + end subroutine add + +end program inline + +! Check there's no loop left, just two bb 2 in two functions. +! { dg-final { scan-tree-dump-times "<bb \[0-9\]*>" 2 "optimized" } } +! { dg-final { scan-tree-dump-times "<bb 2>" 2 "optimized" } }