http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444
--- Comment #12 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-16 15:25:06 UTC ---

Testcase without includes that fails with 4.6 and 4.7:

typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned int uint32_t;

typedef struct {
    uint32_t v[4];
} a4x32;

a4x32* incr(a4x32* x)
{
  x->v[0] += 1;
  return x;
}

typedef struct {
    __m128i m;
} a1xm128i;

static inline a1xm128i ssefunc( a1xm128i in, a1xm128i k)
{
  a1xm128i ret;
  ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
  return ret;
}

static a4x32 caster( a4x32 c4x32, a1xm128i k)
{
  a1xm128i c1x128;
  if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
  __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
  c1x128 = ssefunc(c1x128, k);
  __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
  return c4x32;
}

typedef struct {
    a1xm128i key;
    a4x32 c;
    __SIZE_TYPE__ elem;
    a4x32 v;
} Engine;

void ctor(Engine *e)
{
  e->elem = 0;
  e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
  e->c.v[0] = 0;
  e->c.v[1] = 0;
  e->c.v[2] = 0;
  e->c.v[3] = 0;
}

uint32_t method( Engine *e)
{
  if( e->elem == 0 )
    {
      e->v = caster(*incr(&e->c), e->key);
      e->elem = 4;
    }
  return e->v.v[--e->elem];
}

int main()
{
  Engine e4; ctor(&e4);
  Engine e5; ctor(&e5);
  if(method(&e4)!=method(&e5))
    __builtin_abort ();
  return 0;
}

and the problematic SRA is indeed happening during ESRA in caster () which
looks like (before SRA):

<bb 2>:
  MEM[(char * {ref-all})&c1x128] = MEM[(char * {ref-all})&c4x32];
  in = c1x128;
  k = k;
  D.1785_7 = k.m;
  D.1784_8 = in.m;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  D.1782.m = D.1783_9;
  D.1780 = D.1782;
  c1x128 = D.1780;
  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];
  D.1760 = c4x32;
  c1x128 ={v} {CLOBBER};
  return D.1760;

and after SRA:

<bb 2>:
  c4x32$m_4 = MEM[(struct *)&c4x32].m;
  c1x128$m_14 = c4x32$m_4;
  in$m_13 = c1x128$m_14;
  k$m_12 = MEM[(struct *)&k].m;
  D.1785_7 = k$m_12;
  D.1784_8 = in$m_13;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  SR.6_11 = D.1783_9;
  SR.7_10 = SR.6_11;
  c1x128$m_2 = SR.7_10;
  c4x32$m_15 = c1x128$m_2;
  MEM[(struct *)&D.1760].m = c4x32$m_15;
  c1x128$m_16 = { 0, 0 };
  return D.1760;

Notice that D.1760 is of type a4x32 and thus has the alignment of an integer.
But SRA constructs in-place an object of c1x128's type (a1xm128i).

SRA analysis should have seen the alignment-breaking copy

  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];

which uses a properly aligned type for the store. Similarly, the prevailing
store

  D.1760 = c4x32;

has the alignment of D.1760. D.1760 already has a bogus type in lacc->type.

We can easily avoid translating across aggregate copies that would transfer
bogusly aligned types to an access via

Index: tree-sra.c
===================================================================
--- tree-sra.c (revision 183205)
+++ tree-sra.c (working copy)
@@ -2290,7 +2290,9 @@ propagate_subaccesses_across_link (struc
 
   if (is_gimple_reg_type (racc->type))
     {
-      if (!lacc->first_child && !racc->first_child)
+      if (!lacc->first_child && !racc->first_child
+          && (get_object_alignment (lacc->expr)
+              >= get_object_alignment (racc->expr)))
         {
           tree t = lacc->base;
 

or make sure to transfer the alignment to a constructed bare(!) MEM_REF from
lacc->expr before overwriting that (assuming it retains the original form up
until here).