https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65847
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---

Similarly

    struct X { int a; int b; int c; int d; };

    struct X foo (struct X x, struct X y)
    {
      struct X res;
      res.a = x.a + y.a;
      res.b = x.b + y.b;
      res.c = x.c + y.c;
      res.d = x.d + y.d;
      return res;
    }

is vectorized as

    foo:
    .LFB0:
            .cfi_startproc
            movq    %rdi, -40(%rsp)
            movq    %rsi, -32(%rsp)
            movdqa  -40(%rsp), %xmm0
            movq    %rdx, -24(%rsp)
            movq    %rcx, -16(%rsp)
            paddd   -24(%rsp), %xmm0
            movaps  %xmm0, -40(%rsp)
            movq    -40(%rsp), %rax
            movq    -32(%rsp), %rdx
            ret

which is bad because the on-stack construction of %xmm0 causes a store-to-load forwarding (STLF) failure.

Unvectorized code isn't necessarily worse, but the vectorized sequence can be improved. For comparison, the unvectorized (scalar) code is

    foo:
    .LFB0:
            .cfi_startproc
            movq    %rdi, %rax
            movq    %rdi, %r10
            movq    %rdx, %rdi
            movq    %rsi, %r9
            sarq    $32, %r10
            sarq    $32, %rdi
            addl    %edx, %eax
            movq    %rcx, %r8
            addl    %r10d, %edi
            sarq    $32, %r9
            movl    %eax, %eax
            leal    (%rsi,%rcx), %edx
            movl    %edi, %edi
            sarq    $32, %r8
            salq    $32, %rdi
            orq     %rdi, %rax
            leal    (%r9,%r8), %edi
            salq    $32, %rdi
            orq     %rdi, %rdx
            ret

In this case the spill is caused by LRA not knowing how to reload the TImode reg built by pieces by the RTL expansion code.