This loop isn't vectorized:

float a[4], b[4], c[4];

int main ()
{
  int i;
  for (i=0; i<4; i++){
    a[i] = b[i] + c[i];
  }
}

main:
.LFB0:
        .cfi_startproc
        movss   b(%rip), %xmm0
        addss   c(%rip), %xmm0
        movss   %xmm0, a(%rip)
        movss   b+4(%rip), %xmm0
        addss   c+4(%rip), %xmm0
        movss   %xmm0, a+4(%rip)
        movss   b+8(%rip), %xmm0
        addss   c+8(%rip), %xmm0
        movss   %xmm0, a+8(%rip)
        movss   b+12(%rip), %xmm0
        addss   c+12(%rip), %xmm0
        movss   %xmm0, a+12(%rip)
        ret
        .cfi_endproc

but this loop is vectorized:

float a[8], b[8], c[8];

int main ()
{
  int i;
  for (i=0; i<8; i++){
    a[i] = b[i] + c[i];
  }
}

main:
.LFB0:
        .cfi_startproc
        movaps  b(%rip), %xmm0
        addps   c(%rip), %xmm0
        movaps  %xmm0, a(%rip)
        movaps  b+16(%rip), %xmm0
        addps   c+16(%rip), %xmm0
        movaps  %xmm0, a+16(%rip)
        ret
        .cfi_endproc

--
           Summary: small loop not vectorized
           Product: gcc
           Version: 4.4.3
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: wbrana at gmail dot com
  GCC host triplet: x86_64-pc-linux-gnu

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43692