------- Comment #11 from dominiq at lps dot ens dot fr 2010-08-24 14:33 ------- Assembly for the inner loop
        do i = 1, n
           b(i,j) = b(i,j)-temp(i)*c
        end do

with -Ofast

r163277

L38:
        movsd   (%rsi,%rax), %xmm0
        addl    $1, %ecx
        movhpd  8(%rsi,%rax), %xmm0
        movapd  %xmm0, %xmm1
        movapd  (%rdi,%rax), %xmm0
        mulpd   %xmm3, %xmm1
        subpd   %xmm1, %xmm0
        movapd  %xmm0, (%rdi,%rax)
        addq    $16, %rax
        cmpl    $249, %ecx
        jbe     L38

r163519

L38:
        movsd   (%rdi,%rax), %xmm5
        addl    $1, %esi
        movhpd  8(%rdi,%rax), %xmm5
        movapd  %xmm5, %xmm1
        movsd   (%rcx,%rax), %xmm5
        mulpd   %xmm3, %xmm1
        movhpd  8(%rcx,%rax), %xmm5
        movapd  %xmm5, %xmm0
        subpd   %xmm1, %xmm0
        movlpd  %xmm0, (%rcx,%rax)
        movhpd  %xmm0, 8(%rcx,%rax)
        addq    $16, %rax
        cmpl    $249, %esi
        jbe     L38

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45379