------- Comment #4 from bonzini at gnu dot org  2009-01-31 16:23 -------
I see optimal code with trunk:

.LFB8:
        # Excerpt of compiler output (x86-64, AT&T syntax: op src, dst).
        # Notation used below: a = low float of %xmm0 on entry to this excerpt,
        # b = low float of %xmm1 on entry; vector lanes are listed low to high.
        # The setup builds a 4-lane ramp in %xmm2 and a per-iteration step in
        # %xmm1; the loop at .L2 adds the ramp to 16-byte chunks read from
        # (%rsi) and stores the sums to (%rdi).
        # NOTE(review): no `ret` and no guard for a zero trip count are visible
        # in this excerpt -- the quoted listing appears to be partial.
        movaps  %xmm1, %xmm4
        # Trip count: %edx is shifted right by 2 (i.e. divided by 4, unsigned).
        shrl    $2, %edx
        # 32-bit register write zero-extends into %rdx, which the loop counts
        # down with a 64-bit subq.
        mov     %edx, %edx
        # %rax = 0: byte offset applied to both (%rsi) and (%rdi) in the loop.
        xorl    %eax, %eax
        # %xmm4 lane 0 = b + a.
        addss   %xmm0, %xmm4
        movaps  %xmm4, %xmm3
        # Interleave low lanes: %xmm4 lanes 0..1 = { a+b, a }.
        unpcklps        %xmm0, %xmm4
        # %xmm3 lane 0 = (a+b) + b.
        addss   %xmm1, %xmm3
        movaps  %xmm3, %xmm2
        # %xmm2 lane 0 = ((a+b)+b) + b.
        addss   %xmm1, %xmm2
        # Scale the step: %xmm1 lane 0 = b * [.LC0].  The constant's value is
        # not shown here -- presumably 4.0 (lanes per vector); TODO confirm.
        mulss   .LC0(%rip), %xmm1
        # %xmm2 lanes 0..1 = { a+3b, a+2b } (sums formed by repeated addition).
        unpcklps        %xmm3, %xmm2
        # Broadcast lane 0 of %xmm1 to all four lanes (shuffle selector 0).
        shufps  $0, %xmm1, %xmm1
        # Copy the low 64 bits of %xmm4 into the high 64 bits of %xmm2:
        # %xmm2 = { a+3b, a+2b, a+b, a }.
        movlhps %xmm4, %xmm2
        # Align the loop head to a 16-byte boundary.
        .align 16
.L2:
        # Loop body: one 16-byte vector (four floats) per iteration.
        # movaps is the aligned form, so (%rsi,%rax) and (%rdi,%rax) must be
        # 16-byte aligned.
        movaps  (%rsi,%rax), %xmm0
        # Add the current ramp to the loaded data.
        addps   %xmm2, %xmm0
        # Advance the ramp by the broadcast step for the next iteration.
        addps   %xmm1, %xmm2
        movaps  %xmm0, (%rdi,%rax)
        # Step the byte offset to the next 16-byte chunk.
        addq    $16, %rax
        # Count down the remaining vectors; repeat until %rdx reaches zero.
        subq    $1, %rdx
        jne     .L2


-- 

bonzini at gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38682

Reply via email to