------- Comment #4 from changpeng dot fang at amd dot com  2010-08-24 00:46 
-------
Oops, the Open64-generated code posted in the last comment is for the
non-vectorized loop; the vectorized one is similar:

.LBB23_f:
        .loc    1       7       0
        movups 0(%r10),%xmm3            # [0] id:65
        movups 0(%rax),%xmm1            # [1] id:64
        subps %xmm3,%xmm1               # [3]
        .loc    1       8       0
        mulps %xmm1,%xmm1               # [7]
        movups 0(%r9),%xmm2             # [9] id:66
        mulps %xmm2,%xmm1               # [11]
        addq $16,%rax                   # [13]
        addq $16,%r9                    # [14]
        addq $16,%r10                   # [14]
        .loc    1       7       0
        prefetchnta 112(%rax)           # [14] L1
        prefetchnta 112(%r10)           # [15] L1
        .loc    1       8       0
        prefetchnta 112(%r9)            # [15] L1
        subps %xmm1,%xmm0               # [15]


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45391

Reply via email to