http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58268
Bug ID: 58268
Summary: ymm registers not used for -march=bdver1
Product: gcc
Version: 4.9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: vincenzo.innocente at cern dot ch

In this trivial example, 256-bit AVX (ymm) registers are used for corei7-avx and core-avx2 but not for bdver1, which only gets 128-bit xmm code:

float a[1024];
float x[1024];
float bar(float b) {
  float r=0.;
  for (int i=0; i!=1024; ++i)
    r += a[i]+b*x[i];
  return r;
}

c++ -v -Ofast -march=core-avx2 -S fma.cpp ; cat fma.s

.L2:
        vmovaps x(%rax), %ymm2
        addq $32, %rax
        vfmadd213ps a-32(%rax), %ymm0, %ymm2
        cmpq $4096, %rax
        vaddps %ymm2, %ymm1, %ymm1
        jne .L2
        vhaddps %ymm1, %ymm1, %ymm1
        vhaddps %ymm1, %ymm1, %ymm0
        vperm2f128 $1, %ymm0, %ymm0, %ymm1
        vaddps %ymm0, %ymm1, %ymm0
        vzeroupper
        ret

c++ -v -Ofast -march=bdver1 -S fma.cpp ; cat fma.s

.L2:
        vmovaps 16(%rcx), %xmm5
        vmovaps 48(%rcx), %xmm6
        prefetcht0 320(%rcx)
        vmovaps 16(%rax), %xmm7
        addq $64, %rcx
        addl $4, %edx
        prefetcht0 320(%rax)
        leaq 64(%rax), %rsi
        vaddps -64(%rcx), %xmm5, %xmm3
        vaddps -32(%rcx), %xmm6, %xmm1
        cmpq $x+4032, %rcx
        vmovaps 48(%rax), %xmm5
        vaddps %xmm1, %xmm3, %xmm1
        vaddps (%rax), %xmm7, %xmm3
        vfmaddps %xmm0, %xmm2, %xmm1, %xmm0
        vaddps 32(%rax), %xmm5, %xmm1
        vaddps %xmm1, %xmm3, %xmm1
        vaddps %xmm0, %xmm1, %xmm0
        jne .L5
        xorl %eax, %eax
        .p2align 4,,10
        .p2align 3
.L4:
        vmovaps x+4032(%rax), %xmm4
        incl %edx
        vfmaddps (%rsi,%rax), %xmm2, %xmm4, %xmm1
        addq $16, %rax
        cmpl $256, %edx
        vaddps %xmm1, %xmm0, %xmm0
        jb .L4
        vhaddps %xmm0, %xmm0, %xmm0
        vhaddps %xmm0, %xmm0, %xmm0
        ret