http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58268

            Bug ID: 58268
           Summary: ymm registers not used for -march=bdver1
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

In this trivial example, 256-bit AVX (ymm registers) is used for corei7-avx
and core-avx2, but not for bdver1, where only 128-bit xmm registers are used.

// Global input arrays read by bar(); each holds 1024 floats.
float a[1024];
float x[1024];

// Reproducer for the report: returns the sum over i in [0, 1024) of
// a[i] + b * x[i], accumulated in a single float.
float bar(float b) {
  float r=0.;  // running sum of the reduction
  for (int i=0; i!=1024; ++i)
    r += a[i]+b*x[i];  // multiply-add term folded into the accumulator
  return r;
}


c++ -v -Ofast -march=core-avx2 -S fma.cpp ; cat fma.s
.L2:
    vmovaps    x(%rax), %ymm2
    addq    $32, %rax
    vfmadd213ps    a-32(%rax), %ymm0, %ymm2
    cmpq    $4096, %rax
    vaddps    %ymm2, %ymm1, %ymm1
    jne    .L2
    vhaddps    %ymm1, %ymm1, %ymm1
    vhaddps    %ymm1, %ymm1, %ymm0
    vperm2f128    $1, %ymm0, %ymm0, %ymm1
    vaddps    %ymm0, %ymm1, %ymm0
    vzeroupper
    ret



c++ -v -Ofast -march=bdver1 -S fma.cpp ; cat fma.s
.L2:
    vmovaps    16(%rcx), %xmm5
    vmovaps    48(%rcx), %xmm6
    prefetcht0    320(%rcx)
    vmovaps    16(%rax), %xmm7
    addq    $64, %rcx
    addl    $4, %edx
    prefetcht0    320(%rax)
    leaq    64(%rax), %rsi
    vaddps    -64(%rcx), %xmm5, %xmm3
    vaddps    -32(%rcx), %xmm6, %xmm1
    cmpq    $x+4032, %rcx
    vmovaps    48(%rax), %xmm5
    vaddps    %xmm1, %xmm3, %xmm1
    vaddps    (%rax), %xmm7, %xmm3
    vfmaddps    %xmm0, %xmm2, %xmm1, %xmm0
    vaddps    32(%rax), %xmm5, %xmm1
    vaddps    %xmm1, %xmm3, %xmm1
    vaddps    %xmm0, %xmm1, %xmm0
    jne    .L5
    xorl    %eax, %eax
    .p2align 4,,10
    .p2align 3
.L4:
    vmovaps    x+4032(%rax), %xmm4
    incl    %edx
    vfmaddps    (%rsi,%rax), %xmm2, %xmm4, %xmm1
    addq    $16, %rax
    cmpl    $256, %edx
    vaddps    %xmm1, %xmm0, %xmm0
    jb    .L4
    vhaddps    %xmm0, %xmm0, %xmm0
    vhaddps    %xmm0, %xmm0, %xmm0
    ret

Reply via email to