http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52975

             Bug #: 52975
           Summary: -Ofast produces suboptimal code for a vectorized
                    if-converted loop
    Classification: Unclassified
           Product: gcc
           Version: 4.8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: vincenzo.innoce...@cern.ch


This is a modified version of
gcc/testsuite/gcc.dg/torture/pr52969.c.

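Notice that the vectorized loop in the -Ofast output contains two comparisons,
cmpps    $0x1,%xmm2,%xmm1
cmpps    $0x2,%xmm3,%xmm0
whereas the -O3 output needs only one.
The result is similar with -march=corei7, when blendv is generated.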

cat ifconv2.cc
int b;
float xsum[100];
float clus[100];
void bar2 ()
{
  int j=0;
  for (; j<100 ; ++j) {
     xsum[j] = clus[j];
     if (xsum[j] > 0)
        xsum[j] = 0;
     // xsum[j] = (clus[j] > 0.) ? 0. : clus[j];
  }
  if (xsum[0])
    b = 0;
}


pb-d-128-141-131-26:bugs48 innocent$ c++ -O3 -c ifconv2.cc
-ftree-loop-if-convert-stores -ftree-vectorizer-verbose=2

Analyzing loop at ifconv2.cc:7


Vectorizing loop at ifconv2.cc:7

7: LOOP VECTORIZED.
ifconv2.cc:4: note: vectorized 1 loops in function.
pb-d-128-141-131-26:bugs48 innocent$ otool -t -X -v ifconv2.o
__Z4bar2v:
leaq    0x00000000(%rip),%rax
xorps    %xmm2,%xmm2
leaq    0x00000000(%rip),%rdx
leaq    0x00000190(%rip),%rcx
nopl    0x00000000(%rax,%rax)
movaps    (%rax),%xmm1
movaps    %xmm2,%xmm0
addq    $0x10,%rax
cmpps    $0x1,%xmm1,%xmm0
andnps    %xmm1,%xmm0
movaps    %xmm0,(%rdx)
addq    $0x10,%rdx
cmpq    %rcx,%rax
jne    0x00000020
xorps    %xmm0,%xmm0
ucomiss    0x00000000(%rip),%xmm0
jnp    0x00000054
movl    $0x00000000,0xfffffffc(%rip)
ret
jne    0x00000049
repz/ret
pb-d-128-141-131-26:bugs48 innocent$ c++ -Ofast -c ifconv2.cc
-ftree-loop-if-convert-stores -ftree-vectorizer-verbose=2

Analyzing loop at ifconv2.cc:7


Vectorizing loop at ifconv2.cc:7

7: LOOP VECTORIZED.
ifconv2.cc:4: note: vectorized 1 loops in function.
pb-d-128-141-131-26:bugs48 innocent$ otool -t -X -v ifconv2.o
__Z4bar2v:
leaq    0x00000000(%rip),%rdx
xorps    %xmm3,%xmm3
leaq    0x00000000(%rip),%rax
leaq    0x00000190(%rip),%rcx
nopl    0x00000000(%rax,%rax)
movaps    (%rdx),%xmm2
movaps    %xmm3,%xmm1
addq    $0x10,%rdx
movaps    %xmm2,%xmm0
cmpps    $0x1,%xmm2,%xmm1
cmpps    $0x2,%xmm3,%xmm0
andnps    (%rax),%xmm1
andps    %xmm0,%xmm2
andnps    %xmm1,%xmm0
orps    %xmm2,%xmm0
movaps    %xmm0,(%rax)
addq    $0x10,%rax
cmpq    %rcx,%rdx
jne    0x00000020
xorps    %xmm0,%xmm0
comiss    0x00000000(%rip),%xmm0
je    0x00000063
movl    $0x00000000,0xfffffffc(%rip)
repz/ret

Reply via email to