http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52975
Bug #: 52975
Summary: Ofast produces not optimized code for vectorized "converted if"
Classification: Unclassified
Product: gcc
Version: 4.8.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: vincenzo.innoce...@cern.ch

This is a modified version of gcc/testsuite/gcc.dg/torture/pr52969.c.

Notice the two compares

cmpps $0x1,%xmm2,%xmm1
cmpps $0x2,%xmm3,%xmm0

in the -Ofast case. The result is similar with -march=corei7, when blendv is generated.

cat ifconv2.cc
int b;
float xsum[100];
float clus[100];
void bar2 () {
  int j=0;
  for (; j<100 ; ++j) {
    xsum[j] = clus[j];
    if (xsum[j] > 0) xsum[j] = 0;
    // xsum[j] = (clus[j] > 0.) ? 0. : clus[j];
  }
  if (xsum[0]) b = 0;
}

pb-d-128-141-131-26:bugs48 innocent$ c++ -O3 -c ifconv2.cc -ftree-loop-if-convert-stores -ftree-vectorizer-verbose=2
Analyzing loop at ifconv2.cc:7
Vectorizing loop at ifconv2.cc:7
7: LOOP VECTORIZED.
ifconv2.cc:4: note: vectorized 1 loops in function.

pb-d-128-141-131-26:bugs48 innocent$ otool -t -X -v ifconv2.o
__Z4bar2v:
leaq 0x00000000(%rip),%rax
xorps %xmm2,%xmm2
leaq 0x00000000(%rip),%rdx
leaq 0x00000190(%rip),%rcx
nopl 0x00000000(%rax,%rax)
movaps (%rax),%xmm1
movaps %xmm2,%xmm0
addq $0x10,%rax
cmpps $0x1,%xmm1,%xmm0
andnps %xmm1,%xmm0
movaps %xmm0,(%rdx)
addq $0x10,%rdx
cmpq %rcx,%rax
jne 0x00000020
xorps %xmm0,%xmm0
ucomiss 0x00000000(%rip),%xmm0
jnp 0x00000054
movl $0x00000000,0xfffffffc(%rip)
ret
jne 0x00000049
repz/ret

pb-d-128-141-131-26:bugs48 innocent$ c++ -Ofast -c ifconv2.cc -ftree-loop-if-convert-stores -ftree-vectorizer-verbose=2
Analyzing loop at ifconv2.cc:7
Vectorizing loop at ifconv2.cc:7
7: LOOP VECTORIZED.
ifconv2.cc:4: note: vectorized 1 loops in function.
pb-d-128-141-131-26:bugs48 innocent$ otool -t -X -v ifconv2.o
__Z4bar2v:
leaq 0x00000000(%rip),%rdx
xorps %xmm3,%xmm3
leaq 0x00000000(%rip),%rax
leaq 0x00000190(%rip),%rcx
nopl 0x00000000(%rax,%rax)
movaps (%rdx),%xmm2
movaps %xmm3,%xmm1
addq $0x10,%rdx
movaps %xmm2,%xmm0
cmpps $0x1,%xmm2,%xmm1
cmpps $0x2,%xmm3,%xmm0
andnps (%rax),%xmm1
andps %xmm0,%xmm2
andnps %xmm1,%xmm0
orps %xmm2,%xmm0
movaps %xmm0,(%rax)
addq $0x10,%rax
cmpq %rcx,%rdx
jne 0x00000020
xorps %xmm0,%xmm0
comiss 0x00000000(%rip),%xmm0
je 0x00000063
movl $0x00000000,0xfffffffc(%rip)
repz/ret