https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61194
--- Comment #1 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
What I find quite absurd is that
// Reduced test case, first variant: only the mask array k is computed.
// The arrays k, x, w, y and z are declared outside this snippet.
void barX() {
for (int i=0; i<1024; ++i) {
// Store the result of the comparison x[i] > 0 into k[i].
k[i] = x[i]>0;
// AND in the second condition: k[i] stays set only if w[i] < y[i] too.
k[i] &= w[i]<y[i];
// The conditional update of z is disabled in this variant:
// z[i] = (k[i]) ? z[i] : y[i];
}
}
vectorizes, and
// Reduced test case, second variant: same mask computation as before, plus a
// conditional update of z driven by the mask.
// The arrays k, x, w, y and z are declared outside this snippet.
void barX() {
for (int i=0; i<1024; ++i) {
// Store the result of the comparison x[i] > 0 into k[i].
k[i] = x[i]>0;
// AND in the second condition: k[i] stays set only if w[i] < y[i] too.
k[i] &= w[i]<y[i];
// z[i] keeps its value when k[i] is non-zero and is overwritten with y[i]
// otherwise; z[i] is assigned on every iteration.
z[i] = (k[i]) ? z[i] : y[i];
}
}
does not vectorize with GCC 4.9.0.
This is a regression with respect to 4.7.0. Compiled as
c++ -Ofast -Wall -fno-tree-slp-vectorize -ftree-loop-if-convert-stores -S
cond.cc -msse4.2 -ftree-vectorizer-verbose=1
4.7.0 produced
# Compiler-generated x86-64 assembly (AT&T syntax: "op src, dst") for the
# barX() loop that also updates z. Each iteration handles one 16-byte vector
# from each of the arrays x, w, y, z and k, with %rax as the byte offset.
# NOTE(review): the label below looks like it lost its leading underscore in
# the paste (the mangled name would normally be _Z4barXv) -- confirm.
Z4barXv:
.LFB1:
.cfi_startproc
xorps %xmm4, %xmm4                  # xmm4 = all zero bits (0.0f in each lane)
xorl %eax, %eax                     # rax = 0: byte offset into the arrays
pxor %xmm3, %xmm3                   # xmm3 = all zero bits (integer zero per lane)
movdqa .LC1(%rip), %xmm5            # xmm5 = constant .LC1 (definition not shown;
                                    # presumably four 32-bit 1s -- confirm)
.p2align 4,,10                      # align loop head to 16 bytes if <= 10 pad bytes
.p2align 3                          # otherwise align to 8 bytes
.L9:
movaps y(%rax), %xmm2               # xmm2 = y vector at current offset
movaps %xmm4, %xmm1                 # xmm1 = 0.0f lanes
movaps w(%rax), %xmm0               # xmm0 = w vector
cmpltps x(%rax), %xmm1              # xmm1 = per-lane mask of (0 < x)
cmpltps %xmm2, %xmm0                # xmm0 = per-lane mask of (w < y)
pand %xmm5, %xmm0                   # reduce the (w < y) mask with the .LC1 constant
pand %xmm1, %xmm0                   # ... and AND with the (x > 0) mask: value for k
movaps z(%rax), %xmm1               # xmm1 = z vector
movdqa %xmm0, k(%rax)               # store the combined result into k
pcmpeqd %xmm3, %xmm0                # xmm0 = per-lane mask of (k == 0)
blendvps %xmm0, %xmm2, %xmm1        # lanes where k == 0 take y, others keep z
movaps %xmm1, z(%rax)               # write the blended vector back to z
addq $16, %rax                      # advance by one 16-byte vector
cmpq $4096, %rax                    # stop once 4096 bytes have been processed
jne .L9
rep                                 # rep prefix on the following ret; executes
ret                                 # as an ordinary return
.cfi_endproc