------- Comment #24 from astrange at ithinksw dot com 2008-03-19 19:21 -------
For
/* 64-bit vector types declared through GCC's `mode` attribute:
   V4HI is a vector of four 16-bit integers, V2SI a vector of two 32-bit integers. */
typedef short mmxw __attribute__ ((mode(V4HI)));
typedef int mmxdw __attribute__ ((mode(V2SI)));
/* Globals read and written by test() and test2(). */
mmxdw dw;
mmxw w;
/* Adds w to itself with the generic vector `+=` operator (no MMX builtin),
   then stores the result into dw through a vector cast to the V2SI type. */
void test(){
w+=w;
/* Same-size vector cast: the 64 bits of w are reinterpreted, not converted. */
dw= (mmxdw)w;
}
/* Same computation as test(), but the addition is written explicitly with
   the __builtin_ia32_paddw MMX builtin instead of the `+=` operator. */
void test2(){
w= __builtin_ia32_paddw(w,w);
/* Same-size vector cast: the 64 bits of w are reinterpreted, not converted. */
dw= (mmxdw)w;
}
gcc SVN generates the expected code for test2(), but not test(). I don't think
using += on an MMX variable should count as autovectorization - if you're doing
either you should know where to put emms yourself.
For test() we get:
subl $28, %esp
movq _w, %mm0
movq %mm0, 8(%esp)
movzwl 8(%esp), %eax
movzwl 10(%esp), %edx
movzwl 12(%esp), %ecx
addl %eax, %eax
addl %edx, %edx
movw %ax, _w
movw %dx, _w+2
movzwl 14(%esp), %eax
addl %ecx, %ecx
addl %eax, %eax
movw %cx, _w+4
movw %ax, _w+6
movq _w, %mm0
movq %mm0, _dw
addl $28, %esp
ret
which touches mm0 (and so requires emms, I think) but does not use paddw (so
it is slow and silly-looking).
LLVM generates expected code for both of them.
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14552