------- Comment #3 from jakub at gcc dot gnu dot org 2010-08-24 19:57 ------- Reduced testcase:
/* Reduced compiler testcase: the exact shape of the code below is the point,
   so it is documented here but deliberately not restructured.  */

/* Vector types declared with GCC vector attributes: __m64 is an 8-byte
   vector of int additionally marked __may_alias__, __v2si is an 8-byte
   vector of int, and __v4sf is a 16-byte vector of float.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

/* Return (X << 8) | (X >> 8) converted back to unsigned short, i.e. X with
   its two bytes exchanged when unsigned short is 16 bits wide.  */
static __inline__ unsigned short
bar (unsigned short x)
{
  return ((x << 8) | (x >> 8));
}

/* Read one __v4sf from X, multiply every lane by 32767.5f, run the result
   through the movhlps / cvtps2pi / packssdw builtins, copy
   sizeof (short) * 4 bytes of the packed value into Y, and finally replace
   Y[0] with bar (Y[0]).

   NOTE(review): declared to return unsigned int but there is no return
   statement; left untouched because this is a reduced testcase.
   NOTE(review): X is dereferenced through a __v4sf * cast; whether callers
   guarantee suitable alignment is not visible here.  */
unsigned int
foo (float *x, short *y)
{
  /* Constant scale factor, identical in all four lanes.  */
  __v4sf b = (__v4sf){ 32767.5f, 32767.5f, 32767.5f, 32767.5f };
  __v4sf c = __builtin_ia32_mulps (*(__v4sf *) x, b);
  /* Presumably brings the upper half of C into the lower half so that both
     halves can be converted below -- confirm against the builtin docs.  */
  __v4sf d = __builtin_ia32_movhlps (c, c);
  __v2si e = __builtin_ia32_cvtps2pi (c);
  __v2si f = __builtin_ia32_cvtps2pi (d);
  __m64 g = (__m64) __builtin_ia32_packssdw (e, f);
  /* Store the packed value through memcpy rather than a pointer cast.  */
  __builtin_memcpy (y, &g, sizeof (short) * 4);
  /* Only the first stored short is passed through bar.  */
  y[0] = bar (y[0]);
}
-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45400