http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46419
Uros Bizjak <ubizjak at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Target|                            |x86
             Status|UNCONFIRMED                 |ASSIGNED
   Last reconfirmed|                            |2010.11.10 22:14:13
          Component|c                           |target
         AssignedTo|unassigned at gcc dot       |ubizjak at gmail dot com
                   |gnu.org                     |
     Ever Confirmed|0                           |1
            Summary|xmmintrin.h: _mm_cvtpu16_ps |[4.4, 4.5, 4.6 Regression]
                   |(and hence _mm_cvtpu8_ps)   |_mm_cvtpu16_ps (and hence
                   |returns false result in gcc |_mm_cvtpu8_ps) returns
                   |>= 4.4                      |false result
   Target Milestone|---                         |4.4.6
           Severity|critical                    |normal

--- Comment #1 from Uros Bizjak <ubizjak at gmail dot com> 2010-11-10 22:14:13 UTC ---
Ugh...

Patch in testing:

Index: xmmintrin.h
===================================================================
--- xmmintrin.h (revision 166558)
+++ xmmintrin.h (working copy)
@@ -626,13 +626,13 @@
   __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
 
   /* Convert the four words to doublewords.  */
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
 
   /* Convert the doublewords to floating point two at a time.  */
   __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
 
   return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }
@@ -645,13 +645,13 @@
   __v4sf __zero, __ra, __rb;
 
   /* Convert the four words to doublewords.  */
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
 
   /* Convert the doublewords to floating point two at a time.  */
   __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
 
   return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }