http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46419
Uros Bizjak <ubizjak at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Target| |x86
Status|UNCONFIRMED |ASSIGNED
Last reconfirmed| |2010.11.10 22:14:13
Component|c |target
AssignedTo|unassigned at gcc dot gnu.org |ubizjak at gmail dot com
Ever Confirmed|0 |1
Summary|xmmintrin.h: _mm_cvtpu16_ps (and hence _mm_cvtpu8_ps) returns false result in gcc >= 4.4 |[4.4, 4.5, 4.6 Regression] _mm_cvtpu16_ps (and hence _mm_cvtpu8_ps) returns false result
Target Milestone|--- |4.4.6
Severity|critical |normal
--- Comment #1 from Uros Bizjak <ubizjak at gmail dot com> 2010-11-10 22:14:13 UTC ---
Ugh...
Patch in testing:
Index: xmmintrin.h
===================================================================
--- xmmintrin.h (revision 166558)
+++ xmmintrin.h (working copy)
@@ -626,13 +626,13 @@
__sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
/* Convert the four words to doublewords. */
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
__hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
- __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
/* Convert the doublewords to floating point two at a time. */
__zero = (__v4sf) _mm_setzero_ps ();
- __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
- __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
return (__m128) __builtin_ia32_movlhps (__ra, __rb);
}
@@ -645,13 +645,13 @@
__v4sf __zero, __ra, __rb;
/* Convert the four words to doublewords. */
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
__hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
- __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
/* Convert the doublewords to floating point two at a time. */
__zero = (__v4sf) _mm_setzero_ps ();
- __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
- __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
return (__m128) __builtin_ia32_movlhps (__ra, __rb);
}