http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46419

Uros Bizjak <ubizjak at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Target|                            |x86
             Status|UNCONFIRMED                 |ASSIGNED
   Last reconfirmed|                            |2010.11.10 22:14:13
          Component|c                           |target
         AssignedTo|unassigned at gcc dot       |ubizjak at gmail dot com
                   |gnu.org                     |
     Ever Confirmed|0                           |1
            Summary|xmmintrin.h: _mm_cvtpu16_ps |[4.4, 4.5, 4.6 Regression]
                   |(and hence _mm_cvtpu8_ps)   |_mm_cvtpu16_ps (and hence
                   |returns false result in gcc |_mm_cvtpu8_ps) returns
                   |>= 4.4                      |false result
   Target Milestone|---                         |4.4.6
           Severity|critical                    |normal

--- Comment #1 from Uros Bizjak <ubizjak at gmail dot com> 2010-11-10 22:14:13 UTC ---
Ugh...

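Patch in testing. The low and high doubleword pairs were being converted in
swapped order, so the final movlhps placed the converted high pair in the low
half of the result and the converted low pair in the high half. The patch
converts the low pair first (into __ra) and the high pair second (into __rb):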

Index: xmmintrin.h
===================================================================
--- xmmintrin.h    (revision 166558)
+++ xmmintrin.h    (working copy)
@@ -626,13 +626,13 @@
   __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);

   /* Convert the four words to doublewords.  */
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);

   /* Convert the doublewords to floating point two at a time.  */
   __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);

   return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }
@@ -645,13 +645,13 @@
   __v4sf __zero, __ra, __rb;

   /* Convert the four words to doublewords.  */
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);

   /* Convert the doublewords to floating point two at a time.  */
   __zero = (__v4sf) _mm_setzero_ps ();
-  __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
-  __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+  __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+  __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);

   return (__m128) __builtin_ia32_movlhps (__ra, __rb);
 }

Reply via email to