On Sun, 11 Oct 2015 20:59:41 -0700 Matt Turner <[email protected]> wrote:
> We had lots of hacks to handle the inability to include xmmintrin.h > without compiling with -msse (lest SSE instructions be used in > pixman-mmx.c). Some recent version of gcc relaxed this restriction. > > Change configure.ac to test that xmmintrin.h can be included and that we > can use some intrinsics from it, and remove the work-around code from > pixman-mmx.c. > > Evidently allows gcc 4.9.3 to optimize better as well: > > text data bss dec hex filename > 657078 30848 680 688606 a81de libpixman-1.so.0.33.3 before > 656710 30848 680 688238 a806e libpixman-1.so.0.33.3 after > > Signed-off-by: Matt Turner <[email protected]> > --- > Looks like _MM_SHUFFLE isn't defined by ARM's mmintrin.h. > > configure.ac | 15 ++++--------- > pixman/pixman-mmx.c | 64 > ++++------------------------------------------------- > 2 files changed, 8 insertions(+), 71 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 424bfd3..b04cc69 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -347,21 +347,14 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ > #error "Need GCC >= 3.4 for MMX intrinsics" > #endif > #include <mmintrin.h> > +#include <xmmintrin.h> > int main () { > __m64 v = _mm_cvtsi32_si64 (1); > __m64 w; > > - /* Some versions of clang will choke on K */ > - asm ("pshufw %2, %1, %0\n\t" > - : "=y" (w) > - : "y" (v), "K" (5) > - ); > - > - /* Some versions of clang will choke on this */ > - asm ("pmulhuw %1, %0\n\t" > - : "+y" (w) > - : "y" (v) > - ); > + /* Test some intrinsics from xmmintrin.h */ > + w = _mm_shuffle_pi16(v, 5); > + w = _mm_mulhi_pu16(w, w); > > return _mm_cvtsi64_si32 (v); > }]])], have_mmx_intrinsics=yes) > diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c > index 05c48a4..88c3a39 100644 > --- a/pixman/pixman-mmx.c > +++ b/pixman/pixman-mmx.c > @@ -40,6 +40,9 @@ > #else > #include <mmintrin.h> > #endif > +#ifdef USE_X86_MMX > +#include <xmmintrin.h> > +#endif > #include "pixman-private.h" > #include "pixman-combine32.h" > #include 
"pixman-inlines.h" > @@ -59,66 +62,7 @@ _mm_empty (void) > } > #endif > > -#ifdef USE_X86_MMX > -# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64)) > -# include <xmmintrin.h> > -# else > -/* We have to compile with -msse to use xmmintrin.h, but that causes SSE > - * instructions to be generated that we don't want. Just duplicate the > - * functions we want to use. */ > -extern __inline int __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_movemask_pi8 (__m64 __A) > -{ > - int ret; > - > - asm ("pmovmskb %1, %0\n\t" > - : "=r" (ret) > - : "y" (__A) > - ); > - > - return ret; > -} > - > -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_mulhi_pu16 (__m64 __A, __m64 __B) > -{ > - asm ("pmulhuw %1, %0\n\t" > - : "+y" (__A) > - : "y" (__B) > - ); > - return __A; > -} > - > -# ifdef __OPTIMIZE__ > -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, > __artificial__)) > -_mm_shuffle_pi16 (__m64 __A, int8_t const __N) > -{ > - __m64 ret; > - > - asm ("pshufw %2, %1, %0\n\t" > - : "=y" (ret) > - : "y" (__A), "K" (__N) > - ); > - > - return ret; > -} > -# else > -# define _mm_shuffle_pi16(A, N) \ > - ({ > \ > - __m64 ret; \ > - \ > - asm ("pshufw %2, %1, %0\n\t" \ > - : "=y" (ret) \ > - : "y" (A), "K" ((const int8_t)N) \ > - ); \ > - \ > - ret; \ > - }) > -# endif > -# endif > -#endif > - > -#ifndef _MSC_VER > +#ifndef _MM_SHUFFLE > #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ > (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) > #endif Thanks, Reviewed-by: Siarhei Siamashka <[email protected]> If there are people with uncommon systems and compilers (or compiler versions) here on the mailing list, then it may be a good idea to give this patch a try. The 32-bit build is particularly interesting. -- Best regards, Siarhei Siamashka _______________________________________________ Pixman mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/pixman
