On Mon, Nov 08, 2021 at 11:42:56AM -0600, Paul A. Clarke via Gcc-patches wrote: > Gentle ping...
Gentle re-ping. > On Thu, Oct 21, 2021 at 12:22:12PM -0500, Paul A. Clarke via Gcc-patches > wrote: > > Power10 ISA added `vextract*` instructions which are realized in the > > `vec_extractm` intrinsic. > > > > Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and > > `_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10`. > > > > 2021-10-21 Paul A. Clarke <p...@us.ibm.com> > > > > gcc > > * config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm > > when _ARCH_PWR10. > > * config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise. > > (_mm_movemask_epi8): Likewise. > > --- > > Tested on Power10 powerpc64le-linux (compiled with and without > > `-mcpu=power10`). > > > > OK for trunk? > > > > gcc/config/rs6000/emmintrin.h | 8 ++++++++ > > gcc/config/rs6000/xmmintrin.h | 4 ++++ > > 2 files changed, 12 insertions(+) > > > > diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h > > index 32ad72b4cc35..ab16c13c379e 100644 > > --- a/gcc/config/rs6000/emmintrin.h > > +++ b/gcc/config/rs6000/emmintrin.h > > @@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B) > > extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > _mm_movemask_pd (__m128d __A) > > { > > +#ifdef _ARCH_PWR10 > > + return vec_extractm ((__v2du) __A); > > +#else > > __vector unsigned long long result; > > static const __vector unsigned int perm_mask = > > { > > @@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d __A) > > #else > > return result[0]; > > #endif > > +#endif /* !_ARCH_PWR10 */ > > } > > #endif /* _ARCH_PWR8 */ > > > > @@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B) > > extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > _mm_movemask_epi8 (__m128i __A) > > { > > +#ifdef _ARCH_PWR10 > > + return vec_extractm ((__v16qu) __A); > > +#else > > __vector unsigned long long result; > > static const __vector unsigned char perm_mask = > > { > > @@ -2046,6 
+2053,7 @@ _mm_movemask_epi8 (__m128i __A) > > #else > > return result[0]; > > #endif > > +#endif /* !_ARCH_PWR10 */ > > } > > #endif /* _ARCH_PWR8 */ > > > > diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h > > index ae1a33e8d95b..4c093fd1d5ae 100644 > > --- a/gcc/config/rs6000/xmmintrin.h > > +++ b/gcc/config/rs6000/xmmintrin.h > > @@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A) > > extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > _mm_movemask_ps (__m128 __A) > > { > > +#ifdef _ARCH_PWR10 > > + return vec_extractm ((vector unsigned int) __A); > > +#else > > __vector unsigned long long result; > > static const __vector unsigned int perm_mask = > > { > > @@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128 __A) > > #else > > return result[0]; > > #endif > > +#endif /* !_ARCH_PWR10 */ > > } > > #endif /* _ARCH_PWR8 */ > > > > -- > > 2.27.0 > >