On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > > Add missing macro under O0 and adjust macro format for scalef > intrinsics. > Please add the corresponding intrinsic test in sse-14.c. > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}. > > Ok for master and backport to GCC 9/10/11? > > gcc/ChangeLog: > > PR target/105339 > * config/i386/avx512fintrin.h (_mm512_scalef_round_pd): > Add parentheses for parameters and adjust format. > (_mm512_mask_scalef_round_pd): Ditto. > (_mm512_maskz_scalef_round_pd): Ditto. > (_mm512_scalef_round_ps): Ditto. > (_mm512_mask_scalef_round_ps): Ditto. > (_mm512_maskz_scalef_round_ps): Ditto. > (_mm_scalef_round_sd): Use _mm_undefined_pd. > (_mm_scalef_round_ss): Use _mm_undefined_ps. > (_mm_mask_scalef_round_sd): New macro. > (_mm_mask_scalef_round_ss): Ditto. > (_mm_maskz_scalef_round_sd): Ditto. > (_mm_maskz_scalef_round_ss): Ditto. > --- > gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++--------- > 1 file changed, 56 insertions(+), 20 deletions(-) > > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h > index 29511fd2831..6dc69ff0234 100644 > --- a/gcc/config/i386/avx512fintrin.h > +++ b/gcc/config/i386/avx512fintrin.h > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, > __m128 __B, const int __R) > (__mmask8) __U, __R); > } > #else > -#define _mm512_scalef_round_pd(A, B, C) \ > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, > (__v8df)_mm512_undefined_pd(), -1, C) > - > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) > - > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > - (__m512d)__builtin_ia32_scalefpd512_mask(A, B, > (__v8df)_mm512_setzero_pd(), U, C) > +#define _mm512_scalef_round_pd(A, B, C) > \ > + ((__m512d) \ > + __builtin_ia32_scalefpd512_mask((A), (B), \ > + (__v8df) _mm512_undefined_pd(), \ > + -1, (C))) > + > +#define 
_mm512_mask_scalef_round_pd(W, U, A, B, C) \ > + ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C))) > + > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ > + ((__m512d) \ > + __builtin_ia32_scalefpd512_mask((A), (B), \ > + (__v8df) _mm512_setzero_pd(), \ > + (U), (C))) > + > +#define _mm512_scalef_round_ps(A, B, C) > \ > + ((__m512) \ > + __builtin_ia32_scalefps512_mask((A), (B), \ > + (__v16sf) _mm512_undefined_ps(), \ > + -1, (C))) > + > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ > + ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C))) > + > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ > + ((__m512) \ > + __builtin_ia32_scalefps512_mask((A), (B), \ > + (__v16sf) _mm512_setzero_ps(), \ > + (U), (C))) > + > +#define _mm_scalef_round_sd(A, B, C) \ > + ((__m128d) \ > + __builtin_ia32_scalefsd_mask_round ((A), (B), \ > + (__v2df) _mm_undefined_pd (), \ > + -1, (C))) > > -#define _mm512_scalef_round_ps(A, B, C) \ > - (__m512)__builtin_ia32_scalefps512_mask(A, B, > (__v16sf)_mm512_undefined_ps(), -1, C) > +#define _mm_scalef_round_ss(A, B, C) \ > + ((__m128) \ > + __builtin_ia32_scalefss_mask_round ((A), (B), \ > + (__v4sf) _mm_undefined_ps (), \ > + -1, (C))) > > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ > - (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) > +#define _mm_mask_scalef_round_sd(W, U, A, B, C) > \ > + ((__m128d) \ > + __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C))) > > -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ > - (__m512)__builtin_ia32_scalefps512_mask(A, B, > (__v16sf)_mm512_setzero_ps(), U, C) > +#define _mm_mask_scalef_round_ss(W, U, A, B, C) > \ > + ((__m128) \ > + __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C))) > > -#define _mm_scalef_round_sd(A, B, C) \ > - (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \ > - (__v2df)_mm_setzero_pd (), -1, C) > +#define _mm_maskz_scalef_round_sd(U, A, B, C) \ > + ((__m128d) \ > + 
__builtin_ia32_scalefsd_mask_round ((A), (B), \ > + (__v2df) _mm_setzero_pd (), \ > + (U), (C))) > > -#define _mm_scalef_round_ss(A, B, C) \ > - (__m128)__builtin_ia32_scalefss_mask_round (A, B, \ > - (__v4sf)_mm_setzero_ps (), -1, C) > +#define _mm_maskz_scalef_round_ss(U, A, B, C) \ > + ((__m128) \ > + __builtin_ia32_scalefss_mask_round ((A), (B), \ > + (__v4sf) _mm_setzero_ps (), \ > + (W), (U), (C))) > #endif > > #define _mm_mask_scalef_sd(W, U, A, B) \ > -- > 2.18.1 >
-- BR, Hongtao