> Please add the corresponding intrinsic test in sse-14.c

Sorry for forgetting this part. Updated patch. Thanks.
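For reference on the "Add parentheses for parameters" part of the change:
when __OPTIMIZE__ is not defined these intrinsics are plain macros, so an
argument that is itself an expression can otherwise bind to operators inside
the expansion. A minimal illustration (the macro names below are made up for
this example; they are not from avx512fintrin.h):

  #include <stdio.h>

  /* Hypothetical macros, only to show the operator-binding problem.  */
  #define SCALE_BY_TWO_BAD(X)  2 * X        /* argument not parenthesized */
  #define SCALE_BY_TWO_GOOD(X) (2 * (X))    /* argument parenthesized     */

  int
  main (void)
  {
    /* Prints "3 4": 2 * 1 + 1 vs. 2 * (1 + 1).  */
    printf ("%d %d\n", SCALE_BY_TWO_BAD (1 + 1), SCALE_BY_TWO_GOOD (1 + 1));
    return 0;
  }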
Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote on Fri, Apr 22, 2022 at 16:49:
>
> On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi,
> >
> > Add missing macro under O0 and adjust macro format for scalf
> > intrinsics.
> >
> Please add the corresponding intrinsic test in sse-14.c.
> > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
> >
> > Ok for master and backport to GCC 9/10/11?
> >
> > gcc/ChangeLog:
> >
> >         PR target/105339
> >         * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> >         Add parentheses for parameters and djust format.
> >         (_mm512_mask_scalef_round_pd): Ditto.
> >         (_mm512_maskz_scalef_round_pd): Ditto.
> >         (_mm512_scalef_round_ps): Ditto.
> >         (_mm512_mask_scalef_round_ps): Ditto.
> >         (_mm512_maskz_scalef_round_ps): Ditto.
> >         (_mm_scalef_round_sd): Use _mm_undefined_pd.
> >         (_mm_scalef_round_ss): Use _mm_undefined_ps.
> >         (_mm_mask_scalef_round_sd): New macro.
> >         (_mm_mask_scalef_round_ss): Ditto.
> >         (_mm_maskz_scalef_round_sd): Ditto.
> >         (_mm_maskz_scalef_round_ss): Ditto.
> > ---
> >  gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> >  1 file changed, 56 insertions(+), 20 deletions(-)
> >
> > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> > index 29511fd2831..6dc69ff0234 100644
> > --- a/gcc/config/i386/avx512fintrin.h
> > +++ b/gcc/config/i386/avx512fintrin.h
> > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> >                                              (__mmask8) __U, __R);
> >  }
> >  #else
> > -#define _mm512_scalef_round_pd(A, B, C) \
> > -  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> > -
> > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > -  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> > -
> > -#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > -  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> > +#define _mm512_scalef_round_pd(A, B, C) \
> > +  ((__m512d) \
> > +   __builtin_ia32_scalefpd512_mask((A), (B), \
> > +                                   (__v8df) _mm512_undefined_pd(), \
> > +                                   -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > +  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
> > +  ((__m512d) \
> > +   __builtin_ia32_scalefpd512_mask((A), (B), \
> > +                                   (__v8df) _mm512_setzero_pd(), \
> > +                                   (U), (C)))
> > +
> > +#define _mm512_scalef_round_ps(A, B, C) \
> > +  ((__m512) \
> > +   __builtin_ia32_scalefps512_mask((A), (B), \
> > +                                   (__v16sf) _mm512_undefined_ps(), \
> > +                                   -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > +  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > +  ((__m512) \
> > +   __builtin_ia32_scalefps512_mask((A), (B), \
> > +                                   (__v16sf) _mm512_setzero_ps(), \
> > +                                   (U), (C)))
> > +
> > +#define _mm_scalef_round_sd(A, B, C) \
> > +  ((__m128d) \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > +                                       (__v2df) _mm_undefined_pd (), \
> > +                                       -1, (C)))
> >
> > -#define _mm512_scalef_round_ps(A, B, C) \
> > -  (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> > +#define _mm_scalef_round_ss(A, B, C) \
> > +  ((__m128) \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B), \
> > +                                       (__v4sf) _mm_undefined_ps (), \
> > +                                       -1, (C)))
> >
> > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > -  (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> > +#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
> > +  ((__m128d) \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
> > -  (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> > +#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
> > +  ((__m128) \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm_scalef_round_sd(A, B, C) \
> > -  (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> > -                                               (__v2df)_mm_setzero_pd (), -1, C)
> > +#define _mm_maskz_scalef_round_sd(U, A, B, C) \
> > +  ((__m128d) \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B), \
> > +                                       (__v2df) _mm_setzero_pd (), \
> > +                                       (U), (C)))
> >
> > -#define _mm_scalef_round_ss(A, B, C) \
> > -  (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> > -                                              (__v4sf)_mm_setzero_ps (), -1, C)
> > +#define _mm_maskz_scalef_round_ss(U, A, B, C) \
> > +  ((__m128) \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B), \
> > +                                       (__v4sf) _mm_setzero_ps (), \
> > +                                       (W), (U), (C)))
> >  #endif
> >
> >  #define _mm_mask_scalef_sd(W, U, A, B) \
> > --
> > 2.18.1
> >
> >
>
> --
> BR,
> Hongtao
From 93be6b95b3237e1bff61001031ed6ad733c5de3f Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.w...@intel.com>
Date: Fri, 22 Apr 2022 14:42:30 +0800
Subject: [PATCH] AVX512F: Add missing macro for mask(z?)_scalef_s[sd] [PR 105339]

Add missing macro under O0 and adjust macro format for scalef
intrinsics.

gcc/ChangeLog:

	PR target/105339
	* config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
	Add parentheses for parameters and adjust format.
	(_mm512_mask_scalef_round_pd): Ditto.
	(_mm512_maskz_scalef_round_pd): Ditto.
	(_mm512_scalef_round_ps): Ditto.
	(_mm512_mask_scalef_round_ps): Ditto.
	(_mm512_maskz_scalef_round_ps): Ditto.
	(_mm_scalef_round_sd): Use _mm_undefined_pd.
	(_mm_scalef_round_ss): Use _mm_undefined_ps.
	(_mm_mask_scalef_round_sd): New macro.
	(_mm_mask_scalef_round_ss): Ditto.
	(_mm_maskz_scalef_round_sd): Ditto.
	(_mm_maskz_scalef_round_ss): Ditto.

gcc/testsuite/ChangeLog:

	PR target/105339
	* gcc.target/i386/sse-14.c: Add tests for new macros.
---
 gcc/config/i386/avx512fintrin.h        | 76 +++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/sse-14.c |  4 ++
 2 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..77d6249c2bc 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
                                             (__mmask8) __U, __R);
 }
 #else
-#define _mm512_scalef_round_pd(A, B, C) \
-  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
-  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
-  (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C) \
+  ((__m512d) \
+   __builtin_ia32_scalefpd512_mask((A), (B), \
+                                   (__v8df) _mm512_undefined_pd(), \
+                                   -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+  ((__m512d) \
+   __builtin_ia32_scalefpd512_mask((A), (B), \
+                                   (__v8df) _mm512_setzero_pd(), \
+                                   (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+  ((__m512) \
+   __builtin_ia32_scalefps512_mask((A), (B), \
+                                   (__v16sf) _mm512_undefined_ps(), \
+                                   -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+  ((__m512) \
+   __builtin_ia32_scalefps512_mask((A), (B), \
+                                   (__v16sf) _mm512_setzero_ps(), \
+                                   (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C) \
+  ((__m128d) \
+   __builtin_ia32_scalefsd_mask_round ((A), (B), \
+                                       (__v2df) _mm_undefined_pd (), \
+                                       -1, (C)))
 
-#define _mm512_scalef_round_ps(A, B, C) \
-  (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C) \
+  ((__m128) \
+   __builtin_ia32_scalefss_mask_round ((A), (B), \
+                                       (__v4sf) _mm_undefined_ps (), \
+                                       -1, (C)))
 
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
-  (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+  ((__m128d) \
+   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
-  (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+  ((__m128) \
+   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm_scalef_round_sd(A, B, C) \
-  (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
-                                               (__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+  ((__m128d) \
+   __builtin_ia32_scalefsd_mask_round ((A), (B), \
+                                       (__v2df) _mm_setzero_pd (), \
+                                       (U), (C)))
 
-#define _mm_scalef_round_ss(A, B, C) \
-  (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
-                                              (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+  ((__m128) \
+   __builtin_ia32_scalefss_mask_round ((A), (B), \
+                                       (__v4sf) _mm_setzero_ps (), \
+                                       (U), (C)))
 #endif
 
 #define _mm_mask_scalef_sd(W, U, A, B) \
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 956a9d16f84..f41493b93f3 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -430,7 +430,9 @@ test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
 test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
 test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
 test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
@@ -545,7 +547,9 @@ test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
 test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
 test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
 test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)
--
2.18.1
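As a quick sanity check of the newly added -O0 macros (just a usage sketch,
not part of the patch or of sse-14.c), something like the following should
now compile with gcc -O0 -mavx512f -c, since the macro path is only taken
when __OPTIMIZE__ is not defined:

  #include <immintrin.h>

  /* Exercise two of the macros that were previously missing under -O0.  */
  __m128d
  scalef_sd_masked (__m128d w, __mmask8 u, __m128d a, __m128d b)
  {
    return _mm_mask_scalef_round_sd (w, u, a, b, _MM_FROUND_CUR_DIRECTION);
  }

  __m128
  scalef_ss_maskz (__mmask8 u, __m128 a, __m128 b)
  {
    return _mm_maskz_scalef_round_ss (u, a, b, _MM_FROUND_CUR_DIRECTION);
  }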