gcc/ChangeLog: * config/i386/avx512fp16intrin.h (_mm512_cvtph_pd): New intrinsic. (_mm512_mask_cvtph_pd): Likewise. (_mm512_maskz_cvtph_pd): Likewise. (_mm512_cvt_roundph_pd): Likewise. (_mm512_mask_cvt_roundph_pd): Likewise. (_mm512_maskz_cvt_roundph_pd): Likewise. (_mm512_cvtxph_ps): Likewise. (_mm512_mask_cvtxph_ps): Likewise. (_mm512_maskz_cvtxph_ps): Likewise. (_mm512_cvtx_roundph_ps): Likewise. (_mm512_mask_cvtx_roundph_ps): Likewise. (_mm512_maskz_cvtx_roundph_ps): Likewise. (_mm512_cvtxps_ph): Likewise. (_mm512_mask_cvtxps_ph): Likewise. (_mm512_maskz_cvtxps_ph): Likewise. (_mm512_cvtx_roundps_ph): Likewise. (_mm512_mask_cvtx_roundps_ph): Likewise. (_mm512_maskz_cvtx_roundps_ph): Likewise. (_mm512_cvtpd_ph): Likewise. (_mm512_mask_cvtpd_ph): Likewise. (_mm512_maskz_cvtpd_ph): Likewise. (_mm512_cvt_roundpd_ph): Likewise. (_mm512_mask_cvt_roundpd_ph): Likewise. (_mm512_maskz_cvt_roundpd_ph): Likewise. * config/i386/avx512fp16vlintrin.h (_mm_cvtph_pd): New intrinsic. (_mm_mask_cvtph_pd): Likewise. (_mm_maskz_cvtph_pd): Likewise. (_mm256_cvtph_pd): Likewise. (_mm256_mask_cvtph_pd): Likewise. (_mm256_maskz_cvtph_pd): Likewise. (_mm_cvtxph_ps): Likewise. (_mm_mask_cvtxph_ps): Likewise. (_mm_maskz_cvtxph_ps): Likewise. (_mm256_cvtxph_ps): Likewise. (_mm256_mask_cvtxph_ps): Likewise. (_mm256_maskz_cvtxph_ps): Likewise. (_mm_cvtxps_ph): Likewise. (_mm_mask_cvtxps_ph): Likewise. (_mm_maskz_cvtxps_ph): Likewise. (_mm256_cvtxps_ph): Likewise. (_mm256_mask_cvtxps_ph): Likewise. (_mm256_maskz_cvtxps_ph): Likewise. (_mm_cvtpd_ph): Likewise. (_mm_mask_cvtpd_ph): Likewise. (_mm_maskz_cvtpd_ph): Likewise. (_mm256_cvtpd_ph): Likewise. (_mm256_mask_cvtpd_ph): Likewise. (_mm256_maskz_cvtpd_ph): Likewise. * config/i386/i386-builtin.def: Add corresponding new builtins. * config/i386/i386-builtin-types.def: Add corresponding builtin types. * config/i386/i386-expand.c: Handle new builtin types. * config/i386/sse.md (VF4_128_8_256): New. (VF48H_AVX512VL): Ditto. 
(ssePHmode): Add HF vector modes. (castmode): Add new convertible modes. (qq2phsuff): Ditto. (ph2pssuffix): New. (avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>): Ditto. (avx512fp16_vcvt<castmode>2ph_<mode>): Ditto. (*avx512fp16_vcvt<castmode>2ph_<mode>): Ditto. (avx512fp16_vcvt<castmode>2ph_<mode>_mask): Ditto. (*avx512fp16_vcvt<castmode>2ph_<mode>_mask): Ditto. (*avx512fp16_vcvt<castmode>2ph_<mode>_mask_1): Ditto. (avx512fp16_vcvtpd2ph_v2df): Ditto. (*avx512fp16_vcvtpd2ph_v2df): Ditto. (avx512fp16_vcvtpd2ph_v2df_mask): Ditto. (*avx512fp16_vcvtpd2ph_v2df_mask): Ditto. (*avx512fp16_vcvtpd2ph_v2df_mask_1): Ditto.
gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add test for new builtins. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/sse-14.c: Add test for new intrinsics. * gcc.target/i386/sse-22.c: Ditto. --- gcc/config/i386/avx512fp16intrin.h | 297 +++++++++++++++++++++++++ gcc/config/i386/avx512fp16vlintrin.h | 200 +++++++++++++++++ gcc/config/i386/i386-builtin-types.def | 12 + gcc/config/i386/i386-builtin.def | 12 + gcc/config/i386/i386-expand.c | 12 + gcc/config/i386/sse.md | 189 +++++++++++++++- gcc/testsuite/gcc.target/i386/avx-1.c | 4 + gcc/testsuite/gcc.target/i386/sse-13.c | 4 + gcc/testsuite/gcc.target/i386/sse-14.c | 12 + gcc/testsuite/gcc.target/i386/sse-22.c | 12 + gcc/testsuite/gcc.target/i386/sse-23.c | 4 + 11 files changed, 755 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index bcd04f14769..5a6a0ba83a9 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -4306,6 +4306,303 @@ _mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R) #endif /* __OPTIMIZE__ */ #endif /* __x86_64__ */ +/* Intrinsics vcvtph2pd. 
*/ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_pd (__m512d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__B, + _mm512_setzero_pd (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundph_pd (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__A, + _mm512_setzero_pd (), + (__mmask8) -1, + __B); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundph_pd (__m512d __A, __mmask8 __B, __m128h __C, int __D) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundph_pd (__mmask8 __A, __m128h __B, int __C) +{ + return __builtin_ia32_vcvtph2pd_v8df_mask_round (__B, + _mm512_setzero_pd (), + __A, + __C); +} + +#else +#define _mm512_cvt_roundph_pd(A, B) \ + (__builtin_ia32_vcvtph2pd_v8df_mask_round ((A), \ + _mm512_setzero_pd (), \ + (__mmask8)-1, \ + (B))) + +#define _mm512_mask_cvt_roundph_pd(A, B, C, D) \ + (__builtin_ia32_vcvtph2pd_v8df_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvt_roundph_pd(A, B, C) \ + (__builtin_ia32_vcvtph2pd_v8df_mask_round ((B), \ + _mm512_setzero_pd (), \ + (A), \ + (C))) + 
+#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtph2psx. */ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxph_ps (__m256h __A) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxph_ps (__m512 __A, __mmask16 __B, __m256h __C) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__C, __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxph_ps (__mmask16 __A, __m256h __B) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__B, + _mm512_setzero_ps (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundph_ps (__m256h __A, int __B) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__A, + _mm512_setzero_ps (), + (__mmask16) -1, + __B); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundph_ps (__m512 __A, __mmask16 __B, __m256h __C, int __D) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__C, __A, __B, __D); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundph_ps (__mmask16 __A, __m256h __B, int __C) +{ + return __builtin_ia32_vcvtph2ps_v16sf_mask_round (__B, + _mm512_setzero_ps (), + __A, + __C); +} + +#else +#define _mm512_cvtx_roundph_ps(A, B) \ + (__builtin_ia32_vcvtph2ps_v16sf_mask_round ((A), \ + _mm512_setzero_ps (), \ + (__mmask16)-1, \ + (B))) + +#define _mm512_mask_cvtx_roundph_ps(A, B, C, D) \ + (__builtin_ia32_vcvtph2ps_v16sf_mask_round ((C), (A), (B), (D))) + +#define _mm512_maskz_cvtx_roundph_ps(A, B, C) \ + 
(__builtin_ia32_vcvtph2ps_v16sf_mask_round ((B), \ + _mm512_setzero_ps (), \ + (A), \ + (C))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtps2phx. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtxps_ph (__m512 __A) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtxps_ph (__m256h __A, __mmask16 __B, __m512 __C) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtxps_ph (__mmask16 __A, __m512 __B) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_roundps_ph (__m512 __A, int __B) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __A, + _mm256_setzero_ph (), + (__mmask16) -1, + __B); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_roundps_ph (__m256h __A, __mmask16 __B, __m512 __C, int __D) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __C, + __A, __B, __D); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_roundps_ph (__mmask16 __A, __m512 __B, int __C) +{ + return __builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf) __B, + _mm256_setzero_ph (), + __A, __C); +} + +#else +#define _mm512_cvtx_roundps_ph(A, B) \ + (__builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf)(A), \ + _mm256_setzero_ph (), \ + (__mmask16)-1, (B))) + +#define 
_mm512_mask_cvtx_roundps_ph(A, B, C, D) \ + (__builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf)(C), \ + (A), (B), (D))) + +#define _mm512_maskz_cvtx_roundps_ph(A, B, C) \ + (__builtin_ia32_vcvtps2ph_v16sf_mask_round ((__v16sf)(B), \ + _mm256_setzero_ph (), \ + (A), (C))) +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vcvtpd2ph. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtpd_ph (__m512d __A) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m512d __C) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __C, + __A, __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtpd_ph (__mmask8 __A, __m512d __B) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt_roundpd_ph (__m512d __A, int __B) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __A, + _mm_setzero_ph (), + (__mmask8) -1, + __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt_roundpd_ph (__m128h __A, __mmask8 __B, __m512d __C, int __D) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __C, + __A, __B, __D); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C) +{ + return __builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df) __B, + _mm_setzero_ph (), + __A, __C); +} + +#else +#define _mm512_cvt_roundpd_ph(A, B) \ 
+ (__builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df)(A), \ + _mm_setzero_ph (), \ + (__mmask8)-1, (B))) + +#define _mm512_mask_cvt_roundpd_ph(A, B, C, D) \ + (__builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df)(C), \ + (A), (B), (D))) + +#define _mm512_maskz_cvt_roundpd_ph(A, B, C) \ + (__builtin_ia32_vcvtpd2ph_v8df_mask_round ((__v8df)(B), \ + _mm_setzero_ph (), \ + (A), (C))) + +#endif /* __OPTIMIZE__ */ #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index e1ee37edde6..0124b830dd5 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -1952,6 +1952,206 @@ _mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B) __A); } +/* Intrinsics vcvtph2pd. */ +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd_v2df_mask (__A, + _mm_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd_v2df_mask (__C, __A, __B); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd_v2df_mask (__B, _mm_setzero_pd (), __A); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtph_pd (__m128h __A) +{ + return __builtin_ia32_vcvtph2pd_v4df_mask (__A, + _mm256_setzero_pd (), + (__mmask8) -1); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2pd_v4df_mask (__C, __A, __B); +} + +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2pd_v4df_mask (__B, + _mm256_setzero_pd (), + __A); +} + +/* Intrinsics vcvtph2psx. */ +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2ps_v4sf_mask (__A, + _mm_setzero_ps (), + (__mmask8) -1); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2ps_v4sf_mask (__C, __A, __B); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2ps_v4sf_mask (__B, _mm_setzero_ps (), __A); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxph_ps (__m128h __A) +{ + return __builtin_ia32_vcvtph2ps_v8sf_mask (__A, + _mm256_setzero_ps (), + (__mmask8) -1); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C) +{ + return __builtin_ia32_vcvtph2ps_v8sf_mask (__C, __A, __B); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) +{ + return __builtin_ia32_vcvtph2ps_v8sf_mask (__B, + _mm256_setzero_ps (), + __A); +} + +/* Intrinsics vcvtps2phx. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtxps_ph (__m128 __A) +{ + return __builtin_ia32_vcvtps2ph_v4sf_mask ((__v4sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C) +{ + return __builtin_ia32_vcvtps2ph_v4sf_mask ((__v4sf) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B) +{ + return __builtin_ia32_vcvtps2ph_v4sf_mask ((__v4sf) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtxps_ph (__m256 __A) +{ + return __builtin_ia32_vcvtps2ph_v8sf_mask ((__v8sf) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C) +{ + return __builtin_ia32_vcvtps2ph_v8sf_mask ((__v8sf) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B) +{ + return __builtin_ia32_vcvtps2ph_v8sf_mask ((__v8sf) __B, + _mm_setzero_ph (), + __A); +} + +/* Intrinsics vcvtpd2ph. 
*/ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_ph (__m128d __A) +{ + return __builtin_ia32_vcvtpd2ph_v2df_mask ((__v2df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C) +{ + return __builtin_ia32_vcvtpd2ph_v2df_mask ((__v2df) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B) +{ + return __builtin_ia32_vcvtpd2ph_v2df_mask ((__v2df) __B, + _mm_setzero_ph (), + __A); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtpd_ph (__m256d __A) +{ + return __builtin_ia32_vcvtpd2ph_v4df_mask ((__v4df) __A, + _mm_setzero_ph (), + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C) +{ + return __builtin_ia32_vcvtpd2ph_v4df_mask ((__v4df) __C, __A, __B); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) +{ + return __builtin_ia32_vcvtpd2ph_v4df_mask ((__v4df) __B, + _mm_setzero_ph (), + __A); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 74bda59a65e..4123e66f7cd 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1321,13 +1321,21 @@ DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT) DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI) DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI) +DEF_FUNCTION_TYPE (V2DF, V8HF, V2DF, UQI) +DEF_FUNCTION_TYPE (V4DF, V8HF, V4DF, UQI) 
DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI) +DEF_FUNCTION_TYPE (V4SF, V8HF, V4SF, UQI) DEF_FUNCTION_TYPE (V8SI, V8HF, V8SI, UQI) +DEF_FUNCTION_TYPE (V8SF, V8HF, V8SF, UQI) DEF_FUNCTION_TYPE (V8HI, V8HF, V8HI, UQI) DEF_FUNCTION_TYPE (V8HF, V4SI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V4SF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8SI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8SF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V2DI, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V4DI, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V2DF, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V4DF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8HI, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT) @@ -1336,7 +1344,9 @@ DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI) DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT) DEF_FUNCTION_TYPE (V8DI, V8HF, V8DI, UQI, INT) +DEF_FUNCTION_TYPE (V8DF, V8HF, V8DF, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8DI, V8HF, UQI, INT) +DEF_FUNCTION_TYPE (V8HF, V8DF, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF) @@ -1344,9 +1354,11 @@ DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI) DEF_FUNCTION_TYPE (V16HF, V16HI, V16HF, UHI) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI) DEF_FUNCTION_TYPE (V16SI, V16HF, V16SI, UHI, INT) +DEF_FUNCTION_TYPE (V16SF, V16HF, V16SF, UHI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI) DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI) DEF_FUNCTION_TYPE (V16HF, V16SI, V16HF, UHI, INT) +DEF_FUNCTION_TYPE (V16HF, V16SF, V16HF, UHI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 4e6d08c2d3f..2992bd0383d 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2867,6 
+2867,14 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v4di_mask, "__builtin_ia32_vcvtqq2ph_v4di_mask", IX86_BUILTIN_VCVTQQ2PH_V4DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v2di_mask, "__builtin_ia32_vcvtuqq2ph_v2di_mask", IX86_BUILTIN_VCVTUQQ2PH_V2DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DI_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v4di_mask, "__builtin_ia32_vcvtuqq2ph_v4di_mask", IX86_BUILTIN_VCVTUQQ2PH_V4DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv2df2_mask, "__builtin_ia32_vcvtph2pd_v2df_mask", IX86_BUILTIN_VCVTPH2PD_V2DF_MASK, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv4df2_mask, "__builtin_ia32_vcvtph2pd_v4df_mask", IX86_BUILTIN_VCVTPH2PD_V4DF_MASK, UNKNOWN, (int) V4DF_FTYPE_V8HF_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv4sf2_mask, "__builtin_ia32_vcvtph2ps_v4sf_mask", IX86_BUILTIN_VCVTPH2PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8sf2_mask, "__builtin_ia32_vcvtph2ps_v8sf_mask", IX86_BUILTIN_VCVTPH2PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HF_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v4sf_mask, "__builtin_ia32_vcvtps2ph_v4sf_mask", IX86_BUILTIN_VCVTPS2PH_V4SF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v8sf_mask, "__builtin_ia32_vcvtps2ph_v8sf_mask", 
IX86_BUILTIN_VCVTPS2PH_V8SF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v2df_mask, "__builtin_ia32_vcvtpd2ph_v2df_mask", IX86_BUILTIN_VCVTPD2PH_V2DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v4df_mask, "__builtin_ia32_vcvtpd2ph_v4df_mask", IX86_BUILTIN_VCVTPD2PH_V4DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DF_V8HF_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3124,6 +3132,10 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd_v8df_mask_round", IX86_BUILTIN_VCVTPH2PD_V8DF_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2ps_v16sf_mask_round", IX86_BUILTIN_VCVTPH2PS_V16SF_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph_v8df_mask_round", IX86_BUILTIN_VCVTPD2PH_V8DF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2ph_v16sf_mask_round", IX86_BUILTIN_VCVTPS2PH_V16SF_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index b83c6d9a92b..a216f6f2bf3 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -9566,9 +9566,11 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8SF_FTYPE_V8HI_V8SF_UQI: case V4SF_FTYPE_V8HI_V4SF_UQI: case V8SI_FTYPE_V8HF_V8SI_UQI: + case V8SF_FTYPE_V8HF_V8SF_UQI: case V8SI_FTYPE_V8SF_V8SI_UQI: case V4SI_FTYPE_V4SF_V4SI_UQI: case V4SI_FTYPE_V8HF_V4SI_UQI: + case V4SF_FTYPE_V8HF_V4SF_UQI: case V4DI_FTYPE_V8HF_V4DI_UQI: case V4DI_FTYPE_V4SF_V4DI_UQI: case V2DI_FTYPE_V8HF_V2DI_UQI: @@ -9576,12 +9578,18 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8HF_FTYPE_V8HF_V8HF_UQI: case V8HF_FTYPE_V8HI_V8HF_UQI: case V8HF_FTYPE_V8SI_V8HF_UQI: + case V8HF_FTYPE_V8SF_V8HF_UQI: case V8HF_FTYPE_V4SI_V8HF_UQI: + case V8HF_FTYPE_V4SF_V8HF_UQI: case V8HF_FTYPE_V4DI_V8HF_UQI: + case V8HF_FTYPE_V4DF_V8HF_UQI: case V8HF_FTYPE_V2DI_V8HF_UQI: + case V8HF_FTYPE_V2DF_V8HF_UQI: case V4SF_FTYPE_V4DI_V4SF_UQI: case V4SF_FTYPE_V2DI_V4SF_UQI: case V4DF_FTYPE_V4DI_V4DF_UQI: + case V4DF_FTYPE_V8HF_V4DF_UQI: + case V2DF_FTYPE_V8HF_V2DF_UQI: case V2DF_FTYPE_V2DI_V2DF_UQI: case V16QI_FTYPE_V8HI_V16QI_UQI: case V16QI_FTYPE_V16HI_V16QI_UHI: @@ -10527,6 +10535,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V8DI_FTYPE_V8DF_V8DI_QI_INT: case V8SF_FTYPE_V8DI_V8SF_QI_INT: case V8DF_FTYPE_V8DI_V8DF_QI_INT: + case V8DF_FTYPE_V8HF_V8DF_UQI_INT: + case V16SF_FTYPE_V16HF_V16SF_UHI_INT: case V32HF_FTYPE_V32HI_V32HF_USI_INT: case V32HF_FTYPE_V32HF_V32HF_USI_INT: case V16SF_FTYPE_V16SF_V16SF_HI_INT: @@ -10540,6 +10550,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: 
case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: case V8HF_FTYPE_V8DI_V8HF_UQI_INT: + case V8HF_FTYPE_V8DF_V8HF_UQI_INT: + case V16HF_FTYPE_V16SF_V16HF_UHI_INT: nargs = 4; break; case V4SF_FTYPE_V4SF_V4SF_INT_INT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c16e0dc46a7..7447d6b75b5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -621,6 +621,9 @@ (define_mode_iterator V48_AVX2 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) +(define_mode_iterator VF4_128_8_256 + [V4DF V4SF]) + (define_mode_iterator VI1_AVX512VLBW [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")]) @@ -783,6 +786,8 @@ (define_mode_iterator VI48F_256_512 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_mode_iterator VF48_I1248 [V16SI V16SF V8DI V8DF V32HI V64QI]) +(define_mode_iterator VF48H_AVX512VL + [V8DF V16SF (V8SF "TARGET_AVX512VL")]) (define_mode_iterator VI48F [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") @@ -957,7 +962,8 @@ (define_mode_attr ssehalfvecmodelower (define_mode_attr ssePHmode [(V32HI "V32HF") (V16HI "V16HF") (V8HI "V8HF") (V16SI "V16HF") (V8SI "V8HF") (V4SI "V8HF") - (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")]) + (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF") + (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")]) ;; Mapping of vector modes to packed single mode of the same size (define_mode_attr ssePSmode @@ -1101,7 +1107,8 @@ (define_mode_attr sserotatemax ;; Mapping of mode to cast intrinsic name (define_mode_attr castmode - [(V8SI "si") (V8SF "ps") (V4DF "pd") + [(V4SF "ps") (V2DF "pd") + (V8SI "si") (V8SF "ps") (V4DF "pd") (V16SI "si") (V16SF "ps") (V8DF "pd")]) ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise. 
@@ -5440,7 +5447,9 @@ (define_int_attr sseintconvertsignprefix (define_mode_attr qq2phsuff [(V32HI "") (V16HI "") (V8HI "") (V16SI "") (V8SI "{y}") (V4SI "{x}") - (V8DI "{z}") (V4DI "{y}") (V2DI "{x}")]) + (V8DI "{z}") (V4DI "{y}") (V2DI "{x}") + (V16SF "") (V8SF "{y}") (V4SF "{x}") + (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")]) (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>" [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v") @@ -5686,6 +5695,180 @@ (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>" (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) +(define_mode_attr ph2pssuffix + [(V16SF "x") (V8SF "x") (V4SF "x") + (V8DF "") (V4DF "") (V2DF "")]) + +(define_insn "avx512fp16_float_extend_ph<mode>2<mask_name><round_saeonly_name>" + [(set (match_operand:VF48H_AVX512VL 0 "register_operand" "=v") + (float_extend:VF48H_AVX512VL + (match_operand:<ssePHmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] + "TARGET_AVX512FP16" + "vcvtph2<castmode><ph2pssuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_float_extend_ph<mode>2<mask_name>" + [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v") + (float_extend:VF4_128_8_256 + (vec_select:V4HF + (match_operand:V8HF 1 "nonimmediate_operand" "vm") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2<castmode><ph2pssuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx512fp16_float_extend_phv2df2<mask_name>" + [(set (match_operand:V2DF 0 "register_operand" "=v") + (float_extend:V2DF + (vec_select:V2HF + (match_operand:V8HF 1 
"nonimmediate_operand" "vm") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtph2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>" + [(set (match_operand:<ssePHmode> 0 "register_operand" "=v") + (float_truncate:<ssePHmode> + (match_operand:VF48H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))] + "TARGET_AVX512FP16" + "vcvt<castmode>2ph<ph2pssuffix><round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<castmode>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (match_operand:V4HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvt<castmode>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && 
TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V4HFmode);") + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand" "vm")) + (vec_select:V4HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V4HF + (float_truncate:V4HF (match_operand:VF4_128_8_256 1 + "vector_operand" "vm")) + (match_operand:V4HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V4HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx512fp16_vcvtpd2ph_v2df" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm")) + (match_dup 2)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[2] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvtpd2ph_v2df" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm")) + (match_operand:V6HF 2 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr 
"prefix" "evex") + (set_attr "mode" "TI")]) + +(define_expand "avx512fp16_vcvtpd2ph_v2df_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_dup 4)))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "operands[4] = CONST0_RTX (V6HFmode);") + +(define_insn "*avx512fp16_vcvtpd2ph_v2df_mask" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm")) + (vec_select:V2HF + (match_operand:V8HF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*avx512fp16_vcvtpd2ph_v2df_mask_1" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_concat:V8HF + (vec_merge:V2HF + (float_truncate:V2HF (match_operand:V2DF 1 + "vector_operand" "vm")) + (match_operand:V2HF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V6HF 4 "const0_operand" "C")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" + "vcvtpd2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point conversion operations diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 595a6ac007a..f186f8c40f3 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ 
b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -749,6 +749,10 @@ #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 0d976fb0de4..0e88174e636 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -766,6 +766,10 @@ #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) 
__builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 403f3af6067..5c3e370d4a7 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -687,6 +687,8 @@ test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8) test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_pd, __m512d, __m128h, 8) +test_1 (_mm512_cvtx_roundph_ps, __m512, __m256h, 8) test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) @@ -696,6 +698,8 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvtx_roundps_ph, __m256h, __m512, 8) +test_1 (_mm512_cvt_roundpd_ph, __m128h, __m512d, 8) test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8) @@ -751,6 +755,8 @@ test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_pd, __m512d, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtx_roundph_ps, __m512, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) @@ -758,6 +764,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, 
__m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8) +test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8) test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) @@ -809,6 +817,8 @@ test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_pd, __m512d, __m512d, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtx_roundph_ps, __m512, __m512, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) @@ -816,6 +826,8 @@ test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8) test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x 
(_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index b980ac3cddd..5bf94d56ce3 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -794,6 +794,8 @@ test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvtt_roundph_epi32, __m512i, __m256h, 8) test_1 (_mm512_cvtt_roundph_epu32, __m512i, __m256h, 8) test_1 (_mm512_cvtt_roundph_epi64, __m512i, __m128h, 8) +test_1 (_mm512_cvt_roundph_pd, __m512d, __m128h, 8) +test_1 (_mm512_cvtx_roundph_ps, __m512, __m256h, 8) test_1 (_mm512_cvtt_roundph_epu64, __m512i, __m128h, 8) test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8) test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8) @@ -801,6 +803,8 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm512_cvtx_roundps_ph, __m256h, __m512, 8) +test_1 (_mm512_cvt_roundpd_ph, __m128h, __m512d, 8) test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8) @@ -855,6 +859,8 @@ test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvtt_roundph_epi32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epu32, __m512i, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epi64, __m512i, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvt_roundph_pd, __m512d, __mmask8, __m128h, 8) +test_2 (_mm512_maskz_cvtx_roundph_ps, __m512, __mmask16, __m256h, 8) test_2 (_mm512_maskz_cvtt_roundph_epu64, __m512i, __mmask8, __m128h, 8) test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8) @@ -862,6 +868,8 @@ test_2 
(_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm512_maskz_cvtx_roundps_ph, __m256h, __mmask16, __m512, 8) +test_2 (_mm512_maskz_cvt_roundpd_ph, __m128h, __mmask8, __m512d, 8) test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) @@ -912,6 +920,8 @@ test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvtt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvt_roundph_pd, __m512d, __m512d, __mmask8, __m128h, 8) +test_3 (_mm512_mask_cvtx_roundph_ps, __m512, __m512, __mmask16, __m256h, 8) test_3 (_mm512_mask_cvtt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8) test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8) @@ -919,6 +929,8 @@ test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8) test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8) test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8) +test_3 (_mm512_mask_cvtx_roundps_ph, __m256h, __m256h, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvt_roundpd_ph, __m128h, __m128h, __mmask8, __m512d, 8) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) 
test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 1bd734a9352..2f27d9a1e87 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -767,6 +767,10 @@ #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) #define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtph2pd_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtph2ps_v16sf_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, D) __builtin_ia32_vcvtpd2ph_v8df_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, D) __builtin_ia32_vcvtps2ph_v16sf_mask_round(A, B, C, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) -- 2.18.1