From: "Hu, Lin1" <lin1...@intel.com> gcc/ChangeLog:
	* config/i386/avx10_2-512satcvtintrin.h: Add new intrinsics.
	* config/i386/avx10_2satcvtintrin.h: Ditto.
	* config/i386/i386-builtin.def (BDESC): Add new builtins.
	* config/i386/sse.md (UNSPEC_SFIX_SATURATION, UNSPEC_UFIX_SATURATION):
	New unspecs.
	(VF1_VF2_AVX10_2, VF2_AVX10_2, VI8_AVX10_2): New mode iterators.
	(UNSPEC_SAT_CVT_DS_SIGN_ITER): New int iterator.
	(sat_cvt_sign_prefix): Add entries for the new unspecs.
	(pd2dqssuff): New mode attr.
	(avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>):
	New.
	(avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>):
	Ditto.
	(avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>):
	Ditto.
	(avx10_2_vcvttsd2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>):
	Ditto.
	(avx10_2_vcvttss2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>):
	Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add macros.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/avx10_2-512-satcvt-1.c: Add tests for new intrinsics.
	* gcc.target/i386/avx10_2-satcvt-1.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: New test.
	* gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttsd2sis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttsd2usis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttss2sis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttss2usis-2.c: Ditto.
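
For reviewers, a quick sketch of how the new truncating-with-saturation
conversions are meant to be used (illustrative only, not part of the patch;
the function name use_satcvt is hypothetical, _MM_FROUND_NO_EXC is the usual
SAE constant from immintrin.h, and the -mavx10.2-512 option name is assumed):

    #include <immintrin.h>

    /* Out-of-range inputs saturate to the destination type's limits
       instead of producing the integer-indefinite value.  */
    __m256i
    use_satcvt (__m512d pd, __m128d sd)
    {
      /* Eight doubles -> eight saturated int32s, exceptions suppressed.  */
      __m256i v = _mm512_cvtts_roundpd_epi32 (pd, _MM_FROUND_NO_EXC);
      /* Scalar form: the low double -> one saturated int32.  */
      int s = _mm_cvtts_roundsd_epi32 (sd, _MM_FROUND_NO_EXC);
      return _mm256_add_epi32 (v, _mm256_set1_epi32 (s));
    }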
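Likewise for the rounding-free 128-bit forms and their masking variants
(again only a sketch, with a hypothetical function name; the comments assume
the standard AVX-512 merge/zero masking convention):

    #include <immintrin.h>

    __m128i
    use_satcvt_masked (__m128i src, __mmask8 m, __m128d a)
    {
      /* Lanes whose mask bit is zero keep their value from SRC...  */
      __m128i merged = _mm_mask_cvttspd_epi32 (src, m, a);
      /* ...or are zeroed with the maskz form.  */
      __m128i zeroed = _mm_maskz_cvttspd_epi32 (m, a);
      return _mm_or_si128 (merged, zeroed);
    }
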
--- gcc/config/i386/avx10_2-512satcvtintrin.h | 456 +++++++ gcc/config/i386/avx10_2satcvtintrin.h | 1055 ++++++++++++++++- gcc/config/i386/i386-builtin.def | 33 + gcc/config/i386/sse.md | 83 +- gcc/testsuite/gcc.target/i386/avx-1.c | 26 + .../gcc.target/i386/avx10_2-512-satcvt-1.c | 59 + .../i386/avx10_2-512-vcvttpd2dqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttpd2qqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttpd2udqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttpd2uqqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttps2dqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttps2qqs-2.c | 73 ++ .../i386/avx10_2-512-vcvttps2udqs-2.c | 72 ++ .../i386/avx10_2-512-vcvttps2uqqs-2.c | 72 ++ .../gcc.target/i386/avx10_2-satcvt-1.c | 138 +++ .../gcc.target/i386/avx10_2-vcvttpd2dqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttpd2qqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttpd2udqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2dqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2qqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2udqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2uqqs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttsd2sis-2.c | 47 + .../gcc.target/i386/avx10_2-vcvttsd2usis-2.c | 47 + .../gcc.target/i386/avx10_2-vcvttss2sis-2.c | 47 + .../gcc.target/i386/avx10_2-vcvttss2usis-2.c | 46 + gcc/testsuite/gcc.target/i386/sse-13.c | 26 + gcc/testsuite/gcc.target/i386/sse-14.c | 58 + gcc/testsuite/gcc.target/i386/sse-22.c | 58 + gcc/testsuite/gcc.target/i386/sse-23.c | 26 + 31 files changed, 2870 insertions(+), 40 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h index 4286458c413..d625a644948 100644 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ b/gcc/config/i386/avx10_2-512satcvtintrin.h @@ -438,6 +438,286 @@ _mm512_maskz_ipcvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) (__mmask16) __U, __R); } + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi32 (__m512d __A, 
const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + 
(__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} #else #define _mm512_ipcvt_roundph_epi16(A, R) \ ((__m512i) \ @@ -614,6 +894,182 @@ _mm512_maskz_ipcvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) (_mm512_setzero_si512 ()), \ (__mmask16) (U), \ (R))) + +#define _mm512_cvtts_roundpd_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ + 
((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) #endif #ifdef __DISABLE_AVX10_2_512__ diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h index 4fcf78955df..d0e3e3790c4 100644 --- a/gcc/config/i386/avx10_2satcvtintrin.h +++ b/gcc/config/i386/avx10_2satcvtintrin.h @@ -510,6 +510,238 @@ _mm_maskz_ipcvttps_epu32 (__mmask8 __U, __m128 __A) (__mmask8) __U); } +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttspd_epi32 (__m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttspd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttspd_epi32 (__mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttspd_epi64 (__m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A, + (__v2di) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttspd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A, + (__v2di) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttspd_epi64 (__mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttspd_epu32 (__m128d __A) 
+{ + return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttspd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttspd_epu32 (__mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttspd_epu64 (__m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A, + (__v2di) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttspd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A, + (__v2di) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttspd_epu64 (__mmask8 __U, __m128d __A) +{ + return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsps_epi32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttsps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttsps_epi32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsps_epi64 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A, + (__v2di) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttsps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A, + (__v2di) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttsps_epi64 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsps_epu32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttsps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) 
__builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttsps_epu32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsps_epu64 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A, + (__v2di) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvttsps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A, + (__v2di) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvttsps_epu64 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + #ifdef __OPTIMIZE__ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -790,51 +1022,363 @@ _mm256_maskz_ipcvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) (__mmask8) __U, __R); } -#else -#define _mm256_ipcvt_roundph_epi16(A, R) \ - ((__m256i) \ - __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ - (__v16hi) \ - (_mm256_undefined_si256 ()), \ - (__mmask16) (-1), \ - (R))) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundpd_epi32 (__m256d __A, const int __R) +{ + return + (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1, + __R); +} -#define _mm256_mask_ipcvt_roundph_epi16(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ - (__v16hi) (W), \ - (__mmask16) (U), \ - (R))) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A, + const int __R) +{ + return (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A, + (__v4si) __W, + (__mmask8) __U, + __R); +} -#define _mm256_maskz_ipcvt_roundph_epi16(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ - (__v16hi) \ - (_mm256_setzero_si256 ()), \ - (__mmask16) (U), \ - (R))) +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R) +{ + return + (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U, + __R); +} -#define _mm256_ipcvt_roundph_epu16(A, R) \ - ((__m256i) \ - __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ - (__v16hi) \ - (_mm256_undefined_si256 ()), \ - (__mmask16) (-1), \ - (R))) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundpd_epi64 (__m256d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A, + (__v4di) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} -#define _mm256_mask_ipcvt_roundph_epu16(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ - (__v16hi) (W), \ - (__mmask16) 
(U), \ - (R))) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A, + (__v4di) __W, + (__mmask8) __U, + __R); +} -#define _mm256_maskz_ipcvt_roundph_epu16(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ - (__v16hi) \ - (_mm256_setzero_si256 ()), \ - (__mmask16) (U), \ - (R))) +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundpd_epu32 (__m256d __A, const int __R) +{ + return + (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1, + __R); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A, + const int __R) +{ + return (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A, + (__v4si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R) +{ + return + (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundpd_epu64 (__m256d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A, + (__v4di) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A, + (__v4di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundps_epi32 (__m256 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A, 
+ (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundps_epi64 (__m128 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A, + (__v4di) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A, + (__v4di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundps_epu32 (__m256 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtts_roundps_epu64 (__m128 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A, + (__v4di) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_cvtts_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A, + (__v4di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundsd_epi32 (__m128d __A, const int __R) +{ + return (int) __builtin_ia32_cvttsd2sis32_round ((__v2df) __A, + __R); +} + +extern __inline unsigned int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundsd_epu32 (__m128d __A, const int __R) +{ + return (unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) __A, + __R); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundss_epi32 (__m128 __A, const int __R) +{ + return (int) __builtin_ia32_cvttss2sis32_round ((__v4sf) __A, + __R); +} + +extern 
__inline unsigned int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundss_epu32 (__m128 __A, const int __R) +{ + return (unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) __A, + __R); +} +#else + +#define _mm256_ipcvt_roundph_epi16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundph_epi16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvt_roundph_epi16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvt_roundph_epu16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundph_epu16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvt_roundph_epu16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) #define _mm256_ipcvt_roundps_epi32(A, R) \ ((__m256i) \ @@ -1012,7 +1556,440 @@ _mm256_maskz_ipcvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) + +#define _mm256_cvtts_roundpd_epi32(A, R) \ + ((__m128i) \ + __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \ + (__v4si) \ + (_mm_undefined_si128 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \ + (__v4si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \ + (__v4si) \ + (_mm_setzero_si128 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundpd_epi64(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \ + (__v4di) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \ + (__v4di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \ + (__v4di) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundpd_epu32(A, R) \ + ((__m128i) \ + __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \ + (__v4si) \ + (_mm_undefined_si128 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \ + (__v4si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m128i) \ + __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \ + (__v4si) (_mm_setzero_si128 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundpd_epu64(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \ + (__v4di) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \ + 
((__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \ + (__v4di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \ + (__v4di) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundps_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundps_epi64(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \ + (__v4di) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \ + (__v4di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \ + (__v4di) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundps_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_cvtts_roundps_epu64(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \ + (__v4di) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \ + (__v4di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \ + (__v4di) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm_cvtts_roundsd_epi32(A, R) \ + ((int) __builtin_ia32_cvttsd2sis32_round ((__v2df) (A), \ + (R))) + +#define _mm_cvtts_roundsd_epu32(A, R) \ + ((unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) (A), \ + (R))) + +#define _mm_cvtts_roundss_epi32(A, R) \ + ((int) __builtin_ia32_cvttss2sis32_round ((__v4sf) (A), \ + (R))) + +#define _mm_cvtts_roundss_epu32(A, R) \ + ((unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) (A), \ + (R)))
+#endif + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundsd_epi64 (__m128d __A, const int __R) +{ + return (long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) __A, + __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundsd_epu64 (__m128d __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) __A, + __R); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundss_epi64 (__m128 __A, const int __R) +{ + return (long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) __A, + __R); +} + + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtts_roundss_epu64 (__m128 __A, const int __R) +{ + return (unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) __A, + __R); +} +#else + +#define _mm_cvtts_roundsd_epi64(A, R) \ + ((long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) (A), \ + (R))) + +#define _mm_cvtts_roundsd_epu64(A, R) \ + ((unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) (A), \ + (R))) + +#define _mm_cvtts_roundss_epi64(A, R) \ + ((long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) (A), \ + (R))) + +#define _mm_cvtts_roundss_epu64(A, R) \ + ((unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) (A), \ + (R))) #endif +#endif /* __x86_64__ */ #ifdef __DISABLE_AVX10_2_256__ #undef __DISABLE_AVX10_2_256__ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index b85eba5b330..d39274bc323 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3290,6 +3290,14 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2ibsv8hf_mask, "_ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2iubsv8hf_mask, "__builtin_ia32_cvttph2iubs128_mask", IX86_BUILTIN_CVTTPH2IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv4sf_mask, "__builtin_ia32_cvttps2ibs128_mask", IX86_BUILTIN_CVTTPS2IBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv4sf_mask, "__builtin_ia32_cvttps2iubs128_mask", IX86_BUILTIN_CVTTPS2IUBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv2df_mask,
"__builtin_ia32_cvttpd2dqs128_mask", IX86_BUILTIN_VCVTTPD2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv2df_mask, "__builtin_ia32_cvttpd2qqs128_mask", IX86_BUILTIN_VCVTTPD2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv2df_mask, "__builtin_ia32_cvttpd2udqs128_mask", IX86_BUILTIN_VCVTTPD2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv2df_mask, "__builtin_ia32_cvttpd2uqqs128_mask", IX86_BUILTIN_VCVTTPD2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv4sf_mask, "__builtin_ia32_cvttps2dqs128_mask", IX86_BUILTIN_VCVTTPS2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3767,6 +3775,31 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2ibsv16sf_mask_ro BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv8sf_mask_round, "__builtin_ia32_cvttps2iubs256_mask_round", IX86_BUILTIN_CVTTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2iubsv16sf_mask_round, "__builtin_ia32_cvttps2iubs512_mask_round", IX86_BUILTIN_CVTTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv4df_mask_round, "__builtin_ia32_cvttpd2dqs256_mask_round", IX86_BUILTIN_VCVTTPD2DQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2dqsv8df_mask_round, "__builtin_ia32_cvttpd2dqs512_mask_round", IX86_BUILTIN_VCVTTPD2DQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv4df_mask_round, "__builtin_ia32_cvttpd2qqs256_mask_round", IX86_BUILTIN_VCVTTPD2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2qqsv8df_mask_round, "__builtin_ia32_cvttpd2qqs512_mask_round", IX86_BUILTIN_VCVTTPD2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv4df_mask_round, "__builtin_ia32_cvttpd2udqs256_mask_round", IX86_BUILTIN_VCVTTPD2UDQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2udqsv8df_mask_round, "__builtin_ia32_cvttpd2udqs512_mask_round", IX86_BUILTIN_VCVTTPD2UDQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv4df_mask_round, "__builtin_ia32_cvttpd2uqqs256_mask_round", IX86_BUILTIN_VCVTTPD2UQQS256_MASK_ROUND, UNKNOWN, (int) 
V4DI_FTYPE_V4DF_V4DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2uqqsv8df_mask_round, "__builtin_ia32_cvttpd2uqqs512_mask_round", IX86_BUILTIN_VCVTTPD2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv8sf_mask_round, "__builtin_ia32_cvttps2dqs256_mask_round", IX86_BUILTIN_VCVTTPS2DQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2dqsv16sf_mask_round, "__builtin_ia32_cvttps2dqs512_mask_round", IX86_BUILTIN_VCVTTPS2DQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv4di_mask_round, "__builtin_ia32_cvttps2qqs256_mask_round", IX86_BUILTIN_VCVTTPS2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2qqsv8di_mask_round, "__builtin_ia32_cvttps2qqs512_mask_round", IX86_BUILTIN_VCVTTPS2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv8sf_mask_round, "__builtin_ia32_cvttps2udqs256_mask_round", IX86_BUILTIN_VCVTTPS2UDQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2udqsv16sf_mask_round, "__builtin_ia32_cvttps2udqs512_mask_round", IX86_BUILTIN_VCVTTPS2UDQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv4di_mask_round, "__builtin_ia32_cvttps2uqqs256_mask_round", IX86_BUILTIN_VCVTTPS2UQQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2uqqsv8di_mask_round, "__builtin_ia32_cvttps2uqqs512_mask_round", IX86_BUILTIN_VCVTTPS2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sissi_round, "__builtin_ia32_cvttsd2sis32_round", IX86_BUILTIN_VCVTTSD2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sisdi_round, "__builtin_ia32_cvttsd2sis64_round", IX86_BUILTIN_VCVTTSD2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usissi_round, "__builtin_ia32_cvttsd2usis32_round", IX86_BUILTIN_VCVTTSD2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usisdi_round, "__builtin_ia32_cvttsd2usis64_round", IX86_BUILTIN_VCVTTSD2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sissi_round, "__builtin_ia32_cvttss2sis32_round", IX86_BUILTIN_VCVTTSS2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sisdi_round, "__builtin_ia32_cvttss2sis64_round", IX86_BUILTIN_VCVTTSS2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usissi_round, "__builtin_ia32_cvttss2usis32_round", IX86_BUILTIN_VCVTTSS2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usisdi_round, "__builtin_ia32_cvttss2usis64_round", IX86_BUILTIN_VCVTTSS2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT) + BDESC_END (ROUND_ARGS, MULTI_ARG) /* FMA4 and XOP. 
*/ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0de94187e69..7c40079047a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -247,6 +247,8 @@ UNSPEC_VCVTTPH2IUBS UNSPEC_VCVTTPS2IBS UNSPEC_VCVTTPS2IUBS + UNSPEC_SFIX_SATURATION + UNSPEC_UFIX_SATURATION ]) (define_c_enum "unspecv" [ @@ -375,6 +377,10 @@ (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL") (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")]) +(define_mode_iterator VF1_VF2_AVX10_2 + [(V16SF "TARGET_AVX10_2_512") V8SF V4SF + (V8DF "TARGET_AVX10_2_512") V4DF V2DF]) + (define_mode_iterator VFH [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") @@ -423,6 +429,9 @@ (define_mode_iterator VF2 [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) +(define_mode_iterator VF2_AVX10_2 + [(V8DF "TARGET_AVX10_2_512") V4DF V2DF]) + ;; All DFmode & HFmode vector float modes (define_mode_iterator VF2H [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") @@ -570,6 +579,9 @@ (define_mode_iterator VI8 [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) +(define_mode_iterator VI8_AVX10_2 + [(V8DI "TARGET_AVX10_2_512") V4DI V2DI]) + (define_mode_iterator VI8_FVL [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")]) @@ -32228,7 +32240,9 @@ (UNSPEC_VCVTPS2IBS "") (UNSPEC_VCVTPS2IUBS "u") (UNSPEC_VCVTTPS2IBS "") - (UNSPEC_VCVTTPS2IUBS "u")]) + (UNSPEC_VCVTTPS2IUBS "u") + (UNSPEC_SFIX_SATURATION "") + (UNSPEC_UFIX_SATURATION "u")]) (define_int_attr sat_cvt_trunc_prefix @@ -32307,3 +32321,70 @@ [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) + +(define_int_iterator UNSPEC_SAT_CVT_DS_SIGN_ITER + [UNSPEC_SFIX_SATURATION + UNSPEC_UFIX_SATURATION]) + +(define_mode_attr pd2dqssuff + [(V16SF "") (V8SF "") (V4SF "") + (V8DF "") (V4DF "{y}") (V2DF "{x}")]) + +(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>" + [(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v") + (unspec:<VEC_GATHER_IDXSI> + [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + UNSPEC_SAT_CVT_DS_SIGN_ITER))] + "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>" + "vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" + [(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v") + (unspec:<VEC_GATHER_IDXDI> + [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + UNSPEC_SAT_CVT_DS_SIGN_ITER))] + "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>" + "vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" + [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v") + (unspec:VI8_AVX10_2 + [(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + UNSPEC_SAT_CVT_DS_SIGN_ITER))] + "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>" + 
"vcvttps2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_vcvttsd2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(vec_select:DF + (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>") + (parallel [(const_int 0)]))] + UNSPEC_SAT_CVT_DS_SIGN_ITER))] + "TARGET_AVX10_2_256" + "vcvttsd2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx10_2_vcvttss2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(vec_select:SF + (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>") + (parallel [(const_int 0)]))] + UNSPEC_SAT_CVT_DS_SIGN_ITER))] + "TARGET_AVX10_2_256" + "vcvttss2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index be2fb5ae15a..30c071adf13 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1044,6 +1044,14 @@ #define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8) /* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8) @@ -1054,6 +1062,24 @@ #define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8) +#define 
__builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8) +#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8) +#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8) +#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8) +#ifdef __x86_64__ +#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8) +#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8) +#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8) +#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8) +#endif #include <wmmintrin.h> #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c index 84826c0fe5a..ecc356aab94 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c @@ -36,12 +36,39 @@ /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> +volatile __m256 hx; +volatile __m256i hxi; volatile __m512 x; volatile __m512h xh; volatile __m512i xi; +volatile __m512d xd; volatile __m512bh xbh; volatile __mmask8 m8; volatile __mmask16 m16; @@ -97,4 +124,36 @@ avx10_2_test (void) xi = _mm512_ipcvttnebf16_epu16 (xbh); xi = _mm512_mask_ipcvttnebf16_epu16 (xi, m32, xbh); xi = _mm512_maskz_ipcvttnebf16_epu16 (m32, xbh); + + hxi = _mm512_cvtts_roundpd_epi32 (xd, 8); + hxi = _mm512_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8); + hxi = _mm512_maskz_cvtts_roundpd_epi32 (m8, xd, 8); + + xi = _mm512_cvtts_roundpd_epi64 (xd, 8); + xi = _mm512_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8); + xi = _mm512_maskz_cvtts_roundpd_epi64 (m8, xd, 8); + + hxi = _mm512_cvtts_roundpd_epu32 (xd, 8); + hxi = _mm512_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8); + hxi = _mm512_maskz_cvtts_roundpd_epu32 (m8, xd, 8); + + xi = _mm512_cvtts_roundpd_epu64 (xd, 8); + xi = _mm512_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8); + xi = _mm512_maskz_cvtts_roundpd_epu64 (m8, xd, 8); + + xi = _mm512_cvtts_roundps_epi32 (x, 8); + xi = _mm512_mask_cvtts_roundps_epi32 (xi, m16, x, 8); + xi = _mm512_maskz_cvtts_roundps_epi32 (m16, x, 8); + + xi = _mm512_cvtts_roundps_epi64 (hx, 8); + xi = _mm512_mask_cvtts_roundps_epi64 (xi, m8, hx, 8); + xi = _mm512_maskz_cvtts_roundps_epi64 (m8, hx, 8); + + xi = 
_mm512_cvtts_roundps_epu32 (x, 8); + xi = _mm512_mask_cvtts_roundps_epu32 (xi, m16, x, 8); + xi = _mm512_maskz_cvtts_roundps_epu32 (m16, x, 8); + + xi = _mm512_cvtts_roundps_epu64 (hx, 8); + xi = _mm512_mask_cvtts_roundps_epu64 (xi, m8, hx, 8); + xi = _mm512_maskz_cvtts_roundps_epu64 (m8, hx, 8); } diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c new file mode 100644 index 00000000000..dd7ea88cb82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 64) +#define DST_SIZE ((AVX512F_LEN_HALF) / 32) + +static void +CALC (double *s, int *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > INT_MAX) + r[i] = INT_MAX; + else if (s[i] < INT_MIN) + r[i] = INT_MIN; + else + r[i] = s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, d) s; + UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttspd_epi32) (s.x); + res2.x = INTRINSIC (_mask_cvttspd_epi32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_cvttspd_epi32) (mask, s.x); +#else + res1.x = INTRINSIC (_cvtts_roundpd_epi32) (s.x, 8); + res2.x = INTRINSIC (_mask_cvtts_roundpd_epi32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c new file mode 100644 index 00000000000..a28643152ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 64) +#define DST_SIZE ((AVX512F_LEN) / 64) + +static void +CALC (double *s, long long *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > LLONG_MAX) + r[i] = LLONG_MAX; + else if (s[i] < LLONG_MIN) + r[i] = LLONG_MIN; + else + r[i] = s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, d) s; + UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + long long res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttspd_epi64) (s.x); + res2.x = INTRINSIC (_mask_cvttspd_epi64) (res2.x, mask, s.x); + res3.x = INTRINSIC 
(_maskz_cvttspd_epi64) (mask, s.x);
+#else
+  res1.x = INTRINSIC (_cvtts_roundpd_epi64) (s.x, 8);
+  res2.x = INTRINSIC (_mask_cvtts_roundpd_epi64) (res2.x, mask, s.x, 8);
+  res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi64) (mask, s.x, 8);
+#endif
+
+  CALC (s.a, res_ref);
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_q) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_q) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c
new file mode 100644
index 00000000000..768567747a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, unsigned int *r)
+{
+  int i;
+
+  for (i = 0; i < SRC_SIZE; i++)
+    {
+      if (s[i] > UINT_MAX)
+        r[i] = UINT_MAX;
+      else if (s[i] < 0)
+        r[i] = 0;
+      else
+        r[i] = s[i];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, d) s;
+  UNION_TYPE (AVX512F_LEN_HALF, i_ud) res1, res2, res3;
+  MASK_TYPE mask = MASK_VALUE;
+  unsigned int res_ref[DST_SIZE] = { 0 };
+  int i, sign = 1;
+
+  for (i = 0; i < SRC_SIZE; i++)
+    {
+      s.a[i] = 1.23 * (i + 2) * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < DST_SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+  res1.x = INTRINSIC (_cvttspd_epu32) (s.x);
+  res2.x = INTRINSIC (_mask_cvttspd_epu32) (res2.x, mask, s.x);
+  res3.x = INTRINSIC (_maskz_cvttspd_epu32) (mask, s.x);
+#else
+  res1.x = INTRINSIC (_cvtts_roundpd_epu32) (s.x, 8);
+  res2.x = INTRINSIC (_mask_cvtts_roundpd_epu32) (res2.x, mask, s.x, 8);
+  res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu32) (mask, s.x, 8);
+#endif
+
+  CALC (s.a, res_ref);
+
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c
new file mode 100644
index 00000000000..dbdd8114241
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (double *s, unsigned long long *r)
+{
+  int i;
+
+  for (i = 0; i < SRC_SIZE; i++)
+    {
+      if (s[i] > ULLONG_MAX)
+        r[i] = ULLONG_MAX;
+      else if (s[i] < 0)
+        r[i] = 0;
+      else
+        r[i] = s[i];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, d) s;
+  UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+  MASK_TYPE mask = MASK_VALUE;
+  unsigned long long res_ref[DST_SIZE] = { 0 };
+  int i, sign = 1;
+
+  for (i = 0; i < SRC_SIZE;
i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttspd_epu64) (s.x); + res2.x = INTRINSIC (_mask_cvttspd_epu64) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_cvttspd_epu64) (mask, s.x); +#else + res1.x = INTRINSIC (_cvtts_roundpd_epu64) (s.x, 8); + res2.x = INTRINSIC (_mask_cvtts_roundpd_epu64) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu64) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref)) + abort (); + + MASK_MERGE (i_uq) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref)) + abort (); + + MASK_ZERO (i_uq) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c new file mode 100644 index 00000000000..7a9b6e31e40 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 32) +#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, int *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > INT_MAX) + r[i] = INT_MAX; + else if (s[i] < INT_MIN) + r[i] = INT_MIN; + else + r[i] = s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttsps_epi32) (s.x); + res2.x = INTRINSIC (_mask_cvttsps_epi32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_cvttsps_epi32) (mask, s.x); +#else + res1.x = INTRINSIC (_cvtts_roundps_epi32) (s.x, 8); + res2.x = INTRINSIC (_mask_cvtts_roundps_epi32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_cvtts_roundps_epi32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c new file mode 100644 index 00000000000..ed19c5e329d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN_HALF) / 32) +#define DST_SIZE ((AVX512F_LEN) / 64) + +static void +CALC (float *s, long long *r) +{ + int i; + + for (i = 0; i < DST_SIZE; i++) + { + if (s[i] > LLONG_MAX) + r[i] = LLONG_MAX; + else if (s[i] < LLONG_MIN) + r[i] = LLONG_MIN; + else + r[i] = 
s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN_HALF, ) s; + UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + long long res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttsps_epi64) (s.x); + res2.x = INTRINSIC (_mask_cvttsps_epi64) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_cvttsps_epi64) (mask, s.x); +#else + res1.x = INTRINSIC (_cvtts_roundps_epi64) (s.x, 8); + res2.x = INTRINSIC (_mask_cvtts_roundps_epi64) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_cvtts_roundps_epi64) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + + if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref)) + abort (); + + MASK_MERGE (i_q) (res_ref, mask, DST_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref)) + abort (); + + MASK_ZERO (i_q) (res_ref, mask, DST_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c new file mode 100644 index 00000000000..b279af29326 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 32) +#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, unsigned int *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UINT_MAX) + r[i] = UINT_MAX; + else if (s[i] < 0) + r[i] = 0; + else + r[i] = s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_ud) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + unsigned int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_cvttsps_epu32) (s.x); + res2.x = INTRINSIC (_mask_cvttsps_epu32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_cvttsps_epu32) (mask, s.x); +#else + res1.x = INTRINSIC (_cvtts_roundps_epu32) (s.x, 8); + res2.x = INTRINSIC (_mask_cvtts_roundps_epu32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_cvtts_roundps_epu32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_ud) (res1, res_ref)) + abort (); + + MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_ud) (res2, res_ref)) + abort (); + + MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_ud) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c new file mode 100644 index 00000000000..7151d079b79 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE 
((AVX512F_LEN_HALF) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, unsigned long long *r)
+{
+  int i;
+
+  for (i = 0; i < SRC_SIZE; i++)
+    {
+      if (s[i] > ULLONG_MAX)
+        r[i] = ULLONG_MAX;
+      else if (s[i] < 0)
+        r[i] = 0;
+      else
+        r[i] = s[i];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN_HALF, ) s;
+  UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+  MASK_TYPE mask = MASK_VALUE;
+  unsigned long long res_ref[DST_SIZE] = { 0 };
+  int i, sign = 1;
+
+  for (i = 0; i < SRC_SIZE; i++)
+    {
+      s.a[i] = 1.23 * (i + 2) * sign;
+      sign = -sign;
+    }
+
+  for (i = 0; i < DST_SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+  res1.x = INTRINSIC (_cvttsps_epu64) (s.x);
+  res2.x = INTRINSIC (_mask_cvttsps_epu64) (res2.x, mask, s.x);
+  res3.x = INTRINSIC (_maskz_cvttsps_epu64) (mask, s.x);
+#else
+  res1.x = INTRINSIC (_cvtts_roundps_epu64) (s.x, 8);
+  res2.x = INTRINSIC (_mask_cvtts_roundps_epu64) (res2.x, mask, s.x, 8);
+  res3.x = INTRINSIC (_maskz_cvtts_roundps_epu64) (mask, s.x, 8);
+#endif
+
+  CALC (s.a, res_ref);
+
+  if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_uq) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_uq) (res_ref, mask, SRC_SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
index f04e3ecb642..83ef63cf067 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
@@ -72,19 +72,81 @@
 /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[
\\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ 
\\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! 
ia32 } } } } */ #include <immintrin.h> volatile __m128 hx; volatile __m128i hxi; volatile __m128h hxh; +volatile __m128d hxd; volatile __m128bh hxbh; volatile __m256 x; volatile __m256h xh; volatile __m256i xi; +volatile __m256d xd; volatile __m256bh xbh; volatile __mmask8 m8; volatile __mmask16 m16; +volatile int i; +volatile unsigned int ui; +volatile long long ll; +volatile unsigned long long ull; void extern avx10_2_test (void) @@ -184,4 +246,80 @@ avx10_2_test (void) hxi = _mm_ipcvttnebf16_epu16 (hxbh); hxi = _mm_mask_ipcvttnebf16_epu16 (hxi, m8, hxbh); hxi = _mm_maskz_ipcvttnebf16_epu16 (m8, hxbh); + + hxi = _mm256_cvtts_roundpd_epi32 (xd, 8); + hxi = _mm256_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8); + hxi = _mm256_maskz_cvtts_roundpd_epi32 (m8, xd, 8); + + xi = _mm256_cvtts_roundpd_epi64 (xd, 8); + xi = _mm256_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8); + xi = _mm256_maskz_cvtts_roundpd_epi64 (m8, xd, 8); + + hxi = _mm256_cvtts_roundpd_epu32 (xd, 8); + hxi = _mm256_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8); + hxi = _mm256_maskz_cvtts_roundpd_epu32 (m8, xd, 8); + + xi = _mm256_cvtts_roundpd_epu64 (xd, 8); + xi = _mm256_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8); + xi = _mm256_maskz_cvtts_roundpd_epu64 (m8, xd, 8); + + xi = _mm256_cvtts_roundps_epi32 (x, 8); + xi = _mm256_mask_cvtts_roundps_epi32 (xi, m16, x, 8); + xi = _mm256_maskz_cvtts_roundps_epi32 (m16, x, 8); + + xi = _mm256_cvtts_roundps_epi64 (hx, 8); + xi = _mm256_mask_cvtts_roundps_epi64 (xi, m8, hx, 8); + xi = _mm256_maskz_cvtts_roundps_epi64 (m8, hx, 8); + + xi = _mm256_cvtts_roundps_epu32 (x, 8); + xi = _mm256_mask_cvtts_roundps_epu32 (xi, m16, x, 8); + xi = _mm256_maskz_cvtts_roundps_epu32 (m16, x, 8); + + xi = _mm256_cvtts_roundps_epu64 (hx, 8); + xi = _mm256_mask_cvtts_roundps_epu64 (xi, m8, hx, 8); + xi = _mm256_maskz_cvtts_roundps_epu64 (m8, hx, 8); + + hxi = _mm_cvttspd_epi32 (hxd); + hxi = _mm_mask_cvttspd_epi32 (hxi, m8, hxd); + hxi = _mm_maskz_cvttspd_epi32 (m8, hxd); + + hxi = _mm_cvttspd_epi64 (hxd); + hxi = _mm_mask_cvttspd_epi64 (hxi, m8, hxd); + hxi = _mm_maskz_cvttspd_epi64 (m8, hxd); + + hxi = _mm_cvttspd_epu32 (hxd); + hxi = _mm_mask_cvttspd_epu32 (hxi, m8, hxd); + hxi = _mm_maskz_cvttspd_epu32 (m8, hxd); + + hxi = _mm_cvttspd_epu64 (hxd); + hxi = _mm_mask_cvttspd_epu64 (hxi, m8, hxd); + hxi = _mm_maskz_cvttspd_epu64 (m8, hxd); + + hxi = _mm_cvttsps_epi32 (hx); + hxi = _mm_mask_cvttsps_epi32 (hxi, m8, hx); + hxi = _mm_maskz_cvttsps_epi32 (m8, hx); + + hxi = _mm_cvttsps_epi64 (hx); + hxi = _mm_mask_cvttsps_epi64 (hxi, m8, hx); + hxi = _mm_maskz_cvttsps_epi64 (m8, hx); + + hxi = _mm_cvttsps_epu32 (hx); + hxi = _mm_mask_cvttsps_epu32 (hxi, m8, hx); + hxi = _mm_maskz_cvttsps_epu32 (m8, hx); + + hxi = _mm_cvttsps_epu64 (hx); + hxi = _mm_mask_cvttsps_epu64 (hxi, m8, hx); + hxi = _mm_maskz_cvttsps_epu64 (m8, hx); + + i = _mm_cvtts_roundsd_epi32 (hxd, 8); + ui = _mm_cvtts_roundsd_epu32 (hxd, 8); + i = _mm_cvtts_roundss_epi32 (hx, 8); + ui = _mm_cvtts_roundss_epu32 (hx, 8); + +#ifdef __x86_64__ + ll = _mm_cvtts_roundsd_epi64 (hxd, 8); + ull = _mm_cvtts_roundsd_epu64 (hxd, 8); + ll = _mm_cvtts_roundss_epi64 (hx, 8); + ull = _mm_cvtts_roundss_epu64 (hx, 8); +#endif } diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c new file mode 100644 index 00000000000..06cbb5b24e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target 
avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2dqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2dqs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c new file mode 100644 index 00000000000..df29d0f14da --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2qqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2qqs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c new file mode 100644 index 00000000000..9e9cea121a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2udqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2udqs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c new file mode 100644 index 00000000000..282b43f56a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2uqqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttpd2uqqs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c new file mode 100644 index 00000000000..57acd36b28f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2dqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2dqs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c new file mode 100644 index 00000000000..1e6bbfd24ea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2qqs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2qqs-2.c" diff --git 
a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c
new file mode 100644
index 00000000000..4b175e694f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c
new file mode 100644
index 00000000000..3abebfb4559
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c
new file mode 100644
index 00000000000..9e4bd71a411
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+  UNION_TYPE (128, d) s;
+  int res1;
+  long long res2;
+  int res1_ref = 0;
+  long long res2_ref = 0;
+  int i, sign = 1;
+
+  s.a[0] = 2.46;
+
+  res1 = _mm_cvtts_roundsd_epi32 (s.x, 8);
+
+  if (s.a[0] > INT_MAX)
+    res1_ref = INT_MAX;
+  else if (s.a[0] < INT_MIN)
+    res1_ref = INT_MIN;
+  else
+    res1_ref = s.a[0];
+
+  if (res1 != res1_ref)
+    abort();
+
+#ifdef __x86_64__
+  res2 = _mm_cvtts_roundsd_epi64 (s.x, 8);
+
+  if (s.a[0] > LLONG_MAX)
+    res2_ref = LLONG_MAX;
+  else if (s.a[0] < LLONG_MIN)
+    res2_ref = LLONG_MIN;
+  else
+    res2_ref = s.a[0];
+
+  if (res2 != res2_ref)
+    abort();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c
new file mode 100644
index 00000000000..b4ab914862b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+  UNION_TYPE (128, d) s;
+  unsigned int res1;
+  unsigned long long res2;
+  unsigned int res1_ref = 0;
+  unsigned long long res2_ref = 0;
+  int i, sign = 1;
+
+  s.a[0] = 2.46;
+
+  res1 = _mm_cvtts_roundsd_epu32 (s.x, 8);
+
+  if (s.a[0] > UINT_MAX)
+    res1_ref = UINT_MAX;
+  else if (s.a[0] < 0)
+    res1_ref = 0;
+  else
+    res1_ref = s.a[0];
+
+  if (res1 != res1_ref)
+    abort();
+
+#ifdef __x86_64__
+  res2 = _mm_cvtts_roundsd_epu64 (s.x, 8);
+
+  if (s.a[0] > ULLONG_MAX)
+    res2_ref = ULLONG_MAX;
+  else if (s.a[0] < 0)
+    res2_ref = 0;
+  else
+    res2_ref = s.a[0];
+
+  if (res2 != res2_ref)
+    abort();
+#endif
}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c
new file mode 100644
index 00000000000..67b6b8d384b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+  UNION_TYPE (128, ) s;
+  int res1;
+  long long res2;
+  int res1_ref = 0;
+  long long res2_ref = 0;
+  int i, sign = 1;
+
+  s.a[0] = 2.46;
+
+  res1 = _mm_cvtts_roundss_epi32 (s.x, 8);
+
+  if (s.a[0] > INT_MAX)
+    res1_ref = INT_MAX;
+  else if (s.a[0] < INT_MIN)
+    res1_ref = INT_MIN;
+  else
+    res1_ref = s.a[0];
+
+  if (res1 != res1_ref)
+    abort();
+
+#ifdef __x86_64__
+  res2 = _mm_cvtts_roundss_epi64 (s.x, 8);
+
+  if (s.a[0] > LLONG_MAX)
+    res2_ref = LLONG_MAX;
+  else if (s.a[0] < LLONG_MIN)
+    res2_ref = LLONG_MIN;
+  else
+    res2_ref = s.a[0];
+
+  if (res2 != res2_ref)
+    abort();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c
new file mode 100644
index 00000000000..1e58a9c6979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+  UNION_TYPE (128, ) s;
+  unsigned int res1;
+  unsigned long long res2;
+  unsigned int res1_ref = 0;
+  unsigned long long res2_ref = 0;
+
+  s.a[0] = 2.46;
+
+  res1 = _mm_cvtts_roundss_epu32 (s.x, 8);
+
+  if (s.a[0] > UINT_MAX)
+    res1_ref = UINT_MAX;
+  else if (s.a[0] < 0)
+    res1_ref = 0;
+  else
+    res1_ref = s.a[0];
+
+  if (res1 != res1_ref)
+    abort();
+
+#ifdef __x86_64__
+  res2 = _mm_cvtts_roundss_epu64 (s.x, 8);
+
+  if (s.a[0] > ULLONG_MAX)
+    res2_ref = ULLONG_MAX;
+  else if (s.a[0] < 0)
+    res2_ref = 0;
+  else
+    res2_ref = s.a[0];
+
+  if (res2 != res2_ref)
+    abort();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 5669fa1aa00..1d6ca552fcc 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1052,6 +1052,14 @@
 #define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D)
__builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8) /* avx10_2satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8) @@ -1062,5 +1070,23 @@ #define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8) #define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8) +#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8) +#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8) +#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8) +#ifdef __x86_64__ +#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8) +#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8) +#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8) +#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8) +#endif #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 550d2633b78..799982b6f7e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1457,6 +1457,30 @@ test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) +test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8) +test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8) +test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8) +test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8) +test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8) +test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8) +test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8) +test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8) +test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8) +test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8) +test_2 (_mm512_maskz_cvtts_roundpd_epu64, 
__m512i, __mmask8, __m512d, 8) +test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8) +test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8) +test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) +test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8) +test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8) +test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8) +test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8) +test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) +test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8) +test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8) +test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8) /* avx10_2satcvtintrin.h */ test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8) @@ -1483,3 +1507,37 @@ test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h, test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8) test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) +test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8) +test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8) +test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8) +test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8) +test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8) +test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8) +test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8) +test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8) +test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8) +test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8) +test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8) +test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8) +test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8) +test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) +test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8) +test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8) +test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8) +test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8) +test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) +test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8) +test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8) +test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8) +test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8) +test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8) +test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8) +test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8) +#ifdef __x86_64__ +test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8) +test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8) +test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8) +test_1 (_mm_cvtts_roundss_epu64, unsigned 
long long, __m128, 8)
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index ba67ee26914..b8eb6ae7828 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -1496,6 +1496,30 @@ test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h,
 test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8)
 test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
 test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8)
+test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8)
 
 /* avx10_2satcvtintrin.h */
 test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8)
@@ -1522,3 +1546,37 @@ test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h,
 test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8)
 test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
 test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 7e8b5d01871..f3ab4a4f34a 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -1026,6 +1026,14 @@
 #define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
 
 /* avx10_2satcvtintrin.h */
 #define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
@@ -1036,6 +1044,24 @@
 #define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
 #define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
 
 #pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
-- 
2.43.5
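
For reference, the test_1/test_2/test_3 rows in sse-22.c record each new
intrinsic's return and argument types, and the sse-23.c macros pin the
rounding operand to the literal 8, i.e. _MM_FROUND_NO_EXC, so the immediate
stays a compile-time constant.  A minimal usage sketch (not part of the
patch; it matches the _mm512_cvtts_roundpd_epi32 rows above and assumes a
compiler with this patch applied, compiled with -mavx10.2-512):

#include <immintrin.h>

/* Saturating truncation of 8 doubles to 8 signed 32-bit ints; the
   rounding argument must be an immediate such as _MM_FROUND_NO_EXC (8).  */
__m256i
cvtts_pd_epi32 (__m512d src)
{
  return _mm512_cvtts_roundpd_epi32 (src, _MM_FROUND_NO_EXC);
}

/* Merge-masked form, per the test_3 row: lanes whose bit in `m' is
   clear keep the corresponding element of `fallback'.  */
__m256i
cvtts_pd_epi32_mask (__m256i fallback, __mmask8 m, __m512d src)
{
  return _mm512_mask_cvtts_roundpd_epi32 (fallback, m, src, _MM_FROUND_NO_EXC);
}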