https://gcc.gnu.org/g:e2c80d237223f8524c2bd930b681aa891a13db99
commit r15-3181-ge2c80d237223f8524c2bd930b681aa891a13db99 Author: Hu, Lin1 <lin1...@intel.com> Date: Mon Aug 26 10:53:47 2024 +0800 [PATCH 1/2] AVX10.2: Support saturating convert instructions gcc/ChangeLog: * config.gcc: Add avx10_2satcvtintrin.h and avx10_2-512satcvtintrin.h. * config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE (V8HI, V8BF, V8HI, UQI), (V16HI, V16BF, V16HI, UHI), (V32HI, V32BF, V32HI, USI), (V16SI, V16SF, V16SI, UHI, INT), (V16HI, V16BF, V16HI, UHI, INT), (V32HI, V32BF, V32HI, USI, INT). * config/i386/i386-builtin.def (BDESC): Add new builtins. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle V32HI_FTYPE_V32BF_V32HI_USI, V16HI_FTYPE_V16BF_V16HI_UHI, V8HI_FTYPE_V8BF_V8HI_UQI. (ix86_expand_round_builtin): Handle V32HI_FTYPE_V32BF_V32HI_USI_INT, V16SI_FTYPE_V16SF_V16SI_UHI_INT, V16HI_FTYPE_V16BF_V16HI_UHI_INT. * config/i386/immintrin.h: Include avx10_2satcvtintrin.h and avx10_2-512satcvtintrin.h. * config/i386/sse.md: (UNSPEC_CVTNE_BF16_IBS_ITER): New iterator. (sat_cvt_sign_prefix): Ditto. (sat_cvt_trunc_prefix): Ditto. (UNSPEC_CVT_PH_IBS_ITER): Ditto. (UNSPEC_CVTT_PH_IBS_ITER): Ditto. (UNSPEC_CVT_PS_IBS_ITER): Ditto. (UNSPEC_CVTT_PS_IBS_ITER): Ditto. (avx10_2_cvt<sat_cvt_trunc_prefix>nebf162i<sat_cvt_sign_prefix>bs<mode><mask_name>): New define_insn. (avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>): Ditto. (avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>): Ditto. (avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>): Ditto. (avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>): Ditto. * config/i386/avx10_2-512satcvtintrin.h: New file. * config/i386/avx10_2satcvtintrin.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx-1.c: Add macros. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512f-helper.h: Add new test macro. 
* gcc.target/i386/m512-check.h: Add new type. * gcc.target/i386/avx10_2-512-satcvt-1.c: New test. * gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-satcvt-1.c: Ditto. * gcc.target/i386/avx10_2-vcvtnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvtnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvtph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvtph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvtps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvtps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttnebf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttnebf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-vcvttps2iubs-2.c: Ditto. 
Diff: --- gcc/config.gcc | 4 +- gcc/config/i386/avx10_2-512satcvtintrin.h | 624 ++++++++++++ gcc/config/i386/avx10_2satcvtintrin.h | 1022 ++++++++++++++++++++ gcc/config/i386/i386-builtin-types.def | 6 + gcc/config/i386/i386-builtin.def | 36 + gcc/config/i386/i386-expand.cc | 6 + gcc/config/i386/immintrin.h | 3 + gcc/config/i386/sse.md | 109 +++ gcc/testsuite/gcc.target/i386/avx-1.c | 20 + .../gcc.target/i386/avx10_2-512-satcvt-1.c | 100 ++ .../gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c | 69 ++ .../i386/avx10_2-512-vcvtnebf162iubs-2.c | 69 ++ .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 74 ++ .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c | 74 ++ .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c | 75 ++ .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c | 73 ++ .../i386/avx10_2-512-vcvttnebf162ibs-2.c | 69 ++ .../i386/avx10_2-512-vcvttnebf162iubs-2.c | 69 ++ .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c | 74 ++ .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c | 74 ++ .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c | 75 ++ .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c | 73 ++ gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c | 187 ++++ .../gcc.target/i386/avx10_2-vcvtnebf162ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvtnebf162iubs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvtph2ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvtph2iubs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvtps2ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttnebf162ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttnebf162iubs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttph2ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttph2iubs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2ibs-2.c | 16 + .../gcc.target/i386/avx10_2-vcvttps2iubs-2.c | 16 + gcc/testsuite/gcc.target/i386/avx512f-helper.h | 2 + gcc/testsuite/gcc.target/i386/m512-check.h | 21 + gcc/testsuite/gcc.target/i386/sse-13.c | 20 + gcc/testsuite/gcc.target/i386/sse-14.c | 52 + gcc/testsuite/gcc.target/i386/sse-22.c | 52 + 
gcc/testsuite/gcc.target/i386/sse-23.c | 20 + 40 files changed, 3327 insertions(+), 1 deletion(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index 7d761b257cd4..4bcb461b68cc 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -454,7 +454,9 @@ i[34567]86-*-* | x86_64-*-*) sm3intrin.h sha512intrin.h sm4intrin.h usermsrintrin.h avx10_2roundingintrin.h avx10_2mediaintrin.h avx10_2-512mediaintrin.h - avx10_2bf16intrin.h avx10_2-512bf16intrin.h" + avx10_2convertintrin.h avx10_2-512convertintrin.h + avx10_2bf16intrin.h avx10_2-512bf16intrin.h + avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h new file mode 100644 index 000000000000..4286458c413a --- /dev/null +++ b/gcc/config/i386/avx10_2-512satcvtintrin.h @@ -0,0 +1,624 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED +#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED + +#if !defined (__AVX10_2_512__) +#pragma GCC push_options +#pragma GCC target("avx10.2-512") +#define __DISABLE_AVX10_2_512__ +#endif /* __AVX10_2_512__ */ + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtnebf16_epi16 (__m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvtnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtnebf16_epi16 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvtnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtnebf16_epi16 (__mmask32 __U, __m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvtnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtnebf16_epu16 (__m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvtnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtnebf16_epu16 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvtnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtnebf16_epu16 (__mmask32 __U, __m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvtnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline 
__m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvttnebf16_epi16 (__m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvttnebf16_epi16 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvttnebf16_epi16 (__mmask32 __U, __m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttnebf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvttnebf16_epu16 (__m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvttnebf16_epu16 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) + __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvttnebf16_epu16 (__mmask32 __U, __m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttnebf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvt_roundph_epi16 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + 
_mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvt_roundph_epi16 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvt_roundph_epi16 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvt_roundph_epu16 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvt_roundph_epu16 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvt_roundph_epu16 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvt_roundps_epi32 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_mask_ipcvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvt_roundps_epu32 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtt_roundph_epi16 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtt_roundph_epi16 (__m512i __W, __mmask32 __U, __m512h __A, + const int 
__R) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtt_roundph_epi16 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtt_roundph_epu16 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtt_roundph_epu16 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtt_roundph_epu16 (__mmask32 __U, __m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtt_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) 
__U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtt_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} +#else +#define _mm512_ipcvt_roundph_epi16(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvt_roundph_epi16(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvt_roundph_epi16(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvt_roundph_epu16(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + 
(_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvt_roundph_epu16(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvt_roundph_epu16(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvt_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvt_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvt_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvt_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvt_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvt_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtt_roundph_epi16(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtt_roundph_epi16(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define 
_mm512_maskz_ipcvtt_roundph_epi16(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtt_roundph_epu16(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtt_roundph_epu16(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtt_roundph_epu16(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtt_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtt_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtt_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtt_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtt_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtt_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) +#endif + +#ifdef __DISABLE_AVX10_2_512__ +#undef 
__DISABLE_AVX10_2_512__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_512__ */ + +#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h new file mode 100644 index 000000000000..4fcf78955df2 --- /dev/null +++ b/gcc/config/i386/avx10_2satcvtintrin.h @@ -0,0 +1,1022 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2SATCVTINTRIN_H_INCLUDED +#define _AVX10_2SATCVTINTRIN_H_INCLUDED + +#if !defined (__AVX10_2_256__) +#pragma GCC push_options +#pragma GCC target("avx10.2") +#define __DISABLE_AVX10_2_256__ +#endif /* __AVX10_2_256__ */ + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtnebf16_epi16 (__m128bh __A) +{ + return (__m128i) __builtin_ia32_cvtnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtnebf16_epi16 (__m128i __W, __mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvtnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtnebf16_epi16 (__mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvtnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtnebf16_epi16 (__m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtnebf16_epi16 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvtnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvtnebf16_epi16 (__mmask16 __U, __m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_ipcvtnebf16_epu16 (__m128bh __A) +{ + return + (__m128i) __builtin_ia32_cvtnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtnebf16_epu16 (__m128i __W, __mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvtnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtnebf16_epu16 (__mmask8 __U, __m128bh __A) +{ + return + (__m128i) __builtin_ia32_cvtnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtnebf16_epu16 (__m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtnebf16_epu16 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvtnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvtnebf16_epu16 (__mmask16 __U, __m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtph_epi16 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_mask_ipcvtph_epi16 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtph_epi16 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtph_epu16 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtph_epu16 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtph_epu16 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtps_epi32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtps_epi32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + 
(__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvtps_epu32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvtps_epu32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvttnebf16_epi16 (__m128bh __A) +{ + return + (__m128i) __builtin_ia32_cvttnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttnebf16_epi16 (__m128i __W, __mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvttnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttnebf16_epi16 (__mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvttnebf162ibs128_mask ((__v8bf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvttnebf16_epu16 (__m128bh __A) +{ + return + (__m128i) __builtin_ia32_cvttnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttnebf16_epu16 (__m128i __W, __mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvttnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttnebf16_epu16 (__mmask8 __U, __m128bh __A) +{ + return (__m128i) __builtin_ia32_cvttnebf162iubs128_mask ((__v8bf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvttnebf16_epi16 (__m256bh __A) +{ + return (__m256i) + __builtin_ia32_cvttnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvttnebf16_epi16 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvttnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvttnebf16_epi16 (__mmask16 __U, __m256bh __A) +{ + return (__m256i) + __builtin_ia32_cvttnebf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvttnebf16_epu16 (__m256bh __A) +{ + return (__m256i) + __builtin_ia32_cvttnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvttnebf16_epu16 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvttnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline 
__m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvttnebf16_epu16 (__mmask16 __U, __m256bh __A) +{ + return (__m256i) + __builtin_ia32_cvttnebf162iubs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvttph_epi16 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttph_epi16 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttph_epi16 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvttph_epu16 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttph_epu16 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttph_epu16 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvttph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm_ipcvttps_epi32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttps_epi32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvttps_epu32 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvttps_epu32 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvttps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvt_roundph_epi16 (__m256h __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvt_roundph_epi16 (__m256i __W, __mmask16 __U, 
__m256h __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvt_roundph_epu16 (__m256h __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvt_roundps_epi32 (__m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) __A, + (__v8si) __W, + 
(__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvt_roundps_epu32 (__m256 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtt_roundph_epi16 (__m256h __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_maskz_ipcvtt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtt_roundph_epu16 (__m256h __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvtt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtt_roundps_epi32 (__m256 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvtt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) 
__builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvtt_roundps_epu32 (__m256 __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvtt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} +#else + +#define _mm256_ipcvt_roundph_epi16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundph_epi16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvt_roundph_epi16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvt_roundph_epu16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundph_epu16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), 
\ + (R))) + +#define _mm256_maskz_ipcvt_roundph_epu16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvt_roundps_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundps_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_ipcvt_roundps_epi32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_ipcvt_roundps_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_ipcvt_roundps_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_ipcvt_roundps_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvtps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + + +#define _mm256_ipcvttne_roundbf16_epi16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttnebf162ibs256_mask_round ((__v16bf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvttne_roundbf16_epi16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttnebf162ibs256_mask_round ((__v16bf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvttne_roundbf16_epi16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttnebf162ibs256_mask_round ((__v16bf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define 
_mm256_ipcvttne_roundbf16_epu16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttnebf162iubs256_mask_round ((__v16bf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvttne_roundbf16_epu16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttnebf162iubs256_mask_round ((__v16bf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvttne_roundbf16_epu16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttnebf162iubs256_mask_round ((__v16bf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvtt_roundph_epi16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvtt_roundph_epi16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvtt_roundph_epi16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttph2ibs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvtt_roundph_epu16(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_undefined_si256 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm256_mask_ipcvtt_roundph_epu16(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_maskz_ipcvtt_roundph_epu16(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttph2iubs256_mask_round ((__v16hf) (A), \ + (__v16hi) \ + (_mm256_setzero_si256 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm256_ipcvtt_roundps_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define 
_mm256_mask_ipcvtt_roundps_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_ipcvtt_roundps_epi32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2ibs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_ipcvtt_roundps_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm256_mask_ipcvtt_roundps_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm256_maskz_ipcvtt_roundps_epu32(U, A, R) \ +((__m256i) \ + __builtin_ia32_cvttps2iubs256_mask_round ((__v8sf) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) +#endif + +#ifdef __DISABLE_AVX10_2_256__ +#undef __DISABLE_AVX10_2_256__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_256__ */ + +#endif /* _AVX10_2SATCVTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index e6f53589e70b..b29785912873 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1493,3 +1493,9 @@ DEF_FUNCTION_TYPE (USI, V32BF, V32BF, INT, USI) DEF_FUNCTION_TYPE (UHI, V16BF, V16BF, INT, UHI) DEF_FUNCTION_TYPE (UQI, V8BF, V8BF, INT, UQI) DEF_FUNCTION_TYPE (INT, V8BF, V8BF) +DEF_FUNCTION_TYPE (V8HI, V8BF, V8HI, UQI) +DEF_FUNCTION_TYPE (V16HI, V16BF, V16HI, UHI) +DEF_FUNCTION_TYPE (V32HI, V32BF, V32HI, USI) +DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, UHI, INT) +DEF_FUNCTION_TYPE (V16HI, V16BF, V16HI, UHI, INT) +DEF_FUNCTION_TYPE (V32HI, V32BF, V32HI, USI, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 25b8169c1ef6..b85eba5b3307 100644 --- a/gcc/config/i386/i386-builtin.def +++ 
b/gcc/config/i386/i386-builtin.def @@ -3270,6 +3270,26 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__built BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16le", IX86_BUILTIN_VCOMSBF16LE, LE, (int) INT_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16lt", IX86_BUILTIN_VCOMSBF16LT, LT, (int) INT_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16neq", IX86_BUILTIN_VCOMSBF16NE, NE, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162ibsv8bf_mask, "__builtin_ia32_cvtnebf162ibs128_mask", IX86_BUILTIN_CVTNEBF162IBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8BF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162ibsv16bf_mask, "__builtin_ia32_cvtnebf162ibs256_mask", IX86_BUILTIN_CVTNEBF162IBS256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16BF_V16HI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtnebf162ibsv32bf_mask, "__builtin_ia32_cvtnebf162ibs512_mask", IX86_BUILTIN_CVTNEBF162IBS512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32BF_V32HI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162iubsv8bf_mask, "__builtin_ia32_cvtnebf162iubs128_mask", IX86_BUILTIN_CVTNEBF162IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8BF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162iubsv16bf_mask, "__builtin_ia32_cvtnebf162iubs256_mask", IX86_BUILTIN_CVTNEBF162IUBS256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16BF_V16HI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtnebf162iubsv32bf_mask, "__builtin_ia32_cvtnebf162iubs512_mask", IX86_BUILTIN_CVTNEBF162IUBS512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32BF_V32HI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2ibsv8hf_mask, "__builtin_ia32_cvtph2ibs128_mask", IX86_BUILTIN_CVTPH2IBS128_MASK, UNKNOWN, (int) 
V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2iubsv8hf_mask, "__builtin_ia32_cvtph2iubs128_mask", IX86_BUILTIN_CVTPH2IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtps2ibsv4sf_mask, "__builtin_ia32_cvtps2ibs128_mask", IX86_BUILTIN_CVTPS2IBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtps2iubsv4sf_mask, "__builtin_ia32_cvtps2iubs128_mask", IX86_BUILTIN_CVTPS2IUBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttnebf162ibsv8bf_mask, "__builtin_ia32_cvttnebf162ibs128_mask", IX86_BUILTIN_CVTTNEBF162IBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8BF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttnebf162ibsv16bf_mask, "__builtin_ia32_cvttnebf162ibs256_mask", IX86_BUILTIN_CVTTNEBF162IBS256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16BF_V16HI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttnebf162ibsv32bf_mask, "__builtin_ia32_cvttnebf162ibs512_mask", IX86_BUILTIN_CVTTNEBF162IBS512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32BF_V32HI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttnebf162iubsv8bf_mask, "__builtin_ia32_cvttnebf162iubs128_mask", IX86_BUILTIN_CVTTNEBF162IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8BF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttnebf162iubsv16bf_mask, "__builtin_ia32_cvttnebf162iubs256_mask", IX86_BUILTIN_CVTTNEBF162IUBS256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16BF_V16HI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttnebf162iubsv32bf_mask, "__builtin_ia32_cvttnebf162iubs512_mask", IX86_BUILTIN_CVTTNEBF162IUBS512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32BF_V32HI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2ibsv8hf_mask, "__builtin_ia32_cvttph2ibs128_mask", IX86_BUILTIN_CVTTPH2IBS128_MASK, UNKNOWN, (int) 
V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2iubsv8hf_mask, "__builtin_ia32_cvttph2iubs128_mask", IX86_BUILTIN_CVTTPH2IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv4sf_mask, "__builtin_ia32_cvttps2ibs128_mask", IX86_BUILTIN_CVTTPS2IBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv4sf_mask, "__builtin_ia32_cvttps2iubs128_mask", IX86_BUILTIN_CVTTPS2IUBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3730,6 +3750,22 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv16hf3_mask_round, "__builti BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_subv8sf3_mask_round, "__builtin_ia32_subps256_mask_round", IX86_BUILTIN_VSUBPS256_MASK_ROUND, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvt2ps2phx_v32hf_mask_round, "__builtin_ia32_vcvt2ps2phx512_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V16SF_V16SF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvt2ps2phx_v16hf_mask_round, "__builtin_ia32_vcvt2ps2phx256_mask_round", IX86_BUILTIN_VCVT2PS2PHX_V16HF_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V8SF_V8SF_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2ibsv16hf_mask_round, "__builtin_ia32_cvtph2ibs256_mask_round", IX86_BUILTIN_CVTPH2IBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtph2ibsv32hf_mask_round, "__builtin_ia32_cvtph2ibs512_mask_round", IX86_BUILTIN_CVTPH2IBS512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtph2iubsv16hf_mask_round, "__builtin_ia32_cvtph2iubs256_mask_round", 
IX86_BUILTIN_CVTPH2IUBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtph2iubsv32hf_mask_round, "__builtin_ia32_cvtph2iubs512_mask_round", IX86_BUILTIN_CVTPH2IUBS512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtps2ibsv8sf_mask_round, "__builtin_ia32_cvtps2ibs256_mask_round", IX86_BUILTIN_CVTPS2IBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtps2ibsv16sf_mask_round, "__builtin_ia32_cvtps2ibs512_mask_round", IX86_BUILTIN_CVTPS2IBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtps2iubsv8sf_mask_round, "__builtin_ia32_cvtps2iubs256_mask_round", IX86_BUILTIN_CVTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtps2iubsv16sf_mask_round, "__builtin_ia32_cvtps2iubs512_mask_round", IX86_BUILTIN_CVTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2ibsv16hf_mask_round, "__builtin_ia32_cvttph2ibs256_mask_round", IX86_BUILTIN_CVTTPH2IBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttph2ibsv32hf_mask_round, "__builtin_ia32_cvttph2ibs512_mask_round", IX86_BUILTIN_CVTTPH2IBS512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2iubsv16hf_mask_round, "__builtin_ia32_cvttph2iubs256_mask_round", IX86_BUILTIN_CVTTPH2IUBS256_MASK_ROUND, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttph2iubsv32hf_mask_round, "__builtin_ia32_cvttph2iubs512_mask_round", IX86_BUILTIN_CVTTPH2IUBS512_MASK_ROUND, UNKNOWN, (int) 
V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv8sf_mask_round, "__builtin_ia32_cvttps2ibs256_mask_round", IX86_BUILTIN_CVTTPS2IBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2ibsv16sf_mask_round, "__builtin_ia32_cvttps2ibs512_mask_round", IX86_BUILTIN_CVTTPS2IBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv8sf_mask_round, "__builtin_ia32_cvttps2iubs256_mask_round", IX86_BUILTIN_CVTTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2iubsv16sf_mask_round, "__builtin_ia32_cvttps2iubs512_mask_round", IX86_BUILTIN_CVTTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 356b807bf2e4..410d70f4b2c3 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -11433,10 +11433,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16QI_FTYPE_V16QI_V16QI_UHI: case V16QI_FTYPE_QI_V16QI_UHI: case V32HI_FTYPE_V8HI_V32HI_USI: + case V32HI_FTYPE_V32BF_V32HI_USI: case V32HI_FTYPE_HI_V32HI_USI: case V16HI_FTYPE_V8HI_V16HI_UHI: + case V16HI_FTYPE_V16BF_V16HI_UHI: case V16HI_FTYPE_HI_V16HI_UHI: case V8HI_FTYPE_V8HI_V8HI_UQI: + case V8HI_FTYPE_V8BF_V8HI_UQI: case V8BF_FTYPE_V8BF_V8BF_UQI: case V8HI_FTYPE_HI_V8HI_UQI: case V16HF_FTYPE_V16HF_V16HF_UHI: @@ -12421,6 +12424,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V8SF_FTYPE_V8DF_V8SF_QI_INT: case V8DF_FTYPE_V8DF_V8DF_QI_INT: case V32HI_FTYPE_V32HF_V32HI_USI_INT: + case V32HI_FTYPE_V32BF_V32HI_USI_INT: case V8SI_FTYPE_V8DF_V8SI_QI_INT: case V8DI_FTYPE_V8HF_V8DI_UQI_INT: case V8DI_FTYPE_V8DF_V8DI_QI_INT: @@ -12435,6 +12439,7 @@ 
ix86_expand_round_builtin (const struct builtin_description *d, case V8DI_FTYPE_V8SF_V8DI_QI_INT: case V16SF_FTYPE_V16SI_V16SF_HI_INT: case V16SI_FTYPE_V16SF_V16SI_HI_INT: + case V16SI_FTYPE_V16SF_V16SI_UHI_INT: case V16SI_FTYPE_V16HF_V16SI_UHI_INT: case V16HF_FTYPE_V16HF_V16HF_V16HF_INT: case V16HF_FTYPE_V16SI_V16HF_UHI_INT: @@ -12467,6 +12472,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V16HF_FTYPE_V16SF_V16HF_UHI_INT: case V16HF_FTYPE_V16HF_V16HF_UHI_INT: case V16HF_FTYPE_V16HI_V16HF_UHI_INT: + case V16HI_FTYPE_V16BF_V16HI_UHI_INT: case V8HF_FTYPE_V8HF_V8HF_V8HF_INT: nargs = 4; break; diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 025334027eb1..c8e375070888 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -152,4 +152,7 @@ #include <avx10_2-512bf16intrin.h> +#include <avx10_2satcvtintrin.h> + +#include <avx10_2-512satcvtintrin.h> #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4b3e475347c6..113902e9864a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -235,6 +235,18 @@ UNSPEC_VGETMANTPBF16 UNSPEC_VFPCLASSPBF16 UNSPEC_VCOMSBF16 + UNSPEC_VCVTNEBF162IBS + UNSPEC_VCVTNEBF162IUBS + UNSPEC_VCVTPH2IBS + UNSPEC_VCVTPH2IUBS + UNSPEC_VCVTPS2IBS + UNSPEC_VCVTPS2IUBS + UNSPEC_VCVTTNEBF162IBS + UNSPEC_VCVTTNEBF162IUBS + UNSPEC_VCVTTPH2IBS + UNSPEC_VCVTTPH2IUBS + UNSPEC_VCVTTPS2IBS + UNSPEC_VCVTTPS2IUBS ]) (define_c_enum "unspecv" [ @@ -32197,3 +32209,100 @@ "TARGET_AVX10_2_256" "vcomsbf16\t{%1, %0|%0, %1}" [(set_attr "prefix" "evex")]) + +(define_int_iterator UNSPEC_CVTNE_BF16_IBS_ITER + [UNSPEC_VCVTNEBF162IBS + UNSPEC_VCVTNEBF162IUBS + UNSPEC_VCVTTNEBF162IBS + UNSPEC_VCVTTNEBF162IUBS]) + +(define_int_attr sat_cvt_sign_prefix + [(UNSPEC_VCVTNEBF162IBS "") + (UNSPEC_VCVTNEBF162IUBS "u") + (UNSPEC_VCVTTNEBF162IBS "") + (UNSPEC_VCVTTNEBF162IUBS "u") + (UNSPEC_VCVTPH2IBS "") + (UNSPEC_VCVTPH2IUBS "u") + 
(UNSPEC_VCVTTPH2IBS "") + (UNSPEC_VCVTTPH2IUBS "u") + (UNSPEC_VCVTPS2IBS "") + (UNSPEC_VCVTPS2IUBS "u") + (UNSPEC_VCVTTPS2IBS "") + (UNSPEC_VCVTTPS2IUBS "u")]) + +(define_int_attr sat_cvt_trunc_prefix + [(UNSPEC_VCVTNEBF162IBS "") + (UNSPEC_VCVTNEBF162IUBS "") + (UNSPEC_VCVTTNEBF162IBS "t") + (UNSPEC_VCVTTNEBF162IUBS "t")]) + +(define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>nebf162i<sat_cvt_sign_prefix>bs<mode><mask_name>" + [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") + (unspec:<sseintvecmode> + [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")] + UNSPEC_CVTNE_BF16_IBS_ITER))] + "TARGET_AVX10_2_256" + "vcvt<sat_cvt_trunc_prefix>nebf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_int_iterator UNSPEC_CVT_PH_IBS_ITER + [UNSPEC_VCVTPH2IBS + UNSPEC_VCVTPH2IUBS]) + +(define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" + [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") + (unspec:<sseintvecmode> + [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + UNSPEC_CVT_PH_IBS_ITER))] + "TARGET_AVX10_2_256 && <round_mode_condition>" + "vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_int_iterator UNSPEC_CVTT_PH_IBS_ITER + [UNSPEC_VCVTTPH2IBS + UNSPEC_VCVTTPH2IUBS]) + +(define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" + [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") + (unspec:<sseintvecmode> + [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + UNSPEC_CVTT_PH_IBS_ITER))] + "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>" + 
"vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_int_iterator UNSPEC_CVT_PS_IBS_ITER + [UNSPEC_VCVTPS2IBS + UNSPEC_VCVTPS2IUBS]) + +(define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" + [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") + (unspec:<sseintvecmode> + [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + UNSPEC_CVT_PS_IBS_ITER))] + "TARGET_AVX10_2_256 && <round_mode_condition>" + "vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_int_iterator UNSPEC_CVTT_PS_IBS_ITER + [UNSPEC_VCVTTPS2IBS + UNSPEC_VCVTTPS2IUBS]) + +(define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" + [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") + (unspec:<sseintvecmode> + [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + UNSPEC_CVTT_PS_IBS_ITER))] + "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>" + "vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index df4cfdfff8df..be2fb5ae15a2 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1035,6 +1035,26 @@ #define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +/* 
avx10_2-512satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8) + +/* avx10_2satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8) + #include <wmmintrin.h> #include <immintrin.h> #include <mm3dnow.h> diff --git 
a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c new file mode 100644 index 000000000000..84826c0fe5a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c @@ -0,0 +1,100 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx10.2 -mavx10.2-512" } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ 
+/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ 
+/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512 x; +volatile __m512h xh; +volatile __m512i xi; +volatile __m512bh xbh; +volatile __mmask8 m8; +volatile __mmask16 m16; +volatile __mmask32 
m32; + +void extern +avx10_2_test (void) +{ + xi = _mm512_ipcvt_roundph_epi16 (xh, 4); + xi = _mm512_mask_ipcvt_roundph_epi16 (xi, m32, xh, 8); + xi = _mm512_maskz_ipcvt_roundph_epi16 (m32, xh, 11); + + xi = _mm512_ipcvt_roundph_epu16 (xh, 4); + xi = _mm512_mask_ipcvt_roundph_epu16 (xi, m32, xh, 8); + xi = _mm512_maskz_ipcvt_roundph_epu16 (m32, xh, 11); + + xi = _mm512_ipcvtt_roundph_epi16 (xh, 4); + xi = _mm512_mask_ipcvtt_roundph_epi16 (xi, m32, xh, 8); + xi = _mm512_maskz_ipcvtt_roundph_epi16 (m32, xh, 8); + + xi = _mm512_ipcvtt_roundph_epu16 (xh, 4); + xi = _mm512_mask_ipcvtt_roundph_epu16 (xi, m32, xh, 8); + xi = _mm512_maskz_ipcvtt_roundph_epu16 (m32, xh, 8); + + xi = _mm512_ipcvt_roundps_epi32 (x, 4); + xi = _mm512_mask_ipcvt_roundps_epi32 (xi, m16, x, 8); + xi = _mm512_maskz_ipcvt_roundps_epi32 (m16, x, 11); + + xi = _mm512_ipcvt_roundps_epu32 (x, 4); + xi = _mm512_mask_ipcvt_roundps_epu32 (xi, m16, x, 8); + xi = _mm512_maskz_ipcvt_roundps_epu32 (m16, x, 11); + + xi = _mm512_ipcvtt_roundps_epi32 (x, 4); + xi = _mm512_mask_ipcvtt_roundps_epi32 (xi, m16, x, 8); + xi = _mm512_maskz_ipcvtt_roundps_epi32 (m16, x, 8); + + xi = _mm512_ipcvtt_roundps_epu32 (x, 4); + xi = _mm512_mask_ipcvtt_roundps_epu32 (xi, m16, x, 8); + xi = _mm512_maskz_ipcvtt_roundps_epu32 (m16, x, 8); + + xi = _mm512_ipcvtnebf16_epi16 (xbh); + xi = _mm512_mask_ipcvtnebf16_epi16 (xi, m32, xbh); + xi = _mm512_maskz_ipcvtnebf16_epi16 (m32, xbh); + + xi = _mm512_ipcvtnebf16_epu16 (xbh); + xi = _mm512_mask_ipcvtnebf16_epu16 (xi, m32, xbh); + xi = _mm512_maskz_ipcvtnebf16_epu16 (m32, xbh); + + xi = _mm512_ipcvttnebf16_epi16 (xbh); + xi = _mm512_mask_ipcvttnebf16_epi16 (xi, m32, xbh); + xi = _mm512_maskz_ipcvttnebf16_epi16 (m32, xbh); + + xi = _mm512_ipcvttnebf16_epu16 (xbh); + xi = _mm512_mask_ipcvttnebf16_epu16 (xi, m32, xbh); + xi = _mm512_maskz_ipcvttnebf16_epu16 (m32, xbh); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c new file mode 100644 index 000000000000..489927ee065c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162ibs-2.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (__bf16 *s, short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = nearbyint(_mm_cvtsbh_ss(s[i])); + r[i] = (unsigned short)tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, bf16_bf) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = INTRINSIC (_ipcvtnebf16_epi16) (s.x); + res2.x = INTRINSIC (_mask_ipcvtnebf16_epi16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtnebf16_epi16) (mask, s.x); + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c new file mode 100644 index 000000000000..f901f41ea8ba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtnebf162iubs-2.c @@ -0,0 +1,69 
@@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (__bf16 *s, unsigned short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + tmp = UCHAR_MAX; + else if (s[i] < 0) + tmp = 0; + else + tmp = nearbyint(_mm_cvtsbh_ss(s[i])); + r[i] = (unsigned short)tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, bf16_bf) s; + UNION_TYPE (AVX512F_LEN, i_uw) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = INTRINSIC (_ipcvtnebf16_epu16) (s.x); + res2.x = INTRINSIC (_mask_ipcvtnebf16_epu16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtnebf16_epu16) (mask, s.x); + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (i_uw) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uw) (res2, res_ref)) + abort (); + + MASK_ZERO (i_uw) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uw) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c new file mode 100644 index 000000000000..4ce8dd06bdca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include 
"avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (_Float16 *s, short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = __builtin_nearbyintf16(s[i]); + r[i] = (unsigned short) tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, h) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvtph_epi16) (s.x); + res2.x = INTRINSIC (_mask_ipcvtph_epi16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtph_epi16) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvt_roundph_epi16) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvt_roundph_epi16) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvt_roundph_epi16) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c new file mode 100644 index 000000000000..f78d6c7ee9e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include 
<limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (_Float16 *s, short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + tmp = UCHAR_MAX; + else if (s[i] < 0) + tmp = 0; + else + tmp = __builtin_nearbyintf16(s[i]); + r[i] = (unsigned short) tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, h) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvtph_epu16) (s.x); + res2.x = INTRINSIC (_mask_ipcvtph_epu16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtph_epu16) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvt_roundph_epu16) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvt_roundph_epu16) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvt_roundph_epu16) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c new file mode 100644 index 000000000000..4852a8bd6ddf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c @@ -0,0 +1,75 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> +#include <math.h> + +#define 
SRC_SIZE ((AVX512F_LEN) / 32) +#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, int *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = nearbyint(s[i]); + r[i] = (unsigned int) tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvtps_epi32) (s.x); + res2.x = INTRINSIC (_mask_ipcvtps_epi32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtps_epi32) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvt_roundps_epi32) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvt_roundps_epi32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvt_roundps_epi32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c new file mode 100644 index 000000000000..6e0ad7d150ce --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> +#include <math.h> + +#define SRC_SIZE ((AVX512F_LEN) / 32) 
+#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, int *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + r[i] = UCHAR_MAX; + else if (s[i] < 0) + r[i] = 0; + else + r[i] = nearbyint(s[i]); + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvtps_epu32) (s.x); + res2.x = INTRINSIC (_mask_ipcvtps_epu32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvtps_epu32) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvt_roundps_epu32) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvt_roundps_epu32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvt_roundps_epu32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c new file mode 100644 index 000000000000..23de8234aa68 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162ibs-2.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (__bf16 *s, short *r) 
+{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = s[i]; + r[i] = (unsigned short)tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, bf16_bf) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = INTRINSIC (_ipcvttnebf16_epi16) (s.x); + res2.x = INTRINSIC (_mask_ipcvttnebf16_epi16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttnebf16_epi16) (mask, s.x); + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c new file mode 100644 index 000000000000..858d8e73a007 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttnebf162iubs-2.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (__bf16 *s, short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + tmp = UCHAR_MAX; + else if (s[i] < 0) + tmp = 0; + else + tmp = s[i]; + r[i] = (unsigned short) tmp; + } +} + +void +TEST (void) +{ + 
UNION_TYPE (AVX512F_LEN, bf16_bf) s; + UNION_TYPE (AVX512F_LEN, i_uw) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + res1.x = INTRINSIC (_ipcvttnebf16_epu16) (s.x); + res2.x = INTRINSIC (_mask_ipcvttnebf16_epu16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttnebf16_epu16) (mask, s.x); + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (i_uw) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uw) (res2, res_ref)) + abort (); + + MASK_ZERO (i_uw) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_uw) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c new file mode 100644 index 000000000000..e2624fb64b26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (_Float16 *s, short *r) +{ + int i; + char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = s[i]; + r[i] = (unsigned char) tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, h) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + 
} + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvttph_epi16) (s.x); + res2.x = INTRINSIC (_mask_ipcvttph_epi16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttph_epi16) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvtt_roundph_epi16) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvtt_roundph_epi16) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvtt_roundph_epi16) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c new file mode 100644 index 000000000000..d98a462c4b38 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> + +#define SRC_SIZE ((AVX512F_LEN) / 16) +#define DST_SIZE ((AVX512F_LEN) / 16) + +static void +CALC (_Float16 *s, short *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + tmp = UCHAR_MAX; + else if (s[i] < 0) + tmp = 0; + else + tmp = s[i]; + r[i] = (unsigned short) tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, h) s; + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + short res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = 
DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvttph_epu16) (s.x); + res2.x = INTRINSIC (_mask_ipcvttph_epu16) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttph_epu16) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvtt_roundph_epu16) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvtt_roundph_epu16) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvtt_roundph_epu16) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c new file mode 100644 index 000000000000..47136108a6bd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c @@ -0,0 +1,75 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> +#include <math.h> + +#define SRC_SIZE ((AVX512F_LEN) / 32) +#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, int *r) +{ + int i; + unsigned char tmp; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > SCHAR_MAX) + tmp = SCHAR_MAX; + else if (s[i] < SCHAR_MIN) + tmp = SCHAR_MIN; + else + tmp = s[i]; + r[i] = (unsigned int)tmp; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 
128 + res1.x = INTRINSIC (_ipcvttps_epi32) (s.x); + res2.x = INTRINSIC (_mask_ipcvttps_epi32) (res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttps_epi32) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvtt_roundps_epi32) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvtt_roundps_epi32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvtt_roundps_epi32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c new file mode 100644 index 000000000000..f753dd5a7075 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <limits.h> +#include <math.h> + +#define SRC_SIZE ((AVX512F_LEN) / 32) +#define DST_SIZE ((AVX512F_LEN) / 32) + +static void +CALC (float *s, int *r) +{ + int i; + + for (i = 0; i < SRC_SIZE; i++) + { + if (s[i] > UCHAR_MAX) + r[i] = UCHAR_MAX; + else if (s[i] < 0) + r[i] = 0; + else + r[i] = s[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, ) s; + UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; + MASK_TYPE mask = MASK_VALUE; + int res_ref[DST_SIZE] = { 0 }; + int i, sign = 1; + + for (i = 0; i < SRC_SIZE; i++) + { + s.a[i] = 1.23 * (i + 2) * sign; + sign = -sign; + } + + for (i = 0; i < DST_SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + +#if AVX512F_LEN == 128 + res1.x = INTRINSIC (_ipcvttps_epu32) (s.x); + res2.x = INTRINSIC (_mask_ipcvttps_epu32) 
(res2.x, mask, s.x); + res3.x = INTRINSIC (_maskz_ipcvttps_epu32) (mask, s.x); +#else + res1.x = INTRINSIC (_ipcvtt_roundps_epu32) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvtt_roundps_epu32) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC (_maskz_ipcvtt_roundps_epu32) (mask, s.x, 8); +#endif + + CALC (s.a, res_ref); + + if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) + abort (); + + MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) + abort (); + + MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE); + if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c new file mode 100644 index 000000000000..f04e3ecb6427 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c @@ -0,0 +1,187 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvttph2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\{rz-sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvttps2ibs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times 
"vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtps2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttps2iubs\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162ibs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128 hx; +volatile __m128i hxi; +volatile 
__m128h hxh; +volatile __m128bh hxbh; +volatile __m256 x; +volatile __m256h xh; +volatile __m256i xi; +volatile __m256bh xbh; +volatile __mmask8 m8; +volatile __mmask16 m16; + +void extern +avx10_2_test (void) +{ + xi = _mm256_ipcvt_roundph_epi16 (xh, 4); + xi = _mm256_mask_ipcvt_roundph_epi16 (xi, m16, xh, 8); + xi = _mm256_maskz_ipcvt_roundph_epi16 (m16, xh, 11); + + xi = _mm256_ipcvt_roundph_epu16 (xh, 4); + xi = _mm256_mask_ipcvt_roundph_epu16 (xi, m16, xh, 8); + xi = _mm256_maskz_ipcvt_roundph_epu16 (m16, xh, 11); + + xi = _mm256_ipcvtt_roundph_epi16 (xh, 4); + xi = _mm256_mask_ipcvtt_roundph_epi16 (xi, m16, xh, 8); + xi = _mm256_maskz_ipcvtt_roundph_epi16 (m16, xh, 8); + + xi = _mm256_ipcvtt_roundph_epu16 (xh, 4); + xi = _mm256_mask_ipcvtt_roundph_epu16 (xi, m16, xh, 8); + xi = _mm256_maskz_ipcvtt_roundph_epu16 (m16, xh, 8); + + xi = _mm256_ipcvt_roundps_epi32 (x, 4); + xi = _mm256_mask_ipcvt_roundps_epi32 (xi, m8, x, 8); + xi = _mm256_maskz_ipcvt_roundps_epi32 (m8, x, 11); + + xi = _mm256_ipcvt_roundps_epu32 (x, 4); + xi = _mm256_mask_ipcvt_roundps_epu32 (xi, m8, x, 8); + xi = _mm256_maskz_ipcvt_roundps_epu32 (m8, x, 11); + + xi = _mm256_ipcvtt_roundps_epi32 (x, 4); + xi = _mm256_mask_ipcvtt_roundps_epi32 (xi, m8, x, 8); + xi = _mm256_maskz_ipcvtt_roundps_epi32 (m8, x, 8); + + xi = _mm256_ipcvtt_roundps_epu32 (x, 4); + xi = _mm256_mask_ipcvtt_roundps_epu32 (xi, m8, x, 8); + xi = _mm256_maskz_ipcvtt_roundps_epu32 (m8, x, 8); + + xi = _mm256_ipcvtnebf16_epi16 (xbh); + xi = _mm256_mask_ipcvtnebf16_epi16 (xi, m16, xbh); + xi = _mm256_maskz_ipcvtnebf16_epi16 (m16, xbh); + + xi = _mm256_ipcvtnebf16_epu16 (xbh); + xi = _mm256_mask_ipcvtnebf16_epu16 (xi, m16, xbh); + xi = _mm256_maskz_ipcvtnebf16_epu16 (m16, xbh); + + xi = _mm256_ipcvttnebf16_epi16 (xbh); + xi = _mm256_mask_ipcvttnebf16_epi16 (xi, m16, xbh); + xi = _mm256_maskz_ipcvttnebf16_epi16 (m16, xbh); + + xi = _mm256_ipcvttnebf16_epu16 (xbh); + xi = _mm256_mask_ipcvttnebf16_epu16 (xi, m16, xbh); + xi = 
_mm256_maskz_ipcvttnebf16_epu16 (m16, xbh); + + hxi = _mm_ipcvtph_epi16 (hxh); + hxi = _mm_mask_ipcvtph_epi16 (hxi, m8, hxh); + hxi = _mm_maskz_ipcvtph_epi16 (m8, hxh); + + hxi = _mm_ipcvtph_epu16 (hxh); + hxi = _mm_mask_ipcvtph_epu16 (hxi, m8, hxh); + hxi = _mm_maskz_ipcvtph_epu16 (m8, hxh); + + hxi = _mm_ipcvttph_epi16 (hxh); + hxi = _mm_mask_ipcvttph_epi16 (hxi, m8, hxh); + hxi = _mm_maskz_ipcvttph_epi16 (m8, hxh); + + hxi = _mm_ipcvttph_epu16 (hxh); + hxi = _mm_mask_ipcvttph_epu16 (hxi, m8, hxh); + hxi = _mm_maskz_ipcvttph_epu16 (m8, hxh); + + hxi = _mm_ipcvtps_epi32 (hx); + hxi = _mm_mask_ipcvtps_epi32 (hxi, m8, hx); + hxi = _mm_maskz_ipcvtps_epi32 (m8, hx); + + hxi = _mm_ipcvtps_epu32 (hx); + hxi = _mm_mask_ipcvtps_epu32 (hxi, m8, hx); + hxi = _mm_maskz_ipcvtps_epu32 (m8, hx); + + hxi = _mm_ipcvttps_epi32 (hx); + hxi = _mm_mask_ipcvttps_epi32 (hxi, m8, hx); + hxi = _mm_maskz_ipcvttps_epi32 (m8, hx); + + hxi = _mm_ipcvttps_epu32 (hx); + hxi = _mm_mask_ipcvttps_epu32 (hxi, m8, hx); + hxi = _mm_maskz_ipcvttps_epu32 (m8, hx); + + hxi = _mm_ipcvtnebf16_epi16 (hxbh); + hxi = _mm_mask_ipcvtnebf16_epi16 (hxi, m8, hxbh); + hxi = _mm_maskz_ipcvtnebf16_epi16 (m8, hxbh); + + hxi = _mm_ipcvtnebf16_epu16 (hxbh); + hxi = _mm_mask_ipcvtnebf16_epu16 (hxi, m8, hxbh); + hxi = _mm_maskz_ipcvtnebf16_epu16 (m8, hxbh); + + hxi = _mm_ipcvttnebf16_epi16 (hxbh); + hxi = _mm_mask_ipcvttnebf16_epi16 (hxi, m8, hxbh); + hxi = _mm_maskz_ipcvttnebf16_epi16 (m8, hxbh); + + hxi = _mm_ipcvttnebf16_epu16 (hxbh); + hxi = _mm_mask_ipcvttnebf16_epu16 (hxi, m8, hxbh); + hxi = _mm_maskz_ipcvttnebf16_epu16 (m8, hxbh); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162ibs-2.c new file mode 100644 index 000000000000..130f19b253a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + 
+#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtnebf162ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtnebf162ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162iubs-2.c new file mode 100644 index 000000000000..af6ec54236f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtnebf162iubs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtnebf162iubs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtnebf162iubs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2ibs-2.c new file mode 100644 index 000000000000..9954fc14c356 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2iubs-2.c new file mode 100644 index 000000000000..9bb25190af04 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2iubs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define 
AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2iubs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2iubs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtps2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtps2ibs-2.c new file mode 100644 index 000000000000..ce76ed780ebe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtps2ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtps2ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtps2ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162ibs-2.c new file mode 100644 index 000000000000..8eaf7bcff26b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttnebf162ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttnebf162ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162iubs-2.c new file mode 100644 index 000000000000..c12964a43573 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttnebf162iubs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define 
AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttnebf162iubs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttnebf162iubs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2ibs-2.c new file mode 100644 index 000000000000..e8a4abb83a46 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttph2ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttph2ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2iubs-2.c new file mode 100644 index 000000000000..3683ed0dc100 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttph2iubs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttph2iubs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttph2iubs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2ibs-2.c new file mode 100644 index 000000000000..4f8d45801720 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2ibs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define 
AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2ibs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2ibs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2iubs-2.c new file mode 100644 index 000000000000..defd38540bfb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2iubs-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2iubs-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvttps2iubs-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h index b61c03b4781e..b49ff061f787 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h @@ -48,6 +48,7 @@ MAKE_MASK_MERGE(i_uw, unsigned short) MAKE_MASK_MERGE(bf16_uw, unsigned short) MAKE_MASK_MERGE(i_ud, unsigned int) MAKE_MASK_MERGE(i_uq, unsigned long long) +MAKE_MASK_MERGE(bf16_bf, __bf16) #define MASK_MERGE(TYPE) merge_masking_##TYPE @@ -74,6 +75,7 @@ MAKE_MASK_ZERO(i_uw, unsigned short) MAKE_MASK_ZERO(bf16_uw, unsigned short) MAKE_MASK_ZERO(i_ud, unsigned int) MAKE_MASK_ZERO(i_uq, unsigned long long) +MAKE_MASK_ZERO(bf16_bf, __bf16) #define MASK_ZERO(TYPE) zero_masking_##TYPE diff --git a/gcc/testsuite/gcc.target/i386/m512-check.h b/gcc/testsuite/gcc.target/i386/m512-check.h index bdc682d63bbd..f22dda2113f5 100644 --- a/gcc/testsuite/gcc.target/i386/m512-check.h +++ b/gcc/testsuite/gcc.target/i386/m512-check.h @@ -73,6 +73,12 @@ typedef union unsigned short a[32]; } union512bf16_uw; +typedef union +{ + __m512bh x; + __bf16 a[32]; +} union512bf16_bf; 
+ typedef union { __m128h x; @@ -97,6 +103,18 @@ typedef union unsigned short a[16]; } union256bf16_uw; +typedef union +{ + __m128bh x; + __bf16 a[8]; +} union128bf16_bf; + +typedef union +{ + __m256bh x; + __bf16 a[16]; +} union256bf16_bf; + #define CHECK_ROUGH_EXP(UNION_TYPE, VALUE_TYPE, FMT) \ static int \ __attribute__((noinline, unused)) \ @@ -176,9 +194,12 @@ CHECK_ROUGH_EXP (union256h, _Float16, "%f") #if defined(AVX512BF16) CHECK_EXP (union512bf16_uw, unsigned short, "%d") +CHECK_EXP (union512bf16_bf, __bf16, "%f") #endif #if defined(AVX512BF16) CHECK_EXP (union128bf16_uw, unsigned short, "%d") CHECK_EXP (union256bf16_uw, unsigned short, "%d") +CHECK_EXP (union128bf16_bf, __bf16, "%f") +CHECK_EXP (union256bf16_bf, __bf16, "%f") #endif diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index e92d04af3f57..5669fa1aa00d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1043,4 +1043,24 @@ #define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +/* avx10_2-512satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) 
__builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8) + +/* avx10_2satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8) + #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 49a82d8a2d57..550d2633b78c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1431,3 +1431,55 @@ test_2 (_mm256_cmp_pbh_mask, __mmask16, __m256bh, __m256bh, 1) test_2 (_mm_cmp_pbh_mask, __mmask8, __m128bh, __m128bh, 1) test_3 (_mm256_mask_cmp_pbh_mask, __mmask16, __mmask16, __m256bh, __m256bh, 1) test_3 (_mm_mask_cmp_pbh_mask, __mmask8, __mmask8, __m128bh, __m128bh, 1) + +/* avx10_2-512satcvtintrin.h */ +test_1 (_mm512_ipcvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvt_roundps_epi32, __m512i, __m512, 8) +test_1 (_mm512_ipcvt_roundps_epu32, __m512i, __m512, 8) +test_1 (_mm512_ipcvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 
(_mm512_ipcvtt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvtt_roundps_epi32, __m512i, __m512, 8) +test_1 (_mm512_ipcvtt_roundps_epu32, __m512i, __m512, 8) +test_2 (_mm512_maskz_ipcvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvt_roundps_epi32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvt_roundps_epu32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvtt_roundps_epi32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvtt_roundps_epu32, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) + +/* avx10_2satcvtintrin.h */ +test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvt_roundph_epu16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvt_roundps_epi32, __m256i, __m256, 8) +test_1 (_mm256_ipcvt_roundps_epu32, __m256i, __m256, 8) +test_1 (_mm256_ipcvtt_roundph_epi16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvtt_roundph_epu16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvtt_roundps_epi32, __m256i, __m256, 8) +test_1 (_mm256_ipcvtt_roundps_epu32, __m256i, __m256, 8) +test_2 (_mm256_maskz_ipcvt_roundph_epi16, __m256i, __mmask16, __m256h, 
8) +test_2 (_mm256_maskz_ipcvt_roundph_epu16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvt_roundps_epi32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvt_roundps_epu32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvtt_roundph_epi16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvtt_roundph_epu16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvtt_roundps_epi32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvtt_roundps_epu32, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 193057a47197..ba67ee269149 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -1470,3 +1470,55 @@ test_2 (_mm256_cmp_pbh_mask, __mmask16, __m256bh, __m256bh, 1) test_2 (_mm_cmp_pbh_mask, __mmask8, __m128bh, __m128bh, 1) test_3 (_mm256_mask_cmp_pbh_mask, __mmask16, __mmask16, __m256bh, __m256bh, 1) test_3 (_mm_mask_cmp_pbh_mask, __mmask8, __mmask8, __m128bh, __m128bh, 1) + +/* avx10_2-512satcvtintrin.h */ +test_1 (_mm512_ipcvt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvt_roundps_epi32, __m512i, __m512, 8) +test_1 (_mm512_ipcvt_roundps_epu32, __m512i, __m512, 8) +test_1 
(_mm512_ipcvtt_roundph_epi16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvtt_roundph_epu16, __m512i, __m512h, 8) +test_1 (_mm512_ipcvtt_roundps_epi32, __m512i, __m512, 8) +test_1 (_mm512_ipcvtt_roundps_epu32, __m512i, __m512, 8) +test_2 (_mm512_maskz_ipcvt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvt_roundps_epi32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvt_roundps_epu32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvtt_roundph_epi16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvtt_roundph_epu16, __m512i, __mmask32, __m512h, 8) +test_2 (_mm512_maskz_ipcvtt_roundps_epi32, __m512i, __mmask16, __m512, 8) +test_2 (_mm512_maskz_ipcvtt_roundps_epu32, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8) +test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8) +test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8) + +/* avx10_2satcvtintrin.h */ +test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvt_roundph_epu16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvt_roundps_epi32, __m256i, __m256, 8) +test_1 (_mm256_ipcvt_roundps_epu32, __m256i, __m256, 8) +test_1 (_mm256_ipcvtt_roundph_epi16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvtt_roundph_epu16, __m256i, __m256h, 8) +test_1 (_mm256_ipcvtt_roundps_epi32, __m256i, __m256, 8) +test_1 (_mm256_ipcvtt_roundps_epu32, __m256i, __m256, 8) +test_2 
(_mm256_maskz_ipcvt_roundph_epi16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvt_roundph_epu16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvt_roundps_epi32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvt_roundps_epu32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvtt_roundph_epi16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvtt_roundph_epu16, __m256i, __mmask16, __m256h, 8) +test_2 (_mm256_maskz_ipcvtt_roundps_epi32, __m256i, __mmask8, __m256, 8) +test_2 (_mm256_maskz_ipcvtt_roundps_epu32, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8) +test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8) +test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index a33eb9945dd7..7e8b5d018717 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1017,6 +1017,26 @@ #define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) #define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +/* avx10_2-512satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 
D) __builtin_ia32_cvtps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8) + +/* avx10_2satcvtintrin.h */ +#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvtps2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8) +#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8) + #pragma GCC target 
("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512") #include <x86intrin.h>