https://gcc.gnu.org/g:d584660fd44c714855c5295b45cb4a06f1d82e58
commit r15-7134-gd584660fd44c714855c5295b45cb4a06f1d82e58 Author: Haochen Jiang <haochen.ji...@intel.com> Date: Thu Jan 23 09:51:57 2025 +0800 i386: Change mnemonics from VF[,N]M[ADD,SUB][132,213,231]NEPBF16 to VF[,N]M[ADD,SUB][132,213,231]BF16 gcc/ChangeLog: PR target/118270 * config/i386/avx10_2-512bf16intrin.h: Change intrin and builtin names according to new mnemonics. * config/i386/avx10_2bf16intrin.h: Ditto. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/sse.md (avx10_2_fmaddnepbf16_<mode>_maskz): Rename to... (avx10_2_fmaddbf16_<mode>_maskz): ...this. Adjust emit_insn. (avx10_2_fmaddnepbf16_<mode><sd_maskz_name>): Rename to... (avx10_2_fmaddbf16_<mode><sd_maskz_name>): ...this. Change instruction name output. (avx10_2_fmaddnepbf16_<mode>_mask): Rename to... (avx10_2_fmaddbf16_<mode>_mask): ...this. Change instruction name output. (avx10_2_fmaddnepbf16_<mode>_mask3): Rename to... (avx10_2_fmaddbf16_<mode>_mask3): ...this. Change instruction name output. (avx10_2_fnmaddnepbf16_<mode>_maskz): Rename to... (avx10_2_fnmaddbf16_<mode>_maskz): ...this. Adjust emit_insn. (avx10_2_fnmaddnepbf16_<mode><sd_maskz_name>): Rename to... (avx10_2_fnmaddbf16_<mode><sd_maskz_name>): ...this. Change instruction name output. (avx10_2_fnmaddnepbf16_<mode>_mask): Rename to... (avx10_2_fnmaddbf16_<mode>_mask): ...this. Change instruction name output. (avx10_2_fnmaddnepbf16_<mode>_mask3): Rename to... (avx10_2_fnmaddbf16_<mode>_mask3): ...this. Change instruction name output. (avx10_2_fmsubnepbf16_<mode>_maskz): Rename to... (avx10_2_fmsubbf16_<mode>_maskz): ...this. Adjust emit_insn. (avx10_2_fmsubnepbf16_<mode><sd_maskz_name>): Rename to... (avx10_2_fmsubbf16_<mode><sd_maskz_name>): ...this. Change instruction name output. (avx10_2_fmsubnepbf16_<mode>_mask): Rename to... (avx10_2_fmsubbf16_<mode>_mask): ...this. Change instruction name output. (avx10_2_fmsubnepbf16_<mode>_mask3): Rename to... (avx10_2_fmsubbf16_<mode>_mask3): ...this. Change instruction name output. (avx10_2_fnmsubnepbf16_<mode>_maskz): Rename to... (avx10_2_fnmsubbf16_<mode>_maskz): ...this. Adjust emit_insn. (avx10_2_fnmsubnepbf16_<mode><sd_maskz_name>): Rename to... (avx10_2_fnmsubbf16_<mode><sd_maskz_name>): ...this. Change instruction name output. (avx10_2_fnmsubnepbf16_<mode>_mask): Rename to... (avx10_2_fnmsubbf16_<mode>_mask): ...this. Change instruction name output. (avx10_2_fnmsubnepbf16_<mode>_mask3): Rename to... (avx10_2_fnmsubbf16_<mode>_mask3): ...this. Change instruction name output. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-512-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-512-bf-vector-fma-1.c: Move to... * gcc.target/i386/avx10_2-512-bf16-vector-fma-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vfmaddXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vfmsubXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vfnmaddXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vfnmsubXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-bf-vector-fma-1.c: Move to... * gcc.target/i386/avx10_2-bf16-vector-fma-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c: Move to... * gcc.target/i386/avx10_2-partial-bf16-vector-fma-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-vfmaddXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-vfmsubXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-vfnmaddXXXbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-vfnmsubXXXbf16-2.c: ...here. Adjust intrin call. Diff: --- gcc/config/i386/avx10_2-512bf16intrin.h | 86 +++++----- gcc/config/i386/avx10_2bf16intrin.h | 176 ++++++++++----------- gcc/config/i386/i386-builtin.def | 72 ++++----- gcc/config/i386/sse.md | 148 ++++++++--------- .../gcc.target/i386/avx10_2-512-bf-vector-fma-1.c | 34 ---- gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c | 64 ++++---- .../i386/avx10_2-512-bf16-vector-fma-1.c | 34 ++++ ...XXnepbf16-2.c => avx10_2-512-vfmaddXXXbf16-2.c} | 4 +- ...XXnepbf16-2.c => avx10_2-512-vfmsubXXXbf16-2.c} | 4 +- ...Xnepbf16-2.c => avx10_2-512-vfnmaddXXXbf16-2.c} | 4 +- ...Xnepbf16-2.c => avx10_2-512-vfnmsubXXXbf16-2.c} | 4 +- .../gcc.target/i386/avx10_2-bf-vector-fma-1.c | 63 -------- gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c | 128 +++++++-------- .../gcc.target/i386/avx10_2-bf16-vector-fma-1.c | 63 ++++++++ .../i386/avx10_2-partial-bf-vector-fma-1.c | 61 ------- .../i386/avx10_2-partial-bf16-vector-fma-1.c | 61 +++++++ ...addXXXnepbf16-2.c => avx10_2-vfmaddXXXbf16-2.c} | 4 +- ...subXXXnepbf16-2.c => avx10_2-vfmsubXXXbf16-2.c} | 4 +- ...ddXXXnepbf16-2.c => avx10_2-vfnmaddXXXbf16-2.c} | 4 +- ...ubXXXnepbf16-2.c => avx10_2-vfnmsubXXXbf16-2.c} | 4 +- 20 files changed, 511 insertions(+), 511 deletions(-) diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h index 4d197afc7027..4570c8f6a6b9 100644 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ b/gcc/config/i386/avx10_2-512bf16intrin.h @@ -218,142 +218,142 @@ _mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1); + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmaddne_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) +_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmaddne_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) +_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) { return (__m512bh) - __builtin_ia32_fmaddnepbf16512_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmaddne_pbh (__mmask32 __U, __m512bh __A, +_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmaddnepbf16512_maskz (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1); + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmsubne_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) +_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmsubne_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) +_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) { return (__m512bh) - __builtin_ia32_fmsubnepbf16512_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmsubne_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) +_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fmsubnepbf16512_maskz (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1); + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmaddne_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) +_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmaddne_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) +_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) { return (__m512bh) - __builtin_ia32_fnmaddnepbf16512_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmaddne_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) +_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmaddnepbf16512_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1); + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmsubne_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) +_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmsubne_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) +_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) { return (__m512bh) - __builtin_ia32_fnmsubnepbf16512_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmsubne_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) +_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) { return (__m512bh) - __builtin_ia32_fnmsubnepbf16512_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); } extern __inline__ __m512bh diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index 216e8bd32cdd..6c65a3c261a7 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -400,281 +400,281 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +_mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1); + __builtin_ia32_fmaddbf16256_mask (__A, __B, __C, (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fmaddne_pbh (__m256bh __A, __mmask16 __U, - __m256bh __B, __m256bh __C) +_mm256_mask_fmadd_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16256_mask (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask3_fmaddne_pbh (__m256bh __A, __m256bh __B, - __m256bh __C, __mmask16 __U) +_mm256_mask3_fmadd_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) { return (__m256bh) - __builtin_ia32_fmaddnepbf16256_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16256_mask3 (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fmaddne_pbh (__mmask16 __U, __m256bh __A, - __m256bh __B, __m256bh __C) +_mm256_maskz_fmadd_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fmaddnepbf16256_maskz (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16256_maskz (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +_mm_fmadd_pbh (__m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1); + __builtin_ia32_fmaddbf16128_mask (__A, __B, __C, (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fmaddne_pbh (__m128bh __A, __mmask8 __U, - __m128bh __B, __m128bh __C) +_mm_mask_fmadd_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16128_mask (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask3_fmaddne_pbh (__m128bh __A, __m128bh __B, - __m128bh __C, __mmask8 __U) +_mm_mask3_fmadd_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) { return (__m128bh) - __builtin_ia32_fmaddnepbf16128_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16128_mask3 (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fmaddne_pbh (__mmask8 __U, __m128bh __A, - __m128bh __B, __m128bh __C) +_mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmaddnepbf16128_maskz (__A, __B, __C, __U); + __builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +_mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1); + __builtin_ia32_fmsubbf16256_mask (__A, __B, __C, (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fmsubne_pbh (__m256bh __A, __mmask16 __U, - __m256bh __B, __m256bh __C) +_mm256_mask_fmsub_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) { - return (__m256bh) __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, __U); + return (__m256bh) __builtin_ia32_fmsubbf16256_mask (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask3_fmsubne_pbh (__m256bh __A, __m256bh __B, - __m256bh __C, __mmask16 __U) +_mm256_mask3_fmsub_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) { return (__m256bh) - __builtin_ia32_fmsubnepbf16256_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16256_mask3 (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fmsubne_pbh (__mmask16 __U, __m256bh __A, - __m256bh __B, __m256bh __C) +_mm256_maskz_fmsub_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fmsubnepbf16256_maskz (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16256_maskz (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +_mm_fmsub_pbh (__m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1); + __builtin_ia32_fmsubbf16128_mask (__A, __B, __C, (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fmsubne_pbh (__m128bh __A, __mmask8 __U, - __m128bh __B, __m128bh __C) +_mm_mask_fmsub_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16128_mask (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask3_fmsubne_pbh (__m128bh __A, __m128bh __B, - __m128bh __C, __mmask8 __U) +_mm_mask3_fmsub_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) { return (__m128bh) - __builtin_ia32_fmsubnepbf16128_mask3 (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16128_mask3 (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fmsubne_pbh (__mmask8 __U, __m128bh __A, - __m128bh __B, __m128bh __C) +_mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fmsubnepbf16128_maskz (__A, __B, __C, __U); + __builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fnmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +_mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1); + __builtin_ia32_fnmaddbf16256_mask (__A, __B, __C, (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fnmaddne_pbh (__m256bh __A, __mmask16 __U, - __m256bh __B, __m256bh __C) +_mm256_mask_fnmadd_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16256_mask (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask3_fnmaddne_pbh (__m256bh __A, __m256bh __B, - __m256bh __C, __mmask16 __U) +_mm256_mask3_fnmadd_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) { return (__m256bh) - __builtin_ia32_fnmaddnepbf16256_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16256_mask3 (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fnmaddne_pbh (__mmask16 __U, __m256bh __A, - __m256bh __B, __m256bh __C) +_mm256_maskz_fnmadd_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmaddnepbf16256_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16256_maskz (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fnmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +_mm_fnmadd_pbh (__m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1); + __builtin_ia32_fnmaddbf16128_mask (__A, __B, __C, (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fnmaddne_pbh (__m128bh __A, __mmask8 __U, - __m128bh __B, __m128bh __C) +_mm_mask_fnmadd_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16128_mask (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask3_fnmaddne_pbh (__m128bh __A, __m128bh __B, - __m128bh __C, __mmask8 __U) +_mm_mask3_fnmadd_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) { return (__m128bh) - __builtin_ia32_fnmaddnepbf16128_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16128_mask3 (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fnmaddne_pbh (__mmask8 __U, __m128bh __A, - __m128bh __B, __m128bh __C) +_mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmaddnepbf16128_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_fnmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +_mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1); + __builtin_ia32_fnmsubbf16256_mask (__A, __B, __C, (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_fnmsubne_pbh (__m256bh __A, __mmask16 __U, - __m256bh __B, __m256bh __C) +_mm256_mask_fnmsub_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16256_mask (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask3_fnmsubne_pbh (__m256bh __A, __m256bh __B, - __m256bh __C, __mmask16 __U) +_mm256_mask3_fnmsub_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) { return (__m256bh) - __builtin_ia32_fnmsubnepbf16256_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16256_mask3 (__A, __B, __C, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_fnmsubne_pbh (__mmask16 __U, __m256bh __A, - __m256bh __B, __m256bh __C) +_mm256_maskz_fnmsub_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) { return (__m256bh) - __builtin_ia32_fnmsubnepbf16256_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16256_maskz (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_fnmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +_mm_fnmsub_pbh (__m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1); + __builtin_ia32_fnmsubbf16128_mask (__A, __B, __C, (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_fnmsubne_pbh (__m128bh __A, __mmask8 __U, - __m128bh __B, __m128bh __C) +_mm_mask_fnmsub_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16128_mask (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask3_fnmsubne_pbh (__m128bh __A, __m128bh __B, - __m128bh __C, __mmask8 __U) +_mm_mask3_fnmsub_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) { return (__m128bh) - __builtin_ia32_fnmsubnepbf16128_mask3 (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16128_mask3 (__A, __B, __C, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_fnmsubne_pbh (__mmask8 __U, __m128bh __A, - __m128bh __B, __m128bh __C) +_mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) { return (__m128bh) - __builtin_ia32_fnmsubnepbf16128_maskz (__A, __B, __C, __U); + __builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U); } extern __inline__ __m256bh diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 1de85b7454fd..5a643ca662e8 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3221,42 +3221,42 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__b BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf_mask, "__builtin_ia32_scalefpbf16256_mask", IX86_BUILTIN_SCALEFPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf, "__builtin_ia32_scalefpbf16128", IX86_BUILTIN_SCALEFPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf_mask, "__builtin_ia32_scalefpbf16128_mask", IX86_BUILTIN_SCALEFPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask, "__builtin_ia32_fmaddnepbf16512_mask", IX86_BUILTIN_FMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask3, "__builtin_ia32_fmaddnepbf16512_mask3", IX86_BUILTIN_FMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_maskz, "__builtin_ia32_fmaddnepbf16512_maskz", IX86_BUILTIN_FMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask, "__builtin_ia32_fmaddnepbf16256_mask", IX86_BUILTIN_FMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask3, "__builtin_ia32_fmaddnepbf16256_mask3", IX86_BUILTIN_FMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_maskz, "__builtin_ia32_fmaddnepbf16256_maskz", IX86_BUILTIN_FMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask, "__builtin_ia32_fmaddnepbf16128_mask", IX86_BUILTIN_FMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask3, "__builtin_ia32_fmaddnepbf16128_mask3", IX86_BUILTIN_FMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_maskz, "__builtin_ia32_fmaddnepbf16128_maskz", IX86_BUILTIN_FMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask, "__builtin_ia32_fmsubnepbf16512_mask", IX86_BUILTIN_FMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask3, "__builtin_ia32_fmsubnepbf16512_mask3", IX86_BUILTIN_FMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_maskz, "__builtin_ia32_fmsubnepbf16512_maskz", IX86_BUILTIN_FMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask, "__builtin_ia32_fmsubnepbf16256_mask", IX86_BUILTIN_FMSUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask3, "__builtin_ia32_fmsubnepbf16256_mask3", IX86_BUILTIN_FMSUBNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_maskz, "__builtin_ia32_fmsubnepbf16256_maskz", IX86_BUILTIN_FMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask, "__builtin_ia32_fmsubnepbf16128_mask", IX86_BUILTIN_FMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask3, "__builtin_ia32_fmsubnepbf16128_mask3", IX86_BUILTIN_FMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_maskz, "__builtin_ia32_fmsubnepbf16128_maskz", IX86_BUILTIN_FMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask, "__builtin_ia32_fnmaddnepbf16512_mask", IX86_BUILTIN_FNMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask3, "__builtin_ia32_fnmaddnepbf16512_mask3", IX86_BUILTIN_FNMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_maskz, "__builtin_ia32_fnmaddnepbf16512_maskz", IX86_BUILTIN_FNMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask, "__builtin_ia32_fnmaddnepbf16256_mask", IX86_BUILTIN_FNMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask3, "__builtin_ia32_fnmaddnepbf16256_mask3", IX86_BUILTIN_FNMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_maskz, "__builtin_ia32_fnmaddnepbf16256_maskz", IX86_BUILTIN_FNMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask, "__builtin_ia32_fnmaddnepbf16128_mask", IX86_BUILTIN_FNMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask3, "__builtin_ia32_fnmaddnepbf16128_mask3", IX86_BUILTIN_FNMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_maskz, "__builtin_ia32_fnmaddnepbf16128_maskz", IX86_BUILTIN_FNMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask, "__builtin_ia32_fnmsubnepbf16512_mask", IX86_BUILTIN_FNMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask3, "__builtin_ia32_fnmsubnepbf16512_mask3", IX86_BUILTIN_FNMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_maskz, "__builtin_ia32_fnmsubnepbf16512_maskz", IX86_BUILTIN_FNMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask, "__builtin_ia32_fnmsubnepbf16256_mask", IX86_BUILTIN_FNMSUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask3, "__builtin_ia32_fnmsubnepbf16256_mask3", IX86_BUILTIN_FNMSUBNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_maskz, "__builtin_ia32_fnmsubnepbf16256_maskz", IX86_BUILTIN_FNMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask, "__builtin_ia32_fnmsubnepbf16128_mask", IX86_BUILTIN_FNMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask3, "__builtin_ia32_fnmsubnepbf16128_mask3", IX86_BUILTIN_FNMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_maskz, "__builtin_ia32_fnmsubnepbf16128_maskz", IX86_BUILTIN_FNMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_mask, "__builtin_ia32_fmaddbf16512_mask", IX86_BUILTIN_FMADDBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_mask3, "__builtin_ia32_fmaddbf16512_mask3", IX86_BUILTIN_FMADDBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_maskz, "__builtin_ia32_fmaddbf16512_maskz", IX86_BUILTIN_FMADDBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v16bf_mask, "__builtin_ia32_fmaddbf16256_mask", IX86_BUILTIN_FMADDBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v16bf_mask3, "__builtin_ia32_fmaddbf16256_mask3", IX86_BUILTIN_FMADDBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v16bf_maskz, "__builtin_ia32_fmaddbf16256_maskz", IX86_BUILTIN_FMADDBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v8bf_mask, "__builtin_ia32_fmaddbf16128_mask", IX86_BUILTIN_FMADDBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v8bf_mask3, "__builtin_ia32_fmaddbf16128_mask3", IX86_BUILTIN_FMADDBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddbf16_v8bf_maskz, "__builtin_ia32_fmaddbf16128_maskz", IX86_BUILTIN_FMADDBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubbf16_v32bf_mask, "__builtin_ia32_fmsubbf16512_mask", IX86_BUILTIN_FMSUBBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubbf16_v32bf_mask3, "__builtin_ia32_fmsubbf16512_mask3", IX86_BUILTIN_FMSUBBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubbf16_v32bf_maskz, "__builtin_ia32_fmsubbf16512_maskz", IX86_BUILTIN_FMSUBBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v16bf_mask, "__builtin_ia32_fmsubbf16256_mask", IX86_BUILTIN_FMSUBBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v16bf_mask3, "__builtin_ia32_fmsubbf16256_mask3", IX86_BUILTIN_FMSUBBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v16bf_maskz, "__builtin_ia32_fmsubbf16256_maskz", IX86_BUILTIN_FMSUBBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v8bf_mask, "__builtin_ia32_fmsubbf16128_mask", IX86_BUILTIN_FMSUBBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v8bf_mask3, "__builtin_ia32_fmsubbf16128_mask3", IX86_BUILTIN_FMSUBBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubbf16_v8bf_maskz, "__builtin_ia32_fmsubbf16128_maskz", IX86_BUILTIN_FMSUBBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddbf16_v32bf_mask, "__builtin_ia32_fnmaddbf16512_mask", IX86_BUILTIN_FNMADDBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddbf16_v32bf_mask3, "__builtin_ia32_fnmaddbf16512_mask3", IX86_BUILTIN_FNMADDBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddbf16_v32bf_maskz, "__builtin_ia32_fnmaddbf16512_maskz", IX86_BUILTIN_FNMADDBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v16bf_mask, "__builtin_ia32_fnmaddbf16256_mask", IX86_BUILTIN_FNMADDBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v16bf_mask3, "__builtin_ia32_fnmaddbf16256_mask3", IX86_BUILTIN_FNMADDBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v16bf_maskz, "__builtin_ia32_fnmaddbf16256_maskz", IX86_BUILTIN_FNMADDBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v8bf_mask, "__builtin_ia32_fnmaddbf16128_mask", IX86_BUILTIN_FNMADDBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v8bf_mask3, "__builtin_ia32_fnmaddbf16128_mask3", IX86_BUILTIN_FNMADDBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddbf16_v8bf_maskz, "__builtin_ia32_fnmaddbf16128_maskz", IX86_BUILTIN_FNMADDBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubbf16_v32bf_mask, "__builtin_ia32_fnmsubbf16512_mask", IX86_BUILTIN_FNMSUBBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubbf16_v32bf_mask3, "__builtin_ia32_fnmsubbf16512_mask3", IX86_BUILTIN_FNMSUBBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubbf16_v32bf_maskz, "__builtin_ia32_fnmsubbf16512_maskz", IX86_BUILTIN_FNMSUBBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v16bf_mask, "__builtin_ia32_fnmsubbf16256_mask", IX86_BUILTIN_FNMSUBBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v16bf_mask3, "__builtin_ia32_fnmsubbf16256_mask3", IX86_BUILTIN_FNMSUBBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v16bf_maskz, "__builtin_ia32_fnmsubbf16256_maskz", IX86_BUILTIN_FNMSUBBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_mask, "__builtin_ia32_fnmsubbf16128_mask", IX86_BUILTIN_FNMSUBBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_mask3, "__builtin_ia32_fnmsubbf16128_mask3", IX86_BUILTIN_FNMSUBBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_maskz, "__builtin_ia32_fnmsubbf16128_maskz", IX86_BUILTIN_FNMSUBBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rsqrtpbf16_v32bf_mask, "__builtin_ia32_rsqrtpbf16512_mask", IX86_BUILTIN_RSQRTPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtpbf16_v16bf_mask, "__builtin_ia32_rsqrtpbf16256_mask", IX86_BUILTIN_RSQRTPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtpbf16_v8bf_mask, "__builtin_ia32_rsqrtpbf16128_mask", IX86_BUILTIN_RSQRTPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 43141246b993..c3cb93288da7 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -32111,7 +32111,7 @@ "v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) -(define_expand "avx10_2_fmaddnepbf16_<mode>_maskz" +(define_expand "avx10_2_fmaddbf16_<mode>_maskz" [(match_operand:VBF_AVX10_2 0 "register_operand") (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") @@ -32119,14 +32119,14 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2_256" { - emit_insn (gen_avx10_2_fmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1], - operands[2], operands[3], - CONST0_RTX(<MODE>mode), - operands[4])); + emit_insn (gen_avx10_2_fmaddbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX (<MODE>mode), + operands[4])); DONE; }) -(define_insn "avx10_2_fmaddnepbf16_<mode><sd_maskz_name>" +(define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") (fma:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") @@ -32134,14 +32134,14 @@ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2_256" "@ - vfmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfmadd213bf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfmadd231bf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fmaddnepbf16_<mode>_mask" +(define_insn "avx10_2_fmaddbf16_<mode>_mask" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 @@ -32152,13 +32152,13 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2_256" "@ - vfmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmadd132bf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmadd213bf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fmaddnepbf16_<mode>_mask3" +(define_insn "avx10_2_fmaddbf16_<mode>_mask3" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 @@ -32168,12 +32168,12 @@ (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2_256" - "vfmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfmadd231bf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "avx10_2_fnmaddnepbf16_<mode>_maskz" +(define_expand "avx10_2_fnmaddbf16_<mode>_maskz" [(match_operand:VBF_AVX10_2 0 "register_operand") (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") @@ -32181,34 +32181,34 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2_256" { - emit_insn (gen_avx10_2_fnmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1], - operands[2], operands[3], - CONST0_RTX(<MODE>mode), - operands[4])); + emit_insn (gen_avx10_2_fnmaddbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX (<MODE>mode), + operands[4])); DONE; }) -(define_insn "avx10_2_fnmaddnepbf16_<mode><sd_maskz_name>" +(define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2_256" "@ - vfnmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfnmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfnmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfnmadd213bf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfnmadd231bf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask" +(define_insn "avx10_2_fnmaddbf16_<mode>_mask" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) @@ -32216,29 +32216,29 @@ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2_256" "@ - vfnmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfnmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfnmadd132bf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmadd213bf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask3" +(define_insn "avx10_2_fnmaddbf16_<mode>_mask3" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2_256" - "vfnmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfnmadd231bf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "avx10_2_fmsubnepbf16_<mode>_maskz" +(define_expand "avx10_2_fmsubbf16_<mode>_maskz" [(match_operand:VBF_AVX10_2 0 "register_operand") (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") @@ -32246,64 +32246,64 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2_256" { - emit_insn (gen_avx10_2_fmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1], - operands[2], operands[3], - CONST0_RTX(<MODE>mode), - operands[4])); + emit_insn (gen_avx10_2_fmsubbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX (<MODE>mode), + operands[4])); DONE; }) -(define_insn "avx10_2_fmsubnepbf16_<mode><sd_maskz_name>" +(define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") (fma:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2_256" "@ - vfmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfmsub213bf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfmsub231bf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fmsubnepbf16_<mode>_mask" +(define_insn "avx10_2_fmsubbf16_<mode>_mask" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2_256" "@ - vfmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfmsub132bf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmsub213bf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fmsubnepbf16_<mode>_mask3" +(define_insn "avx10_2_fmsubbf16_<mode>_mask3" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2_256" - "vfmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfmsub231bf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "avx10_2_fnmsubnepbf16_<mode>_maskz" +(define_expand "avx10_2_fnmsubbf16_<mode>_maskz" [(match_operand:VBF_AVX10_2 0 "register_operand") (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") @@ -32311,62 +32311,62 @@ (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2_256" { - emit_insn (gen_avx10_2_fnmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1], - operands[2], operands[3], - CONST0_RTX(<MODE>mode), - operands[4])); + emit_insn (gen_avx10_2_fnmsubbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX (<MODE>mode), + operands[4])); DONE; }) -(define_insn "avx10_2_fnmsubnepbf16_<mode><sd_maskz_name>" +(define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2_256" "@ - vfnmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} - vfnmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} - vfnmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfnmsub213bf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfnmsub231bf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask" +(define_insn "avx10_2_fnmsubbf16_<mode>_mask" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2_256" "@ - vfnmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} - vfnmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + vfnmsub132bf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmsub213bf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask3" +(define_insn "avx10_2_fnmsubbf16_<mode>_mask3" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (vec_merge:VBF_AVX10_2 (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 + (neg:VBF_AVX10_2 (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2_256" - "vfnmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + "vfnmsub231bf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" [(set_attr "prefix" "evex") (set_attr "type" "ssemuladd") (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c deleted file mode 100644 index 0a7cecaae029..000000000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c +++ /dev/null @@ -1,34 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -O2" } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ - -#include <immintrin.h> - -typedef __bf16 v32bf __attribute__ ((__vector_size__ (64))); - -v32bf -foo_madd (v32bf a, v32bf b, v32bf c) -{ - return a * b + c; -} - -v32bf -foo_msub (v32bf a, v32bf b, v32bf c) -{ - return a * b - c; -} - -v32bf -foo_nmadd (v32bf a, v32bf b, v32bf c) -{ - return -a * b + c; -} - -v32bf -foo_nmsub (v32bf a, v32bf b, v32bf c) -{ - return -a * b - c; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c index 52c802d48803..488ccc9e5a0c 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c @@ -21,22 +21,22 @@ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -95,22 +95,22 @@ avx10_2_512_test (void) res = _mm512_mask_scalef_pbh (res, m32, x1, x2); res = _mm512_maskz_scalef_pbh (m32, x1, x2); - res = _mm512_fmaddne_pbh (res, x1, x2); - res = _mm512_mask_fmaddne_pbh (res, m32, x1, x2); - res = _mm512_mask3_fmaddne_pbh (res, x1, x2, m32); - res = _mm512_maskz_fmaddne_pbh (m32,res, x1, x2); - res = _mm512_fmsubne_pbh (res, x1, x2); - res = _mm512_mask_fmsubne_pbh (res, m32, x1, x2); - res = _mm512_mask3_fmsubne_pbh (res, x1, x2, m32); - res = _mm512_maskz_fmsubne_pbh (m32,res, x1, x2); - res = _mm512_fnmaddne_pbh (res, x1, x2); - res = _mm512_mask_fnmaddne_pbh (res, m32, x1, x2); - res = _mm512_mask3_fnmaddne_pbh (res, x1, x2, m32); - res = _mm512_maskz_fnmaddne_pbh (m32,res, x1, x2); - res = _mm512_fnmsubne_pbh (res, x1, x2); - res = _mm512_mask_fnmsubne_pbh (res, m32, x1, x2); - res = _mm512_mask3_fnmsubne_pbh (res, x1, x2, m32); - res = _mm512_maskz_fnmsubne_pbh (m32,res, x1, x2); + res = _mm512_fmadd_pbh (res, x1, x2); + res = _mm512_mask_fmadd_pbh (res, m32, x1, x2); + res = _mm512_mask3_fmadd_pbh (res, x1, x2, m32); + res = _mm512_maskz_fmadd_pbh (m32,res, x1, x2); + res = _mm512_fmsub_pbh (res, x1, x2); + res = _mm512_mask_fmsub_pbh (res, m32, x1, x2); + res = _mm512_mask3_fmsub_pbh (res, x1, x2, m32); + res = _mm512_maskz_fmsub_pbh (m32,res, x1, x2); + res = _mm512_fnmadd_pbh (res, x1, x2); + res = _mm512_mask_fnmadd_pbh (res, m32, x1, x2); + res = _mm512_mask3_fnmadd_pbh (res, x1, x2, m32); + res = _mm512_maskz_fnmadd_pbh (m32,res, x1, x2); + res = _mm512_fnmsub_pbh (res, x1, x2); + res = _mm512_mask_fnmsub_pbh (res, m32, x1, x2); + res = _mm512_mask3_fnmsub_pbh (res, x1, x2, m32); + res = _mm512_maskz_fnmsub_pbh (m32,res, x1, x2); res = _mm512_rsqrt_pbh (x1); res = _mm512_mask_rsqrt_pbh (res, m32, x1); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-fma-1.c new file mode 100644 index 000000000000..77198d2f1726 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-fma-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +typedef __bf16 v32bf __attribute__ ((__vector_size__ (64))); + +v32bf +foo_madd (v32bf a, v32bf b, v32bf c) +{ + return a * b + c; +} + +v32bf +foo_msub (v32bf a, v32bf b, v32bf c) +{ + return a * b - c; +} + +v32bf +foo_nmadd (v32bf a, v32bf b, v32bf c) +{ + return -a * b + c; +} + +v32bf +foo_nmsub (v32bf a, v32bf b, v32bf c) +{ + return -a * b - c; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXbf16-2.c similarity index 90% rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXbf16-2.c index 702121024d01..2e5c424fd432 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXbf16-2.c @@ -40,8 +40,8 @@ TEST (void) MASK_MERGE (bf16_uw) (res1.a, mask, SIZE); MASK_MERGE (bf16_uw) (res2.a, mask, SIZE); - res1.x = INTRINSIC (_mask_fmaddne_pbh) (res1.x, mask, src1.x, src2.x); - res2.x = INTRINSIC (_mask3_fmaddne_pbh) (src1.x, src2.x, res2.x, mask); + res1.x = INTRINSIC (_mask_fmadd_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fmadd_pbh) (src1.x, src2.x, res2.x, mask); MASK_MERGE (bf16_uw) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXbf16-2.c similarity index 90% rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXbf16-2.c index a071ec1d0aea..983ca2e517dd 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXbf16-2.c @@ -41,8 +41,8 @@ TEST (void) MASK_MERGE (bf16_uw) (res1.a, mask, SIZE); MASK_MERGE (bf16_uw) (res2.a, mask, SIZE); - res1.x = INTRINSIC (_mask_fmsubne_pbh) (res1.x, mask, src1.x, src2.x); - res2.x = INTRINSIC (_mask3_fmsubne_pbh) (src1.x, src2.x, res2.x, mask); + res1.x = INTRINSIC (_mask_fmsub_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fmsub_pbh) (src1.x, src2.x, res2.x, mask); MASK_MERGE (bf16_uw) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXbf16-2.c similarity index 90% rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXbf16-2.c index d2559494bb75..0dd199642848 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXbf16-2.c @@ -41,8 +41,8 @@ TEST (void) MASK_MERGE (bf16_uw) (res1.a, mask, SIZE); MASK_MERGE (bf16_uw) (res2.a, mask, SIZE); - res1.x = INTRINSIC (_mask_fnmaddne_pbh) (res1.x, mask, src1.x, src2.x); - res2.x = INTRINSIC (_mask3_fnmaddne_pbh) (src1.x, src2.x, res2.x, mask); + res1.x = INTRINSIC (_mask_fnmadd_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fnmadd_pbh) (src1.x, src2.x, res2.x, mask); MASK_MERGE (bf16_uw) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXbf16-2.c similarity index 90% rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXbf16-2.c index fcc976fe15cc..95ed19ca3a37 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXbf16-2.c @@ -41,8 +41,8 @@ TEST (void) MASK_MERGE (bf16_uw) (res1.a, mask, SIZE); MASK_MERGE (bf16_uw) (res2.a, mask, SIZE); - res1.x = INTRINSIC (_mask_fnmsubne_pbh) (res1.x, mask, src1.x, src2.x); - res2.x = INTRINSIC (_mask3_fnmsubne_pbh) (src1.x, src2.x, res2.x, mask); + res1.x = INTRINSIC (_mask_fnmsub_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fnmsub_pbh) (src1.x, src2.x, res2.x, mask); MASK_MERGE (bf16_uw) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c deleted file mode 100644 index 31cf28ed7c3f..000000000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c +++ /dev/null @@ -1,63 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ - -#include <immintrin.h> - -typedef __bf16 v16bf __attribute__ ((__vector_size__ (32))); -typedef __bf16 v8bf __attribute__ ((__vector_size__ (16))); - -v16bf -foo_madd_256 (v16bf a, v16bf b, v16bf c) -{ - return a * b + c; -} - -v16bf -foo_msub_256 (v16bf a, v16bf b, v16bf c) -{ - return a * b - c; -} - -v16bf -foo_nmadd_256 (v16bf a, v16bf b, v16bf c) -{ - return -a * b + c; -} - -v16bf -foo_nmsub_256 (v16bf a, v16bf b, v16bf c) -{ - return -a * b - c; -} - -v8bf -foo_madd_128 (v8bf a, v8bf b, v8bf c) -{ - return a * b + c; -} - -v8bf -foo_msub_128 (v8bf a, v8bf b, v8bf c) -{ - return a * b - c; -} - -v8bf -foo_nmadd_128 (v8bf a, v8bf b, v8bf c) -{ - return -a * b + c; -} - -v8bf -foo_nmsub_128 (v8bf a, v8bf b, v8bf c) -{ - return -a * b - c; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c index 7512af7aede8..d4e540036c4c 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c @@ -42,38 +42,38 @@ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -185,41 +185,41 @@ avx10_2_test (void) res1 = _mm_mask_scalef_pbh (res1, m8, x3, x4); res1 = _mm_maskz_scalef_pbh (m8, x3, x4); - res = _mm256_fmaddne_pbh (res, x1, x2); - res = _mm256_mask_fmaddne_pbh (res, m16, x1, x2); - res = _mm256_mask3_fmaddne_pbh (res, x1, x2, m16); - res = _mm256_maskz_fmaddne_pbh (m16,res, x1, x2); - res1 = _mm_fmaddne_pbh (res1, x3, x4); - res1 = _mm_mask_fmaddne_pbh (res1, m8, x3, x4); - res1 = _mm_mask3_fmaddne_pbh (res1, x3, x4, m8); - res1 = _mm_maskz_fmaddne_pbh (m8,res1, x3, x4); + res = _mm256_fmadd_pbh (res, x1, x2); + res = _mm256_mask_fmadd_pbh (res, m16, x1, x2); + res = _mm256_mask3_fmadd_pbh (res, x1, x2, m16); + res = _mm256_maskz_fmadd_pbh (m16,res, x1, x2); + res1 = _mm_fmadd_pbh (res1, x3, x4); + res1 = _mm_mask_fmadd_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fmadd_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fmadd_pbh (m8,res1, x3, x4); - res = _mm256_fmsubne_pbh (res, x1, x2); - res = _mm256_mask_fmsubne_pbh (res, m16, x1, x2); - res = _mm256_mask3_fmsubne_pbh (res, x1, x2, m16); - res = _mm256_maskz_fmsubne_pbh (m16,res, x1, x2); - res1 = _mm_fmsubne_pbh (res1, x3, x4); - res1 = _mm_mask_fmsubne_pbh (res1, m8, x3, x4); - res1 = _mm_mask3_fmsubne_pbh (res1, x3, x4, m8); - res1 = _mm_maskz_fmsubne_pbh (m8,res1, x3, x4); + res = _mm256_fmsub_pbh (res, x1, x2); + res = _mm256_mask_fmsub_pbh (res, m16, x1, x2); + res = _mm256_mask3_fmsub_pbh (res, x1, x2, m16); + res = _mm256_maskz_fmsub_pbh (m16,res, x1, x2); + res1 = _mm_fmsub_pbh (res1, x3, x4); + res1 = _mm_mask_fmsub_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fmsub_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fmsub_pbh (m8,res1, x3, x4); - res = _mm256_fnmaddne_pbh (res, x1, x2); - res = _mm256_mask_fnmaddne_pbh (res, m16, x1, x2); - res = _mm256_mask3_fnmaddne_pbh (res, x1, x2, m16); - res = _mm256_maskz_fnmaddne_pbh (m16,res, x1, x2); - res1 = _mm_fnmaddne_pbh (res1, x3, x4); - res1 = _mm_mask_fnmaddne_pbh (res1, m8, x3, x4); - res1 = _mm_mask3_fnmaddne_pbh (res1, x3, x4, m8); - res1 = _mm_maskz_fnmaddne_pbh (m8,res1, x3, x4); + res = _mm256_fnmadd_pbh (res, x1, x2); + res = _mm256_mask_fnmadd_pbh (res, m16, x1, x2); + res = _mm256_mask3_fnmadd_pbh (res, x1, x2, m16); + res = _mm256_maskz_fnmadd_pbh (m16,res, x1, x2); + res1 = _mm_fnmadd_pbh (res1, x3, x4); + res1 = _mm_mask_fnmadd_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fnmadd_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fnmadd_pbh (m8,res1, x3, x4); - res = _mm256_fnmsubne_pbh (res, x1, x2); - res = _mm256_mask_fnmsubne_pbh (res, m16, x1, x2); - res = _mm256_mask3_fnmsubne_pbh (res, x1, x2, m16); - res = _mm256_maskz_fnmsubne_pbh (m16,res, x1, x2); - res1 = _mm_fnmsubne_pbh (res1, x3, x4); - res1 = _mm_mask_fnmsubne_pbh (res1, m8, x3, x4); - res1 = _mm_mask3_fnmsubne_pbh (res1, x3, x4, m8); - res1 = _mm_maskz_fnmsubne_pbh (m8,res1, x3, x4); + res = _mm256_fnmsub_pbh (res, x1, x2); + res = _mm256_mask_fnmsub_pbh (res, m16, x1, x2); + res = _mm256_mask3_fnmsub_pbh (res, x1, x2, m16); + res = _mm256_maskz_fnmsub_pbh (m16,res, x1, x2); + res1 = _mm_fnmsub_pbh (res1, x3, x4); + res1 = _mm_mask_fnmsub_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fnmsub_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fnmsub_pbh (m8,res1, x3, x4); res = _mm256_rsqrt_pbh (x1); res = _mm256_mask_rsqrt_pbh (res, m16, x1); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-fma-1.c new file mode 100644 index 000000000000..05f86f78fba1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-fma-1.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +typedef __bf16 v16bf __attribute__ ((__vector_size__ (32))); +typedef __bf16 v8bf __attribute__ ((__vector_size__ (16))); + +v16bf +foo_madd_256 (v16bf a, v16bf b, v16bf c) +{ + return a * b + c; +} + +v16bf +foo_msub_256 (v16bf a, v16bf b, v16bf c) +{ + return a * b - c; +} + +v16bf +foo_nmadd_256 (v16bf a, v16bf b, v16bf c) +{ + return -a * b + c; +} + +v16bf +foo_nmsub_256 (v16bf a, v16bf b, v16bf c) +{ + return -a * b - c; +} + +v8bf +foo_madd_128 (v8bf a, v8bf b, v8bf c) +{ + return a * b + c; +} + +v8bf +foo_msub_128 (v8bf a, v8bf b, v8bf c) +{ + return a * b - c; +} + +v8bf +foo_nmadd_128 (v8bf a, v8bf b, v8bf c) +{ + return -a * b + c; +} + +v8bf +foo_nmsub_128 (v8bf a, v8bf b, v8bf c) +{ + return -a * b - c; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c deleted file mode 100644 index c101b668040f..000000000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-fma-1.c +++ /dev/null @@ -1,61 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ - -typedef __bf16 v4bf __attribute__ ((__vector_size__ (8))); -typedef __bf16 v2bf __attribute__ ((__vector_size__ (4))); - -v4bf -foo_madd_64 (v4bf a, v4bf b, v4bf c) -{ - return a * b + c; -} - -v4bf -foo_msub_64 (v4bf a, v4bf b, v4bf c) -{ - return a * b - c; -} - -v4bf -foo_nmadd_64 (v4bf a, v4bf b, v4bf c) -{ - return -a * b + c; -} - -v4bf -foo_nmsub_64 (v4bf a, v4bf b, v4bf c) -{ - return -a * b - c; -} - -v2bf -foo_madd_32 (v2bf a, v2bf b, v2bf c) -{ - return a * b + c; -} - -v2bf -foo_msub_32 (v2bf a, v2bf b, v2bf c) -{ - return a * b - c; -} - -v2bf -foo_nmadd_32 (v2bf a, v2bf b, v2bf c) -{ - return -a * b + c; -} - -v2bf -foo_nmsub_32 (v2bf a, v2bf b, v2bf c) -{ - return -a * b - c; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-fma-1.c new file mode 100644 index 000000000000..0fa63de6a6d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-fma-1.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "vfmadd132bf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vfmsub132bf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vfnmadd132bf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vfnmsub132bf16\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */ + +typedef __bf16 v4bf __attribute__ ((__vector_size__ (8))); +typedef __bf16 v2bf __attribute__ ((__vector_size__ (4))); + +v4bf +foo_madd_64 (v4bf a, v4bf b, v4bf c) +{ + return a * b + c; +} + +v4bf +foo_msub_64 (v4bf a, v4bf b, v4bf c) +{ + return a * b - c; +} + +v4bf +foo_nmadd_64 (v4bf a, v4bf b, v4bf c) +{ + return -a * b + c; +} + +v4bf +foo_nmsub_64 (v4bf a, v4bf b, v4bf c) +{ + return -a * b - c; +} + +v2bf +foo_madd_32 (v2bf a, v2bf b, v2bf c) +{ + return a * b + c; +} + +v2bf +foo_msub_32 (v2bf a, v2bf b, v2bf c) +{ + return a * b - c; +} + +v2bf +foo_nmadd_32 (v2bf a, v2bf b, v2bf c) +{ + return -a * b + c; +} + +v2bf +foo_nmsub_32 (v2bf a, v2bf b, v2bf c) +{ + return -a * b - c; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXbf16-2.c similarity index 77% rename from gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXbf16-2.c index cce6106b432f..85041d449457 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXbf16-2.c @@ -6,11 +6,11 @@ #define AVX512VL #define AVX512F_LEN 256 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfmaddXXXnepbf16-2.c" +#include "avx10_2-512-vfmaddXXXbf16-2.c" #undef AVX512F_LEN #undef AVX512F_LEN_HALF #define AVX512F_LEN 128 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfmaddXXXnepbf16-2.c" +#include "avx10_2-512-vfmaddXXXbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXbf16-2.c similarity index 77% rename from gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXbf16-2.c index 3b8a16097b09..761d5d190f9b 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXbf16-2.c @@ -6,11 +6,11 @@ #define AVX512VL #define AVX512F_LEN 256 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfmsubXXXnepbf16-2.c" +#include "avx10_2-512-vfmsubXXXbf16-2.c" #undef AVX512F_LEN #undef AVX512F_LEN_HALF #define AVX512F_LEN 128 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfmsubXXXnepbf16-2.c" +#include "avx10_2-512-vfmsubXXXbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXbf16-2.c similarity index 77% rename from gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXbf16-2.c index f76b5f3efc73..9b260aa07d8e 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXbf16-2.c @@ -6,11 +6,11 @@ #define AVX512VL #define AVX512F_LEN 256 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfnmaddXXXnepbf16-2.c" +#include "avx10_2-512-vfnmaddXXXbf16-2.c" #undef AVX512F_LEN #undef AVX512F_LEN_HALF #define AVX512F_LEN 128 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfnmaddXXXnepbf16-2.c" +#include "avx10_2-512-vfnmaddXXXbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXbf16-2.c similarity index 77% rename from gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXbf16-2.c index 07279528fb0e..86539f7d52f3 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXbf16-2.c @@ -6,11 +6,11 @@ #define AVX512VL #define AVX512F_LEN 256 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfnmsubXXXnepbf16-2.c" +#include "avx10_2-512-vfnmsubXXXbf16-2.c" #undef AVX512F_LEN #undef AVX512F_LEN_HALF #define AVX512F_LEN 128 #define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vfnmsubXXXnepbf16-2.c" +#include "avx10_2-512-vfnmsubXXXbf16-2.c"