gcc/ChangeLog: * config/i386/avx512vldqintrin.h: Remove target attribute. * config/i386/i386-builtin.def (BDESC): Add OPTION_MASK_ISA2_AVX10_1. * config/i386/sse.md (VF_AVX512VLDQ_AVX10_1): New. (VFH_AVX512VLDQ_AVX10_1): Ditto. (VF1_AVX512VLDQ_AVX10_1): Ditto. (<mask_codefor>reducep<mode><mask_name><round_saeonly_name>): Change iterator to VFH_AVX512VLDQ_AVX10_1. Remove target check. (vec_pack<floatprefix>_float_<mode>): Change iterator to VI8_AVX512VLDQ_AVX10_1. Remove target check. (vec_unpack_<fixprefix>fix_trunc_lo_<mode>): Change iterator to VF1_AVX512VLDQ_AVX10_1. Remove target check. (vec_unpack_<fixprefix>fix_trunc_hi_<mode>): Ditto. (VI48F_256_DQVL_AVX10_1): Rename from VI48F_256_DQ. (avx512vl_vextractf128<mode>): Change iterator to VI48F_256_DQVL_AVX10_1. Remove target check. (vec_extract_hi_<mode>_mask): Add TARGET_AVX10_1. (vec_extract_hi_<mode>): Ditto. (avx512vl_vinsert<mode>): Ditto. (vec_set_lo_<mode><mask_name>): Ditto. (vec_set_hi_<mode><mask_name>): Ditto. (avx512dq_rangep<mode><mask_name><round_saeonly_name>): Change iterator to VF_AVX512VLDQ_AVX10_1. Remove target check. (avx512dq_fpclass<mode><mask_scalar_merge_name>): Change iterator to VFH_AVX512VLDQ_AVX10_1. Remove target check. * config/i386/subst.md (mask_avx512dq_condition): Add TARGET_AVX10_1. (mask_scalar_merge): Ditto. --- gcc/config/i386/avx512vldqintrin.h | 11 ---- gcc/config/i386/i386-builtin.def | 32 +++++----- gcc/config/i386/sse.md | 94 ++++++++++++++++++------------ gcc/config/i386/subst.md | 4 +- 4 files changed, 76 insertions(+), 65 deletions(-)
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index a8d14a4efc9..1fbf93a0b52 100644 --- a/gcc/config/i386/avx512vldqintrin.h +++ b/gcc/config/i386/avx512vldqintrin.h @@ -1331,12 +1331,6 @@ _mm256_movepi64_mask (__m256i __A) return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } -#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) -#pragma GCC push_options -#pragma GCC target("avx512vl,avx512dq") -#define __DISABLE_AVX512VLDQ__ -#endif /* __AVX512VLDQ__ */ - #ifdef __OPTIMIZE__ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -2008,9 +2002,4 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, #endif -#ifdef __DISABLE_AVX512VLDQ__ -#undef __DISABLE_AVX512VLDQ__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512VLDQ__ */ - #endif /* _AVX512VLDQINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index aa0a29caa9f..34768552e78 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1782,8 +1782,8 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__b BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI) @@ -1810,10 +1810,10 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask, "__builtin_ia32_reducesd_mask", IX86_BUILTIN_REDUCESD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask, "__builtin_ia32_reducess_mask", IX86_BUILTIN_REDUCESS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) @@ -1908,10 +1908,10 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI) @@ -2076,8 +2076,8 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__bu BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI) @@ -2184,11 +2184,11 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask", IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX10_1, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask", IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9003776ee01..6784a8c5369 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -467,6 +467,14 @@ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +(define_mode_iterator VF_AVX512VLDQ_AVX10_1 + [(V16SF "TARGET_AVX512DQ") + (V8SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V4SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V8DF "TARGET_AVX512DQ") + (V4DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V2DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1")]) + ;; AVX512ER SF plus 128- and 256-bit SF vector modes (define_mode_iterator VF1_AVX512ER_128_256 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF]) @@ -478,6 +486,17 @@ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +(define_mode_iterator VFH_AVX512VLDQ_AVX10_1 + [(V32HF "TARGET_AVX512FP16") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V16SF "TARGET_AVX512DQ") + (V8SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V4SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V8DF "TARGET_AVX512DQ") + (V4DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V2DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1")]) + (define_mode_iterator VF2_AVX512VLDQ_AVX10_1 [(V8DF "TARGET_AVX512DQ") (V4DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") @@ -486,6 +505,11 @@ (define_mode_iterator VF1_AVX512VL [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) +(define_mode_iterator VF1_AVX512VLDQ_AVX10_1 + [(V16SF "TARGET_AVX512DQ") + (V8SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V4SF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1")]) + (define_mode_iterator VF_AVX512FP16 [V32HF V16HF V8HF]) @@ -3520,12 +3544,12 @@ }) (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") - (unspec:VFH_AVX512VL - [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VLDQ_AVX10_1 0 "register_operand" "=v") + (unspec:VFH_AVX512VLDQ_AVX10_1 + [(match_operand:VFH_AVX512VLDQ_AVX10_1 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_REDUCE))] - "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))" + "" "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -8514,9 +8538,9 @@ (define_expand "vec_pack<floatprefix>_float_<mode>" [(match_operand:<ssePSmode> 0 "register_operand") (any_float:<ssePSmode> - (match_operand:VI8_AVX512VL 1 "register_operand")) - (match_operand:VI8_AVX512VL 2 "register_operand")] - "TARGET_AVX512DQ" + (match_operand:VI8_AVX512VLDQ_AVX10_1 1 "register_operand")) + (match_operand:VI8_AVX512VLDQ_AVX10_1 2 "register_operand")] + "" { rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode); rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode); @@ -8975,8 +8999,8 @@ (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>" [(match_operand:<vunpckfixt_mode> 0 "register_operand") (any_fix:<vunpckfixt_mode> - (match_operand:VF1_AVX512VL 1 "register_operand"))] - "TARGET_AVX512DQ" + (match_operand:VF1_AVX512VLDQ_AVX10_1 1 "register_operand"))] + "" { rtx tem = operands[1]; rtx (*gen) (rtx, rtx); @@ -8998,8 +9022,8 @@ (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>" [(match_operand:<vunpckfixt_mode> 0 "register_operand") (any_fix:<vunpckfixt_mode> - (match_operand:VF1_AVX512VL 1 "register_operand"))] - "TARGET_AVX512DQ" + (match_operand:VF1_AVX512VLDQ_AVX10_1 1 "register_operand"))] + "" { rtx tem; rtx (*gen) (rtx, rtx); @@ -11812,16 +11836,19 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_mode_iterator VI48F_256_DQ - [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")]) +(define_mode_iterator VI48F_256_DQVL_AVX10_1 + [(V8SI "TARGET_AVX512VL") + (V8SF "TARGET_AVX512VL") + (V4DI "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1") + (V4DF "(TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1")]) (define_expand "avx512vl_vextractf128<mode>" [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") - (match_operand:VI48F_256_DQ 1 "register_operand") + (match_operand:VI48F_256_DQVL_AVX10_1 1 "register_operand") (match_operand:SI 2 "const_0_to_1_operand") (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand") (match_operand:QI 4 "register_operand")] - "TARGET_AVX512VL" + "" { rtx (*insn)(rtx, rtx, rtx, rtx); rtx dest = operands[0]; @@ -11960,8 +11987,7 @@ (parallel [(const_int 0) (const_int 1)])) (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0") (match_operand:QI 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512DQ - && TARGET_AVX512VL + "((TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1) && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))" "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}" [(set_attr "type" "sselog1") @@ -11997,8 +12023,7 @@ (parallel [(const_int 2) (const_int 3)])) (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0") (match_operand:QI 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512DQ - && TARGET_AVX512VL + "((TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1) && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))" "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}" [(set_attr "type" "sselog1") @@ -12013,13 +12038,10 @@ (parallel [(const_int 2) (const_int 3)])))] "TARGET_AVX" { - if (TARGET_AVX512VL) - { - if (TARGET_AVX512DQ) - return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}"; - else - return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"; - } + if ((TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1) + return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}"; + else if (TARGET_AVX512VL) + return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"; else return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"; } @@ -27201,7 +27223,7 @@ (match_operand:SI 3 "const_0_to_1_operand") (match_operand:VI48F_256 4 "register_operand") (match_operand:<avx512fmaskmode> 5 "register_operand")] - "TARGET_AVX512VL" + "TARGET_AVX512VL || TARGET_AVX10_1" { rtx (*insn)(rtx, rtx, rtx, rtx, rtx); @@ -27256,7 +27278,7 @@ (parallel [(const_int 2) (const_int 3)]))))] "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512DQ) + if ((TARGET_AVX512DQ && TARGET_AVX512VL) || TARGET_AVX10_1) return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; else if (TARGET_AVX512VL) return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; @@ -27278,7 +27300,7 @@ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512DQ) + if ((TARGET_AVX512DQ && TARGET_AVX512VL)|| TARGET_AVX10_1) return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; else if (TARGET_AVX512VL) return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; @@ -28549,13 +28571,13 @@ "operands[2] = CONST0_RTX (<MODE>mode);") (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "v") - (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(set (match_operand:VF_AVX512VLDQ_AVX10_1 0 "register_operand" "=v") + (unspec:VF_AVX512VLDQ_AVX10_1 + [(match_operand:VF_AVX512VLDQ_AVX10_1 1 "register_operand" "v") + (match_operand:VF_AVX512VLDQ_AVX10_1 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 3 "const_0_to_15_operand")] UNSPEC_RANGE))] - "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>" + "<round_saeonly_mode512bit_condition>" { if (TARGET_DEST_FALSE_DEP_FOR_GLC && <mask4_dest_false_dep_for_glc_cond> @@ -28594,10 +28616,10 @@ (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VFH_AVX512VL 1 "vector_operand" "vm") + [(match_operand:VFH_AVX512VLDQ_AVX10_1 1 "vector_operand" "vm") (match_operand 2 "const_0_to_255_operand")] UNSPEC_FPCLASS))] - "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)" + "" "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; [(set_attr "type" "sse") (set_attr "length_immediate" "1") diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 59c4b395a9d..fe923458ab8 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -65,7 +65,7 @@ || TARGET_AVX10_1)") (define_subst_attr "mask_avx512vl_condition" "mask" "1" "(TARGET_AVX512VL || TARGET_AVX10_1)") (define_subst_attr "mask_avx512bw_condition" "mask" "1" "TARGET_AVX512BW") -(define_subst_attr "mask_avx512dq_condition" "mask" "1" "TARGET_AVX512DQ") +(define_subst_attr "mask_avx512dq_condition" "mask" "1" "(TARGET_AVX512DQ || TARGET_AVX10_1)") (define_subst_attr "mask_prefix" "mask" "vex" "evex") (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex") (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex,evex") @@ -120,7 +120,7 @@ (define_subst "mask_scalar_merge" [(set (match_operand:SUBST_S 0) (match_operand:SUBST_S 1))] - "TARGET_AVX512F" + "TARGET_AVX512F || TARGET_AVX10_1" [(set (match_dup 0) (and:SUBST_S (match_dup 1) -- 2.31.1