> > Just rename the instruction and fix all its call sites. The name of > > the insn pattern is internal to the compiler and can be renamed at > > will. > > Ideally, we should standardize all the names to a standard name, so > e.g. ufix_ -> fixuns_ and ufloat -> floatuns.
Updated. There's some typo for the standard pattern name for unsigned_{float,fix}, it should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2 in current trunk, the patch fix the typo, also change all though ufix_trunc/ufloat patterns. Also vcvttps2udq is available under AVX512VL, so it can be generated directly instead of being emulated via vcvttps2dq. gcc/ChangeLog: PR target/85048 * config/i386/i386-builtin.def (BDESC): Adjust icode name from ufloat/ufix to floatuns/fixuns. * config/i386/i386-expand.cc (ix86_expand_vector_convert_uns_vsivsf): Adjust comments. * config/i386/sse.md (ufloat<sseintvecmodelower><mode>2<mask_name><round_name>): Renamed to .. (<mask_codefor>floatuns<sseintvecmodelower><mode>2<mask_name><round_name>):.. this. (<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>): Renamed to .. (<mask_codefor><avx512>_fixuns_notrunc<sf2simodelower><mode><mask_name><round_name>): .. this. (<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>): Renamed to .. (fix<fixunssuffix>_truncv16sfv16si2<mask_name><round_saeonly_name>):.. this. (ufloat<si2dfmodelower><mode>2<mask_name>): Renamed to .. (floatuns<si2dfmodelower><mode>2<mask_name>): .. this. (ufloatv2siv2df2<mask_name>): Renamed to .. (<mask_codefor>floatunsv2siv2df2<mask_name>): .. this. (ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>): Renamed to .. (fixuns_notrunc<mode><si2dfmodelower>2<mask_name><round_name>): .. this. (ufix_notruncv2dfv2si2): Renamed to .. (fixuns_notruncv2dfv2si2):.. this. (ufix_notruncv2dfv2si2_mask): Renamed to .. (fixuns_notruncv2dfv2si2_mask): .. this. (*ufix_notruncv2dfv2si2_mask_1): Renamed to .. (*fixuns_notruncv2dfv2si2_mask_1): .. this. (ufix_truncv2dfv2si2): Renamed to .. (*fixuns_truncv2dfv2si2): .. this. (ufix_truncv2dfv2si2_mask): Renamed to .. (fixuns_truncv2dfv2si2_mask): .. this. (*ufix_truncv2dfv2si2_mask_1): Renamed to .. (*fixuns_truncv2dfv2si2_mask_1): .. this. (ufix_truncv4dfv4si2<mask_name>): Renamed to .. (fixuns_truncv4dfv4si2<mask_name>): .. this. (ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>): Renamed to .. (fixuns_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>): .. this. (ufix_trunc<mode><sseintvecmodelower>2<mask_name>): Renamed to .. (<mask_codefor>fixuns_trunc<mode><sseintvecmodelower>2<mask_name>): .. this. gcc/testsuite/ChangeLog: * g++.target/i386/pr85048.C: New test. --- gcc/config/i386/i386-builtin.def | 40 +++++++------- gcc/config/i386/i386-expand.cc | 2 +- gcc/config/i386/sse.md | 69 +++++++++++-------------- gcc/testsuite/g++.target/i386/pr85048.C | 33 ++++++++++++ 4 files changed, 84 insertions(+), 60 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 17dfe40fac7..6dae6972d81 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1384,7 +1384,7 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builti BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) @@ -1719,32 +1719,32 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_t BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fixuns_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatunsv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatunsv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatunsv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_floatunsv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI) @@ -2072,8 +2072,8 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fixuns_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_fixuns_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) @@ -3019,11 +3019,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcom BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT) @@ -3034,8 +3034,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) @@ -3185,7 +3185,7 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 1e3ce4b7c3f..de6cbfb6b29 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2033,7 +2033,7 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) } /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* - pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. + pattern can be used on it instead of fixuns_trunc*. This is done by doing just signed conversion if < 0x1p31, and otherwise by subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 172ec3bea4f..513960e8f33 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -7850,7 +7850,7 @@ (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>" (set_attr "prefix" "maybe_vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>" +(define_insn "<mask_codefor>floatuns<sseintvecmodelower><mode>2<mask_name><round_name>" [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") (unsigned_float:VF1_AVX512VL (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] @@ -7861,24 +7861,16 @@ (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>" (set_attr "mode" "<MODE>")]) (define_expand "floatuns<sseintvecmodelower><mode>2" - [(match_operand:VF1 0 "register_operand") - (match_operand:<sseintvecmode> 1 "register_operand")] + [(set (match_operand:VF1 0 "register_operand") + (unsigned_float:VF1 + (match_operand:<sseintvecmode> 1 "register_operand")))] "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)" { - if (<MODE>mode == V16SFmode) - emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1])); - else - if (TARGET_AVX512VL) - { - if (<MODE>mode == V4SFmode) - emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1])); - else - emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1])); - } - else - ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]); - - DONE; + if (<MODE>mode != V16SFmode && !TARGET_AVX512VL) + { + ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]); + DONE; + } }) @@ -7913,7 +7905,7 @@ (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>" (set_attr "prefix" "evex") (set_attr "mode" "XI")]) -(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>" +(define_insn "<mask_codefor><avx512>_fixuns_notrunc<sf2simodelower><mode><mask_name><round_name>" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") (unspec:VI4_AVX512VL [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")] @@ -7970,7 +7962,7 @@ (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>" +(define_insn "fix<fixunssuffix>_truncv16sfv16si2<mask_name><round_saeonly_name>" [(set (match_operand:V16SI 0 "register_operand" "=v") (any_fix:V16SI (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] @@ -8010,22 +8002,21 @@ (define_insn "fix_truncv4sfv4si2<mask_name>" (set_attr "mode" "TI")]) (define_expand "fixuns_trunc<mode><sseintvecmodelower>2" - [(match_operand:<sseintvecmode> 0 "register_operand") - (match_operand:VF1 1 "register_operand")] + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (unsigned_fix:<sseintvecmode> + (match_operand:VF1_128_256 1 "register_operand")))] "TARGET_SSE2" { - if (<MODE>mode == V16SFmode) - emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], - operands[1])); - else + /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors. */ + if (!TARGET_AVX512VL) { rtx tmp[3]; tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); tmp[1] = gen_reg_rtx (<sseintvecmode>mode); emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0])); emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2])); + DONE; } - DONE; }) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -8413,7 +8404,7 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1" (set_attr "prefix" "evex") (set_attr "mode" "V4SF")]) -(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>" +(define_insn "floatuns<si2dfmodelower><mode>2<mask_name>" [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v") (unsigned_float:VF2_512_256VL (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] @@ -8423,7 +8414,7 @@ (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>" (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "ufloatv2siv2df2<mask_name>" +(define_insn "<mask_codefor>floatunsv2siv2df2<mask_name>" [(set (match_operand:V2DF 0 "register_operand" "=v") (unsigned_float:V2DF (vec_select:V2SI @@ -8572,11 +8563,11 @@ (define_insn "*sse2_cvtpd2dq_mask_1" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -;; For ufix_notrunc* insn patterns +;; For fixuns_notrunc* insn patterns (define_mode_attr pd2udqsuff [(V8DF "") (V4DF "{y}")]) -(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>" +(define_insn "fixuns_notrunc<mode><si2dfmodelower>2<mask_name><round_name>" [(set (match_operand:<si2dfmode> 0 "register_operand" "=v") (unspec:<si2dfmode> [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")] @@ -8587,7 +8578,7 @@ (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ufix_notruncv2dfv2si2" +(define_insn "fixuns_notruncv2dfv2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (unspec:V2SI @@ -8600,7 +8591,7 @@ (define_insn "ufix_notruncv2dfv2si2" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "ufix_notruncv2dfv2si2_mask" +(define_insn "fixuns_notruncv2dfv2si2_mask" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (vec_merge:V2SI @@ -8618,7 +8609,7 @@ (define_insn "ufix_notruncv2dfv2si2_mask" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*ufix_notruncv2dfv2si2_mask_1" +(define_insn "*fixuns_notruncv2dfv2si2_mask_1" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (vec_merge:V2SI @@ -8644,7 +8635,7 @@ (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>" (set_attr "prefix" "evex") (set_attr "mode" "OI")]) -(define_insn "ufix_truncv2dfv2si2" +(define_insn "*fixuns_truncv2dfv2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) @@ -8655,7 +8646,7 @@ (define_insn "ufix_truncv2dfv2si2" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "ufix_truncv2dfv2si2_mask" +(define_insn "fixuns_truncv2dfv2si2_mask" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (vec_merge:V2SI @@ -8671,7 +8662,7 @@ (define_insn "ufix_truncv2dfv2si2_mask" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*ufix_truncv2dfv2si2_mask_1" +(define_insn "*fixuns_truncv2dfv2si2_mask_1" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (vec_merge:V2SI @@ -8694,7 +8685,7 @@ (define_insn "fix_truncv4dfv4si2<mask_name>" (set_attr "prefix" "maybe_evex") (set_attr "mode" "OI")]) -(define_insn "ufix_truncv4dfv4si2<mask_name>" +(define_insn "fixuns_truncv4dfv4si2<mask_name>" [(set (match_operand:V4SI 0 "register_operand" "=v") (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512VL && TARGET_AVX512F" @@ -8724,7 +8715,7 @@ (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseintvecmode2>")]) -(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" +(define_insn "fixuns_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")] @@ -8827,7 +8818,7 @@ (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>" DONE; }) -(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>" +(define_insn "<mask_codefor>fixuns_trunc<mode><sseintvecmodelower>2<mask_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unsigned_fix:<sseintvecmode> (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C new file mode 100644 index 00000000000..52973c18ebd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr85048.C @@ -0,0 +1,33 @@ +/* PR target/85048 */ +/* { dg-do compile } */ +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */ + +#include <cstdint> + +template <class T, int N, int Size = N * sizeof(T)> +using V [[gnu::vector_size(Size)]] = T; + +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) { + return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])}; +} +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) { + return V<To, 8>{ + To(x[0]), To(x[1]), To(x[2]), To(x[3]), + To(x[4]), To(x[5]), To(x[6]), To(x[7]) + }; +} + +#define _(name, from, to, size) \ +auto name(V<from, size> x) { return cvt##size<from, to>(x); } +// integral -> double +_(vcvtudq2pd, uint32_t, double, 4) +_(vcvtudq2pd, uint32_t, double, 8) + +_( cvttps2udq, float, uint32_t, 4) +_(vcvttps2udq, float, uint32_t, 8) + +// double -> integral +_(vcvttpd2udq, double, uint32_t, 4) -- 2.39.1.388.g2fc9e9ca3c