[gcc r15-337] i386: Fix some intrinsics without alignment requirements.
https://gcc.gnu.org/g:5967696c0f6300da4387fea5d102be5bc9f23233 commit r15-337-g5967696c0f6300da4387fea5d102be5bc9f23233 Author: Hu, Lin1 Date: Fri Jan 19 15:22:10 2024 +0800 i386: Fix some intrinsics without alignment requirements. gcc/ChangeLog: PR target/84508 * config/i386/emmintrin.h (_mm_load_sd): Remove alignment requirement. (_mm_store_sd): Ditto. (_mm_loadh_pd): Ditto. (_mm_loadl_pd): Ditto. (_mm_storel_pd): Add alignment requirement. * config/i386/xmmintrin.h (_mm_loadh_pi): Remove alignment requirement. (_mm_loadl_pi): Ditto. (_mm_load_ss): Ditto. (_mm_store_ss): Ditto. gcc/testsuite/ChangeLog: PR target/84508 * gcc.target/i386/pr84508-1.c: New test. * gcc.target/i386/pr84508-2.c: Ditto. Diff: --- gcc/config/i386/emmintrin.h | 11 ++- gcc/config/i386/xmmintrin.h | 9 + gcc/testsuite/gcc.target/i386/pr84508-1.c | 11 +++ gcc/testsuite/gcc.target/i386/pr84508-2.c | 11 +++ 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 915a5234c38c..fa301103daf8 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -56,6 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); /* Unaligned version of the same types. */ typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef double double_u __attribute__ ((__may_alias__, __aligned__ (1))); /* Create a selector for use with the SHUFPD instruction. 
*/ #define _MM_SHUFFLE2(fp1,fp0) \ @@ -145,7 +146,7 @@ _mm_load1_pd (double const *__P) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_load_sd (double const *__P) { - return _mm_set_sd (*__P); + return __extension__ (__m128d) { *(double_u *)__P, 0.0 }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -180,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_sd (double *__P, __m128d __A) { - *__P = ((__v2df)__A)[0]; + *(double_u *)__P = ((__v2df)__A)[0] ; } extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -192,7 +193,7 @@ _mm_cvtsd_f64 (__m128d __A) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_storel_pd (double *__P, __m128d __A) { - _mm_store_sd (__P, __A); + *__P = ((__v2df)__A)[0]; } /* Stores the upper DPFP value. 
*/ @@ -973,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pd (__m128d __A, double const *__B) { - return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); + return __extension__ (__m128d) { ((__v2df)__A)[0], *(double_u*)__B }; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pd (__m128d __A, double const *__B) { - return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); + return __extension__ (__m128d) { *(double_u*)__B, ((__v2df)__A)[1] }; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 71b9955b8438..9e20f262839b 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -73,6 +73,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); /* Unaligned version of the same type. */ typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef float float_u __attribute__ ((__may_alias__, __aligned__ (1))); /* Internal data types for implementing the intrinsics. */ typedef float __v4sf __attribute__ ((__vector_size__ (16))); @@ -774,7 +775,7 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B) /* Sets the upper two SPFP values with 64-bits of data loaded from P; the lower two values are passed through from A. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_loadh_pi (__m128 __A, __m64 const *__P) +_mm_loadh_pi (__m128 __A, __m64_u const *__P) { return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P); } @@ -803,7 +804,7 @@ _mm_movelh_ps (__m128 __A, __m128 __B) /* Sets the lower two SPFP values with 64-bits of data loaded from P; the upper two values are passed through from A. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)
[gcc r15-3474] testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2
https://gcc.gnu.org/g:811204f52a111af24ba4b00df9e947a44c4c1161 commit r15-3474-g811204f52a111af24ba4b00df9e947a44c4c1161 Author: Hu, Lin1 Date: Thu Sep 5 14:51:42 2024 +0800 testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2 These testcases fail with -march=x86-64-v2, so add -mno-sse4 to avoid these unexpected failures. gcc/testsuite/ChangeLog: PR testsuite/116608 * gcc.target/i386/vect-double-2.c: Add extra option -mno-sse4. * gcc.target/i386/xorsign.c: Ditto. Diff: --- gcc/testsuite/gcc.target/i386/vect-double-2.c | 2 +- gcc/testsuite/gcc.target/i386/xorsign.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/vect-double-2.c b/gcc/testsuite/gcc.target/i386/vect-double-2.c index eea53bfa6b1..065d2e5af08 100644 --- a/gcc/testsuite/gcc.target/i386/vect-double-2.c +++ b/gcc/testsuite/gcc.target/i386/vect-double-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats -mno-sse4" } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.target/i386/xorsign.c b/gcc/testsuite/gcc.target/i386/xorsign.c index ebed5edccb6..f280dd20d7b 100644 --- a/gcc/testsuite/gcc.target/i386/xorsign.c +++ b/gcc/testsuite/gcc.target/i386/xorsign.c @@ -1,5 +1,5 @@ /* { dg-do run { target sse2_runtime } } */ -/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize -fdump-tree-vect-details -save-temps" } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize -fdump-tree-vect-details -save-temps -mno-sse4" } */ extern void abort ();
[gcc r15-1677] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.
https://gcc.gnu.org/g:c320a7efcd35ba6c6be70dc9b2fe562a9673e363 commit r15-1677-gc320a7efcd35ba6c6be70dc9b2fe562a9673e363 Author: Hu, Lin1 Date: Thu Feb 1 15:15:01 2024 +0800 vect: generate suitable convert insn for int -> int, float -> float and int <-> float. gcc/ChangeLog: PR target/107432 * tree-vect-generic.cc (expand_vector_conversion): Support convert for int -> int, float -> float and int <-> float. * tree-vect-stmts.cc (vectorizable_conversion): Wrap the indirect convert part. (supportable_indirect_convert_operation): New function. * tree-vectorizer.h (supportable_indirect_convert_operation): Define the new function. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-1.c: New test. * gcc.target/i386/pr107432-2.c: Ditto. * gcc.target/i386/pr107432-3.c: Ditto. * gcc.target/i386/pr107432-4.c: Ditto. * gcc.target/i386/pr107432-5.c: Ditto. * gcc.target/i386/pr107432-6.c: Ditto. * gcc.target/i386/pr107432-7.c: Ditto. Diff: --- gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 + gcc/testsuite/gcc.target/i386/pr107432-3.c | 55 +++ gcc/testsuite/gcc.target/i386/pr107432-4.c | 56 +++ gcc/testsuite/gcc.target/i386/pr107432-5.c | 72 + gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 + gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++ gcc/tree-vect-generic.cc | 29 +++- gcc/tree-vect-stmts.cc | 241 ++--- gcc/tree-vectorizer.h | 4 + 10 files changed, 990 insertions(+), 95 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c new file mode 100644 index 000..a4f37447eb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c @@ -0,0 +1,234 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */ +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */ +/* { dg-final { scan-assembler-times 
"vpmovdw" 6 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ + +#include + +typedef short __v2hi __attribute__ ((__vector_size__ (4))); +typedef char __v2qi __attribute__ ((__vector_size__ (2))); +typedef char __v4qi __attribute__ ((__vector_size__ (4))); +typedef char __v8qi __attribute__ ((__vector_size__ (8))); + +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8))); + +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2si); +} + +__m128imm256_cvtepi64_epi32_builtin_convertvector(__m256i a) +{ + return (__m128i)__builtin_convertvector((__v4di)a, __v4si); +} + +__m256imm512_cvtepi64_epi32_builtin_convertvector(__m512i a) +{ + return (__m256i)__builtin_convertvector((__v8di)a, __v8si); +} + +__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2hi); +} + +__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a) +{ + return __builtin_convertvector((__v4di)a, __v4hi); +} + +__m128imm512_cvtepi64_epi16_builtin_convertvector(__m512i a) +{ + return (__m128i)__builtin_convertvector((__v8di)a, __v8hi); +} + +__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2qi); +} + +__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a) +{ + return __builtin_convertvector((__v4di)a, __v4qi); 
+} + +__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a) +{ + return __builtin_convertvector((__v8di)a, __v8qi); +} + +__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a) +{ + return __builtin_convertvector((__v2si)a, __v2hi); +} + +__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v4si)a, __v4hi); +} + +__m128imm256_cvtepi32_epi16_builtin_convertvector(__m256i a) +{ + return (__m128i)__builtin_convertvector((__v8si)a, __v8hi); +} + +__m256imm5
[gcc r15-1678] vect: Support v4hi -> v4qi.
https://gcc.gnu.org/g:e5f8a39941f6f0f25dac88bd71fd368fb284a10f commit r15-1678-ge5f8a39941f6f0f25dac88bd71fd368fb284a10f Author: Hu, Lin1 Date: Wed Feb 28 18:11:55 2024 +0800 vect: Support v4hi -> v4qi. gcc/ChangeLog: PR target/107432 * config/i386/mmx.md (VI2_32_64): New mode iterator. (mmxhalfmode): New mode attr. (mmxhalfmodelower): Ditto. (truncv2hiv2qi2): Extend mode v4hi and change name from truncv2hiv2qi to trunc<mode><mmxhalfmodelower>2. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-1.c: Modify test. * gcc.target/i386/pr107432-6.c: Add test. * gcc.target/i386/pr108938-3.c: Supporting truncv4hiv4qi affects the bswap optimization, so add the -mno-avx option for now; a bugzilla has been opened. Diff: --- gcc/config/i386/mmx.md | 17 + gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 - gcc/testsuite/gcc.target/i386/pr107432-6.c | 29 ++--- gcc/testsuite/gcc.target/i386/pr108938-3.c | 2 +- 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index ea53f516cbb..24c0516726c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -67,6 +67,9 @@ ;; 4-byte integer vector modes (define_mode_iterator VI_32 [V4QI V2HI]) +;; 8-byte and 4-byte HImode vector modes +(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI]) + ;; 4-byte and 2-byte integer vector modes (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) @@ -106,6 +109,12 @@ (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +(define_mode_attr mmxhalfmode + [(V4HI "V4QI") (V2HI "V2QI")]) + +(define_mode_attr mmxhalfmodelower + [(V4HI "v4qi") (V2HI "v2qi")]) + ;; Mapping of vector float modes to an integer mode of the same size (define_mode_attr mmxintvecmode [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI") @@ -4880,10 +4889,10 @@ DONE; }) -(define_insn "truncv2hiv2qi2" - [(set (match_operand:V2QI 0 "register_operand" "=v") - (truncate:V2QI - (match_operand:V2HI 1 "register_operand" "v")))] +(define_insn "trunc2" + 
[(set (match_operand: 0 "register_operand" "=v") + (truncate: + (match_operand:VI2_32_64 1 "register_operand" "v")))] "TARGET_AVX512VL && TARGET_AVX512BW" "vpmovwb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c index a4f37447eb4..afdf367afe2 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c @@ -7,7 +7,8 @@ /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */ /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */ #include @@ -113,6 +114,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a) return __builtin_convertvector((__v2hi)a, __v2qi); } +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a) +{ + return __builtin_convertvector((__v4hi)a, __v4qi); +} + __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hi)a, __v8qi); @@ -218,6 +224,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a) return __builtin_convertvector((__v2hu)a, __v2qu); } +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a) +{ + return __builtin_convertvector((__v4hu)a, __v4qu); +} + __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a) { return __builtin_convertvector((__v8hu)a, __v8qu); diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c index 4a68a10b089..dd585b2a351 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c @@ -1,18 +1,15 @@ /* { dg-do compile } */ /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
-fno-trapping-math" } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */ -/* { dg-final { scan-assembl
[gcc r15-1679] vect: support direct conversion under x86-64-v3.
https://gcc.gnu.org/g:4385dc97b0d28e54541eb2418d6e68fc672441d7 commit r15-1679-g4385dc97b0d28e54541eb2418d6e68fc672441d7 Author: Hu, Lin1 Date: Wed Mar 6 19:58:48 2024 +0800 vect: support direct conversion under x86-64-v3. gcc/ChangeLog: PR target/107432 * config/i386/i386-expand.cc (ix86_expand_trunc_with_avx2_noavx512f): New function for generate a series of suitable insn. * config/i386/i386-protos.h (ix86_expand_trunc_with_avx2_noavx512f): Define new function. * config/i386/sse.md: Extend trunc2 for x86-64-v3. (ssebytemode) Add V8HI. (PMOV_DST_MODE_2_AVX2): New mode iterator. (PMOV_SRC_MODE_3_AVX2): Ditto. * config/i386/mmx.md (trunc2): Ditto. (avx512vl_trunc2): Ditto. (truncv2si2): Ditto. (avx512vl_truncv2si2): Ditto. (mmxbytemode): New mode attr. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-8.c: New test. * gcc.target/i386/pr107432-9.c: Ditto. * gcc.target/i386/pr92645-4.c: Modify test. Diff: --- gcc/config/i386/i386-expand.cc | 44 -- gcc/config/i386/i386-protos.h | 3 + gcc/config/i386/mmx.md | 35 +++- gcc/config/i386/sse.md | 88 +++- gcc/testsuite/gcc.target/i386/pr107432-8.c | 94 + gcc/testsuite/gcc.target/i386/pr107432-9.c | 129 + gcc/testsuite/gcc.target/i386/pr92645-4.c | 2 - 7 files changed, 363 insertions(+), 32 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5dfa7d49f58..eccad080f7c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1898,10 +1898,6 @@ ix86_split_convert_uns_si_sse (rtx operands[]) emit_insn (gen_xorv4si3 (value, value, large)); } -static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, -machine_mode mode, rtx target, -rtx var, int one_var); - /* Convert an unsigned DImode value into a DFmode, using only SSE. Expects the 64-bit DImode to be supplied in a pair of integral registers. Requires SSE2; will use SSE3 if available. 
For x86_32, @@ -16126,7 +16122,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */ -static bool +bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, rtx target, rtx var, int one_var) { @@ -26137,4 +26133,42 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx, return target; } +/* Trunc a vector to a narrow vector, like v4di -> v4si. */ + +void +ix86_expand_trunc_with_avx2_noavx512f (rtx output, rtx input, machine_mode cvt_mode) +{ + machine_mode out_mode = GET_MODE (output); + machine_mode in_mode = GET_MODE (input); + int len = GET_MODE_SIZE (in_mode); + gcc_assert (len == GET_MODE_SIZE (cvt_mode) + && GET_MODE_INNER (out_mode) == GET_MODE_INNER (cvt_mode) + && (REG_P (input) || SUBREG_P (input))); + scalar_mode inner_out_mode = GET_MODE_INNER (out_mode); + int in_innersize = GET_MODE_SIZE (GET_MODE_INNER (in_mode)); + int out_innersize = GET_MODE_SIZE (inner_out_mode); + + struct expand_vec_perm_d d; + d.target = gen_reg_rtx (cvt_mode); + d.op0 = lowpart_subreg (cvt_mode, force_reg(in_mode, input), in_mode); + d.op1 = d.op0; + d.vmode = cvt_mode; + d.nelt = GET_MODE_NUNITS (cvt_mode); + d.testing_p = false; + d.one_operand_p = true; + + /* Init perm. Put the needed bits of input in order and + fill the rest of bits by default. 
*/ + for (int i = 0; i < d.nelt; ++i) +{ + d.perm[i] = i; + if (i < GET_MODE_NUNITS (out_mode)) + d.perm[i] = i * (in_innersize / out_innersize); +} + + bool ok = ix86_expand_vec_perm_const_1(&d); + gcc_assert (ok); + emit_move_insn (output, gen_lowpart (out_mode, d.target)); +} + #include "gt-i386-expand.h" diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4f48dc0bf75..1a76090b9da 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -248,6 +248,7 @@ extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, enum rtx_code, extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx, enum rtx_code, tree, tree, enum rtx_code); extern int ix86_get_flags_cc (enum rtx_code); +extern void ix86_expand_trunc_with_avx2_noavx512f (rtx, rtx, machine_mode); extern rtx ix86_memtag_untagged_pointer (rtx, rtx); extern bool ix86_memtag_can_tag_addresses (void); @@ -298,6 +299,8 @@ extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx,
[gcc r15-1680] i386: Refactor vcvttps2qq/vcvtqq2ps patterns.
https://gcc.gnu.org/g:94495247341bc05b77536271fe3dd789dad62624 commit r15-1680-g94495247341bc05b77536271fe3dd789dad62624 Author: Hu, Lin1 Date: Tue Jun 25 18:25:59 2024 +0800 i386: Refactor vcvttps2qq/vcvtqq2ps patterns. Refactor vcvttps2qq/vcvtqq2ps patterns to remove the redundant round_*_modev8sf_condition. gcc/ChangeLog: * config/i386/sse.md (float2 ): Refactor the pattern. (unspec_fix_trunc2 ): Ditto. (fix_trunc2 ): Ditto. * config/i386/subst.md (round_modev8sf_condition): Remove. (round_saeonly_modev8sf_condition): Ditto. Diff: --- gcc/config/i386/sse.md | 51 +--- gcc/config/i386/subst.md | 2 -- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 56ee7119e7c..a94ec3c441f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1158,6 +1158,9 @@ (define_mode_attr ssePSmode2 [(V8DI "V8SF") (V4DI "V4SF")]) +(define_mode_attr ssePSmode2lower + [(V8DI "v8sf") (V4DI "v4sf")]) + ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V64QI "QI") (V32QI "QI") (V16QI "QI") @@ -8862,27 +8865,17 @@ ;; For float insn patterns (define_mode_attr qq2pssuff - [(V8SF "") (V4SF "{y}")]) - -(define_mode_attr sselongvecmode - [(V8SF "V8DI") (V4SF "V4DI")]) - -(define_mode_attr sselongvecmodelower - [(V8SF "v8di") (V4SF "v4di")]) - -(define_mode_attr sseintvecmode3 - [(V8SF "XI") (V4SF "OI") - (V8DF "OI") (V4DF "TI")]) + [(V8DI "") (V4DI "{y}")]) -(define_insn "float2" - [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v") -(any_float:VF1_128_256VL - (match_operand: 1 "nonimmediate_operand" "")))] - "TARGET_AVX512DQ && " +(define_insn "float2" + [(set (match_operand: 0 "register_operand" "=v") +(any_float: + (match_operand:VI8_256_512 1 "nonimmediate_operand" "")))] + "TARGET_AVX512DQ && " "vcvtqq2ps\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_expand "avx512dq_floatv2div2sf2" [(set 
(match_operand:V4SF 0 "register_operand" "=v") @@ -9417,26 +9410,26 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "unspec_fix_trunc2" - [(set (match_operand: 0 "register_operand" "=v") - (unspec: - [(match_operand:VF1_128_256VL 1 "" "")] +(define_insn "unspec_fix_trunc2" + [(set (match_operand:VI8_256_512 0 "register_operand" "=v") + (unspec:VI8_256_512 + [(match_operand: 1 "" "")] UNSPEC_VCVTT_U))] - "TARGET_AVX512DQ && " + "TARGET_AVX512DQ && " "vcvttps2qq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) -(define_insn "fix_trunc2" - [(set (match_operand: 0 "register_operand" "=v") - (any_fix: - (match_operand:VF1_128_256VL 1 "" "")))] - "TARGET_AVX512DQ && " +(define_insn "fix_trunc2" + [(set (match_operand:VI8_256_512 0 "register_operand" "=v") + (any_fix:VI8_256_512 + (match_operand: 1 "" "")))] + "TARGET_AVX512DQ && " "vcvttps2qq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") - (set_attr "mode" "")]) + (set_attr "mode" "")]) (define_insn "unspec_avx512dq_fix_truncv2sfv2di2" [(set (match_operand:V2DI 0 "register_operand" "=v") diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 7a9b697e0f6..40fb92094d2 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -211,7 +211,6 @@ || mode == V16SImode || mode == V32HFmode)") -(define_subst_attr "round_modev8sf_condition" "round" "1" "(mode == V8SFmode)") (define_subst_attr "round_modev4sf_condition" "round" "1" "(mode == V4SFmode)") (define_subst_attr "round_codefor" "round" "*" "") (define_subst_attr "round_opnum" "round" "5" "6") @@ -257,7 +256,6 @@ || mode == V16SImode || mode == V32HFmode)") -(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" "(mode == V8SFmode)") (define_subst "round_saeonly" [(set (match_operand:SUBST_A 0)
[gcc r15-1830] vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME
https://gcc.gnu.org/g:d1eeafe40f263acdb5eb1b57f777e064a11ced2b commit r15-1830-gd1eeafe40f263acdb5eb1b57f777e064a11ced2b Author: Hu, Lin1 Date: Wed Jul 3 10:07:02 2024 +0800 vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME The tree's code must be checked to be SSA_NAME before SSA_NAME_RANGE_INFO is used. 2024-07-03 Hu, Lin1 Andrew Pinski gcc/ChangeLog: PR tree-optimization/115753 * tree-vect-stmts.cc (supportable_indirect_convert_operation): Add TREE_CODE check before SSA_NAME_RANGE_INFO. gcc/testsuite/ChangeLog: PR tree-optimization/115753 * gcc.dg/vect/pr115753-1.c: New test. * gcc.dg/vect/pr115753-2.c: Ditto. * gcc.dg/vect/pr115753-3.c: Ditto. Diff: --- gcc/testsuite/gcc.dg/vect/pr115753-1.c | 12 gcc/testsuite/gcc.dg/vect/pr115753-2.c | 20 gcc/testsuite/gcc.dg/vect/pr115753-3.c | 15 +++ gcc/tree-vect-stmts.cc | 2 +- 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-1.c b/gcc/testsuite/gcc.dg/vect/pr115753-1.c new file mode 100644 index 000..2c1b6e5df63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ +/* { dg-add-options float16 } */ +/* { dg-require-effective-target float16 } */ + +void f(_Complex _Float16*); +void +foo1 (_Complex _Float16 *d) +{ +_Complex _Float16 cf = 3967 + 3791 * 1i; +f(&cf); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-2.c b/gcc/testsuite/gcc.dg/vect/pr115753-2.c new file mode 100644 index 000..ceacada2a76 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ +/* { dg-add-options float16 } */ +/* { dg-require-effective-target float16 } */ + +void f(_Float16*); +void +foo1 () +{ + int t0 = 3967; + int t1 = 3969; + int t2 = 3971; + int t3 = 3973; + _Float16 tt[4]; + tt[0] = t0; + tt[1] = t1; + tt[2] = t2; + tt[3] = t3; + f(&tt[0]); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-3.c 
b/gcc/testsuite/gcc.dg/vect/pr115753-3.c new file mode 100644 index 000..8e95445897c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ + +void f(float*); +void +foo1 () +{ + long long t0 = __LONG_LONG_MAX__; + long long t1 = __LONG_LONG_MAX__ - 1; + float tt[2]; + tt[0] = t0; + tt[1] = t1; + f(&tt[0]); +} + diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 156c11fee82..fdcda0d2aba 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -14680,7 +14680,7 @@ supportable_indirect_convert_operation (code_helper code, In the future, if it is supported, changes may need to be made to this part, such as checking the RANGE of each element in the vector. */ - if (!SSA_NAME_RANGE_INFO (op0) + if ((TREE_CODE (op0) == SSA_NAME && !SSA_NAME_RANGE_INFO (op0)) || !vect_get_range_info (op0, &op_min_value, &op_max_value)) break;
[gcc r15-1853] i386: Refactor ssedoublemode
https://gcc.gnu.org/g:319d3956b16b1270f27e9cbf749e881c4ff7dfb4 commit r15-1853-g319d3956b16b1270f27e9cbf749e881c4ff7dfb4 Author: Hu, Lin1 Date: Thu Jul 4 11:18:46 2024 +0800 i386: Refactor ssedoublemode The "double" in ssedoublemode should mean a double-sized element type, like SI -> DI. Patterns that relied on the old mappings therefore need to use <ssedoublevecmode> instead of <ssedoublemode>. gcc/ChangeLog: * config/i386/sse.md (ssedoublemode): Remove mappings to twice the number of same-sized elements. Add mappings to the same number of double-sized elements. (define_split for vec_concat_minus_plus): Change mode_attr from ssedoublemode to ssedoublevecmode. (define_split for vec_concat_plus_minus): Ditto. (avx512dq_shuf_64x2_1): Ditto. (avx512f_shuf_64x2_1): Ditto. (avx512vl_shuf_32x4_1): Ditto. (avx512f_shuf_32x4_1): Ditto. Diff: --- gcc/config/i386/sse.md | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d71b0f2567e..bda66d5e121 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -808,13 +808,12 @@ (V8HI "v8si") (V16HI "v16si") (V32HI "v32si") (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")]) +;; Map vector mode to the same number of double sized elements. 
(define_mode_attr ssedoublemode - [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF") - (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF") + [(V4SF "V4DF") (V8SF "V8DF") (V16SF "V16DF") (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI") - (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI") - (V4DI "V8DI") (V8DI "V16DI")]) + (V4SI "V4DI") (V8SI "V8DI") (V16SI "V16DI")]) (define_mode_attr ssebytemode [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI") @@ -3319,7 +3318,7 @@ (define_split [(set (match_operand:VF_128_256 0 "register_operand") (match_operator:VF_128_256 7 "addsub_vs_operator" - [(vec_concat: + [(vec_concat: (minus:VF_128_256 (match_operand:VF_128_256 1 "register_operand") (match_operand:VF_128_256 2 "vector_operand")) @@ -3353,7 +3352,7 @@ (define_split [(set (match_operand:VF_128_256 0 "register_operand") (match_operator:VF_128_256 7 "addsub_vs_operator" - [(vec_concat: + [(vec_concat: (plus:VF_128_256 (match_operand:VF_128_256 1 "vector_operand") (match_operand:VF_128_256 2 "vector_operand")) @@ -19869,7 +19868,7 @@ (define_insn "avx512dq_shuf_64x2_1" [(set (match_operand:VI8F_256 0 "register_operand" "=x,v") (vec_select:VI8F_256 - (vec_concat: + (vec_concat: (match_operand:VI8F_256 1 "register_operand" "x,v") (match_operand:VI8F_256 2 "nonimmediate_operand" "xjm,vm")) (parallel [(match_operand 3 "const_0_to_3_operand") @@ -19922,7 +19921,7 @@ (define_insn "avx512f_shuf_64x2_1" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_select:V8FI - (vec_concat: + (vec_concat: (match_operand:V8FI 1 "register_operand" "v") (match_operand:V8FI 2 "nonimmediate_operand" "vm")) (parallel [(match_operand 3 "const_0_to_7_operand") @@ -20020,7 +20019,7 @@ (define_insn "avx512vl_shuf_32x4_1" [(set (match_operand:VI4F_256 0 "register_operand" "=x,v") (vec_select:VI4F_256 - (vec_concat: + (vec_concat: (match_operand:VI4F_256 1 "register_operand" "x,v") (match_operand:VI4F_256 2 "nonimmediate_operand" "xjm,vm")) (parallel [(match_operand 3 
"const_0_to_7_operand") @@ -20091,7 +20090,7 @@ (define_insn "avx512f_shuf_32x4_1" [(set (match_operand:V16FI 0 "register_operand" "=v") (vec_select:V16FI - (vec_concat: + (vec_concat: (match_operand:V16FI 1 "register_operand" "v") (match_operand:V16FI 2 "nonimmediate_operand" "vm")) (parallel [(match_operand 3 "const_0_to_15_operand")
[gcc r15-2052] i386: extend trunc{128}2{16,32,64}'s scope.
https://gcc.gnu.org/g:a902e35396d68f10bd27477153fafa4f5ac9c319 commit r15-2052-ga902e35396d68f10bd27477153fafa4f5ac9c319 Author: Hu, Lin1 Date: Thu Jul 11 15:03:22 2024 +0800 i386: extend trunc{128}2{16,32,64}'s scope. Based on actual usage, trunc{128}2{16,32,64} use some instructions from sse/sse3, so extend their scope to extend the scope of optimization. gcc/ChangeLog: PR target/107432 * config/i386/sse.md (PMOV_SRC_MODE_3_AVX2): Add TARGET_AVX2 for V4DI and V8SI. (PMOV_SRC_MODE_4): Add TARGET_AVX2 for V4DI. (trunc2): Change constraint from TARGET_AVX2 to TARGET_SSSE3. (trunc2): Ditto. (truncv2div2si2): Change constraint from TARGET_AVX2 to TARGET_SSE. gcc/testsuite/ChangeLog: PR target/107432 * gcc.target/i386/pr107432-10.c: New test. Diff: --- gcc/config/i386/sse.md | 11 gcc/testsuite/gcc.target/i386/pr107432-10.c | 41 + 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c134494cd200..e44822f705b4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15000,7 +15000,8 @@ "TARGET_AVX512VL") (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")]) -(define_mode_iterator PMOV_SRC_MODE_3_AVX2 [V4DI V2DI V8SI V4SI V8HI]) +(define_mode_iterator PMOV_SRC_MODE_3_AVX2 + [(V4DI "TARGET_AVX2") V2DI (V8SI "TARGET_AVX2") V4SI V8HI]) (define_mode_attr pmov_dst_3_lower [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")]) (define_mode_attr pmov_dst_3 @@ -15014,7 +15015,7 @@ [(set (match_operand: 0 "register_operand") (truncate: (match_operand:PMOV_SRC_MODE_3_AVX2 1 "register_operand")))] - "TARGET_AVX2" + "TARGET_SSSE3" { if (TARGET_AVX512VL && (mode != V8HImode || TARGET_AVX512BW)) @@ -15390,7 +15391,7 @@ (match_dup 2)))] "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") -(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) +(define_mode_iterator PMOV_SRC_MODE_4 [(V4DI "TARGET_AVX2") V2DI V4SI]) (define_mode_attr pmov_dst_4 [(V4DI 
"V4HI") (V2DI "V2HI") (V4SI "V4HI")]) (define_mode_attr pmov_dst_4_lower @@ -15404,7 +15405,7 @@ [(set (match_operand: 0 "register_operand") (truncate: (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))] - "TARGET_AVX2" + "TARGET_SSSE3" { if (TARGET_AVX512VL) { @@ -15659,7 +15660,7 @@ [(set (match_operand:V2SI 0 "register_operand") (truncate:V2SI (match_operand:V2DI 1 "register_operand")))] - "TARGET_AVX2" + "TARGET_SSE" { if (TARGET_AVX512VL) { diff --git a/gcc/testsuite/gcc.target/i386/pr107432-10.c b/gcc/testsuite/gcc.target/i386/pr107432-10.c new file mode 100644 index ..57edf7cfc781 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107432-10.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v2 -O2" } */ +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-times "pshufb" 5 } } */ + +#include + +typedef short __v2hi __attribute__ ((__vector_size__ (4))); +typedef char __v2qi __attribute__ ((__vector_size__ (2))); +typedef char __v4qi __attribute__ ((__vector_size__ (4))); +typedef char __v8qi __attribute__ ((__vector_size__ (8))); + +__v2si mm_cvtepi64_epi32_builtin_convertvector(__v2di a) +{ + return __builtin_convertvector((__v2di)a, __v2si); +} + +__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2hi); +} + +__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v4si)a, __v4hi); +} + +__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v2di)a, __v2qi); +} + +__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v4si)a, __v4qi); +} + +__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a) +{ + return __builtin_convertvector((__v8hi)a, __v8qi); +}
[gcc r15-974] i386: Optimize EQ/NE comparison between avx512 kmask and -1.
https://gcc.gnu.org/g:bf7745f887c765e06f2e75508f263debb60aeb2e commit r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e Author: Hu, Lin1 Date: Thu May 9 09:29:07 2024 +0800 i386: Optimize EQ/NE comparison between avx512 kmask and -1. Achieve EQ/NE comparison between avx512 kmask and -1 by using kortest and checking CF. gcc/ChangeLog: PR target/113609 * config/i386/sse.md (*kortest_cmp_setcc): New define_insn_and_split. (*kortest_cmp_jcc): Ditto. gcc/testsuite/ChangeLog: PR target/113609 * gcc.target/i386/pr113609-1.c: New test. * gcc.target/i386/pr113609-2.c: Ditto. Diff: --- gcc/config/i386/sse.md | 67 ++ gcc/testsuite/gcc.target/i386/pr113609-1.c | 194 + gcc/testsuite/gcc.target/i386/pr113609-2.c | 161 3 files changed, 422 insertions(+) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7cd912eeeb1..a5a7347f23a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2201,6 +2201,73 @@ UNSPEC_KORTEST))] "TARGET_AVX512F") +;; Optimize cmp + setcc with mask register by kortest + setcc. +(define_insn_and_split "*kortest_cmp_setcc" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm") +(match_operator:QI 1 "bt_comparison_operator" + [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, ") +(const_int -1)])) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW" + "#" + "&& reload_completed" + [(const_int 0)] +{ + if (MASK_REGNO_P (REGNO (operands[2]))) +{ + emit_insn (gen_kortest_ccc (operands[2], operands[2])); + operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG); +} + else +{ + operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG); + emit_insn (gen_rtx_SET (operands[4], + gen_rtx_COMPARE (CCZmode, + operands[2], + constm1_rtx))); +} + ix86_expand_setcc (operands[0], +GET_CODE (operands[1]), +operands[4], +const0_rtx); + DONE; +}) + +;; Optimize cmp + jcc with mask register by kortest + jcc. 
+(define_insn_and_split "*kortest_cmp_jcc" + [(set (pc) + (if_then_else + (match_operator 0 "bt_comparison_operator" + [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, ") + (const_int -1)]) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW" + "#" + "&& reload_completed" + [(const_int 0)] +{ + if (MASK_REGNO_P (REGNO (operands[1]))) +{ + emit_insn (gen_kortest_ccc (operands[1], operands[1])); + operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG); +} + else +{ + operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG); + emit_insn (gen_rtx_SET (operands[4], + gen_rtx_COMPARE (CCZmode, + operands[1], + constm1_rtx))); +} + ix86_expand_branch (GET_CODE (operands[0]), + operands[4], + const0_rtx, + operands[2]); + DONE; +}) + (define_insn "kunpckhi" [(set (match_operand:HI 0 "register_operand" "=k") (ior:HI diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c b/gcc/testsuite/gcc.target/i386/pr113609-1.c new file mode 100644 index 000..f0639b8500a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c @@ -0,0 +1,194 @@ +/* PR target/113609 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v4" } */ +/* { dg-final { scan-assembler-not "^cmp" } } */ +/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */ + +#include + +unsigned int +cmp_vector_sete_mask8(__m128i a, __m128i b) +{ +__mmask8 k = _mm_cmpeq_epi16_mask (a, b); +if (k == (__mmask8) -1) + return 1; +else + return 0; +} + +unsigned int +cmp_vector_sete_mask16(__m128i a, __m128i b) +{ +__mmask16 k = _mm_cmpeq_epi8_mask (a, b); +if (k
[gcc r15-1370] i386: Refine all cvtt* instructions with UNSPEC instead of FIX/UNSIGNED_FIX.
https://gcc.gnu.org/g:b5d3ad256afdfd891d37d8fdb126d599f150e78b commit r15-1370-gb5d3ad256afdfd891d37d8fdb126d599f150e78b Author: Hu, Lin1 Date: Wed Jun 12 16:25:34 2024 +0800 i386: Refine all cvtt* instructions with UNSPEC instead of FIX/UNSIGNED_FIX. gcc/ChangeLog: PR target/115161 * config/i386/i386-builtin.def: Change CODE_FOR_* for cvtt*'s builtins. * config/i386/sse.md: (unspec_avx512fp16_fix _trunc2): Use UNSPEC instead of FIX/UNSIGNED_FIX. (unspec_avx512fp16_fix_trunc2): Ditto. (unspec_avx512fp16_fix_truncv2di2): Ditto. (unspec_avx512fp16_fix_trunc2): Ditto. (unspec_sse_cvttps2pi): Ditto. (unspec_sse_cvttss2si): Ditto. (unspec_fix_truncv16sfv16si2): Ditto. (unspec_fix_truncv8sfv8si2): Ditto. (unspec_fix_truncv4sfv4si2): Ditto. (unspec_sse2_cvttpd2pi): Ditto. (unspec_fixuns_truncv2dfv2si2): Ditto. (unspec_avx512f_vcvttss2usi): Ditto. (unspec_avx512f_vcvttsd2usi): Ditto. (unspec_sse2_cvttsd2si): Ditto. (unspec_fix_truncv8dfv8si2): Ditto. (*unspec_fixuns_truncv2dfv2si2): Ditto. (unspec_fixuns_truncv2dfv2si2_mask): Ditto. (unspec_fix_truncv4dfv4si2): Ditto. (unspec_fixuns_truncv4dfv4si2): Ditto. (unspec_fix _trunc2): Ditto. (unspec_fix _trunc2): Ditto. (unspec_avx512dq_fix_truncv2sfv2di2): Ditto. (unspec_fixuns_trunc2): Ditto. (unspec_sse2_cvttpd2dq): Ditto. gcc/testsuite/ChangeLog: PR target/115161 * gcc.target/i386/pr115161-1.c: New test. 
Diff: --- gcc/config/i386/i386-builtin.def | 128 +-- gcc/config/i386/sse.md | 335 + gcc/testsuite/gcc.target/i386/pr115161-1.c | 65 ++ 3 files changed, 464 insertions(+), 64 deletions(-) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index a28c48c75668..edb1d2f11b22 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -635,9 +635,9 @@ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF) +BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, CODE_FOR_unspec_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF) +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_unspec_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF) +BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", 
IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) @@ -729,19 +729,19 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2p BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF) BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF) BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF) -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF) -BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF) +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_unspec_sse2_cvttpd2dq
[gcc r15-1389] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx
https://gcc.gnu.org/g:7c6f79eea9febce3b21c5783bac9b0a36e08f003 commit r15-1389-g7c6f79eea9febce3b21c5783bac9b0a36e08f003 Author: Hu, Lin1 Date: Wed Mar 20 16:01:45 2024 +0800 i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx gcc/ChangeLog: * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h, and move macros to xmmintrin.h * config/i386/emmintrin.h: Add cmp[p|s]s intrins. * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d]. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Raise error when imm is in range of [8, 32] without avx. * config/i386/predicates.md (cmpps_imm_operand): New predicate. * config/i386/sse.md (avx_cmp3): Modify define_insn. (avx_vmcmp3): Ditto. * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2. (_CMP_LT_OS): Ditto (_CMP_LE_OS): Ditto (_CMP_UNORD_Q): Ditto (_CMP_NEQ_UQ): Ditto (_CMP_NLT_US): Ditto (_CMP_NLE_US): Ditto (_CMP_ORD_Q): Ditto (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h (_mm_cmp_ss): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/sse-cmp-1.c: New test. * gcc.target/i386/sse-cmp-2.c: Ditto. * gcc.target/i386/sse-cmp-error.c: Ditto. Diff: --- gcc/config/i386/avxintrin.h | 56 gcc/config/i386/emmintrin.h | 22 ++ gcc/config/i386/i386-builtin.def | 10 +-- gcc/config/i386/i386-expand.cc| 6 ++ gcc/config/i386/predicates.md | 5 ++ gcc/config/i386/sse.md| 42 +++- gcc/config/i386/xmmintrin.h | 41 gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 ++ gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++ gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 + 10 files changed, 236 insertions(+), 78 deletions(-) diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 802145408881..ec9b9905b5f6 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ (32), /* Compare predicates for scalar and packed compare intrinsics. 
*/ -/* Equal (ordered, non-signaling) */ -#define _CMP_EQ_OQ 0x00 -/* Less-than (ordered, signaling) */ -#define _CMP_LT_OS 0x01 -/* Less-than-or-equal (ordered, signaling) */ -#define _CMP_LE_OS 0x02 -/* Unordered (non-signaling) */ -#define _CMP_UNORD_Q 0x03 -/* Not-equal (unordered, non-signaling) */ -#define _CMP_NEQ_UQ0x04 -/* Not-less-than (unordered, signaling) */ -#define _CMP_NLT_US0x05 -/* Not-less-than-or-equal (unordered, signaling) */ -#define _CMP_NLE_US0x06 -/* Ordered (nonsignaling) */ -#define _CMP_ORD_Q 0x07 /* Equal (unordered, non-signaling) */ #define _CMP_EQ_UQ 0x08 /* Not-greater-than-or-equal (unordered, signaling) */ @@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B) } #ifdef __OPTIMIZE__ -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P) -{ - return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P); -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P) -{ - return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P); -} - extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P) { @@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P) return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y, __P); } - -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P) -{ - return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P); -} - -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P) -{ - return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P); -} #else -#define _mm_cmp_pd(X, Y, P)\ - ((__m128d) 
__builtin_ia32_cmppd ((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P))) - -#define _mm_cmp_ps(X, Y, P)\ - ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P))) - #define _mm
[gcc r15-4245] i386: Fix some patterns's mem attribute.
https://gcc.gnu.org/g:9f2f108a8a68c7b7b2de5350439a8ab8e17a54da commit r15-4245-g9f2f108a8a68c7b7b2de5350439a8ab8e17a54da Author: Hu, Lin1 Date: Wed Oct 9 10:20:05 2024 +0800 i386: Fix some patterns's mem attribute. Hi, all This is another patch to modify some patterns' type attr from ssemov to ssemov2. Some ssemov patterns' mem attr should be load when their operand 2 is a memory operand. Bootstrapped and regtested on x86-64-linux-pc, OK for trunk? BRs, Lin gcc/ChangeLog: * config/i386/sse.md (sse_movhlps): Change type attr from ssemov to ssemov2. (sse_loadhps): Ditto. (*vec_concat): Ditto. (vec_setv2df_0): Ditto. (sse_loadlps): Change attr from ssemov to ssemov2 except for 2, 3. (sse2_loadhps): Change attr from ssemov to ssemov2 except for 0, 1. (sse2_loadlpd): Change attr from ssemov to ssemov2 except for 0, 1, 2. (sse2_movsd_): Change attr from ssemov to ssemov2 except for 5. (vec_concatv2df): Change attr from ssemov to ssemov2 except for 0, 1, 2. (*vec_concat): Change attr from ssemov to ssemov2 for 3, 4. (vec_concatv2di): Change attr from ssemov to ssemov2 except for 0, 1, 2, 3, 4, 5. 
Diff: --- gcc/config/i386/sse.md | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ccef3e063eca..a45b50ad7324 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10995,7 +10995,7 @@ vmovlps\t{%H2, %1, %0|%0, %1, %H2} %vmovhps\t{%2, %0|%q0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) @@ -11557,7 +11557,7 @@ vmovlhps\t{%2, %1, %0|%0, %1, %2} %vmovlps\t{%2, %H0|%H0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) @@ -11610,7 +11610,7 @@ vmovlps\t{%2, %1, %0|%0, %1, %q2} %vmovlps\t{%2, %0|%q0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") - (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") + (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov") (set (attr "length_immediate") (if_then_else (eq_attr "alternative" "0,1") (const_string "1") @@ -11766,7 +11766,7 @@ movhps\t{%2, %0|%0, %q2} vmovhps\t{%2, %1, %0|%0, %1, %q2}" [(set_attr "isa" "noavx,avx,noavx,avx") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) @@ -12214,7 +12214,7 @@ movlpd\t{%2, %0|%0, %2} vmovlpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,noavx,avx") - (set_attr "type" "ssemov") + (set_attr "type" "ssemov2") (set_attr "mode" "DF")]) (define_expand "vec_set" @@ -14665,7 +14665,7 @@ # #" [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") - (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") + (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov") (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "0") (const_string "1") 
@@ -14735,6 +14735,8 @@ (const_string "fmov") (eq_attr "alternative" "10") (const_string "imov") + (eq_attr "alternative" "0,1,2") + (const_string "ssemov2") ] (const_string "ssemov"))) (set (attr "prefix_data16") @@ -14787,7 +14789,7 @@ (if_then_else (eq_attr "alternative" "5") (const_string "sselog") - (const_string "ssemov"))) + (const_string "ssemov2"))) (set (attr "prefix_data16") (if_then_else (and (eq_attr "alternative" "2,4") @@ -14859,7 +14861,7 @@ (if_then_else (eq_attr "alternative" "0,1,2") (const_string "sselog") - (const_string "ssemov"))) + (const_string "ssemov2"))) (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "3") (const_string "1") @@ -21545,7 +21547,7 @@ movhps\t{%2, %0|%0, %q2} vmovhps\t{%2, %1, %0|%0, %1, %q2}" [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx") - (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") + (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2") (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex") (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) @@ -21653,7 +21655,7 @@ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") (const_string "sselog") - (const_string "ssemov")))
[gcc r15-4952] i386: Handling exception input of __builtin_ia32_prefetch. [PR117416]
https://gcc.gnu.org/g:ea46a216d48597b220ae69e79f6513c763f953be commit r15-4952-gea46a216d48597b220ae69e79f6513c763f953be Author: Hu, Lin1 Date: Mon Nov 4 14:52:56 2024 +0800 i386: Handling exception input of __builtin_ia32_prefetch. [PR117416] op1 should be between 0 and 2; raise a warning and use zero when it is out of range. op3 should be 0 or 1; raise a warning when op3 is an invalid value. gcc/ChangeLog: PR target/117416 * config/i386/i386-expand.cc (ix86_expand_builtin): Raise warning when op1 isn't in range of [0, 2] and set op1 as const0_rtx, and raise warning when op3 isn't in range of [0, 1]. gcc/testsuite/ChangeLog: PR target/117416 * gcc.target/i386/pr117416-1.c: New test. * gcc.target/i386/pr117416-2.c: Ditto. Diff: --- gcc/config/i386/i386-expand.cc | 11 +++ gcc/testsuite/gcc.target/i386/pr117416-1.c | 13 + gcc/testsuite/gcc.target/i386/pr117416-2.c | 13 + 3 files changed, 37 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 6eef27f3fcda..ff07ab40848e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -14202,6 +14202,13 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, return const0_rtx; } + if (!IN_RANGE (INTVAL (op1), 0, 2)) + { + warning (0, "invalid second argument to" +" %<__builtin_ia32_prefetch%>; using zero"); + op1 = const0_rtx; + } + if (INTVAL (op3) == 1) { if (INTVAL (op2) < 2 || INTVAL (op2) > 3) @@ -14224,6 +14231,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, } else { + if (INTVAL (op3) != 0) + warning (0, "invalid forth argument to" + " %<__builtin_ia32_prefetch%>; using zero"); + if (!address_operand (op0, VOIDmode)) { op0 = convert_memory_address (Pmode, op0); diff --git a/gcc/testsuite/gcc.target/i386/pr117416-1.c b/gcc/testsuite/gcc.target/i386/pr117416-1.c new file mode 100644 index ..65788f268d9d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117416-1.c @@ -0,0 +1,13 @@ +/* PR target/117416 */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ 
+ +#include + +void* p; + +void extern +prefetch_test (void) +{ + __builtin_ia32_prefetch (p, 5, 0, 0); /* { dg-warning "invalid second argument to '__builtin_ia32_prefetch'; using zero" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/pr117416-2.c b/gcc/testsuite/gcc.target/i386/pr117416-2.c new file mode 100644 index ..07799e36cfe1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117416-2.c @@ -0,0 +1,13 @@ +/* PR target/117416 */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ + +#include + +void* p; + +void extern +prefetch_test (void) +{ + __builtin_ia32_prefetch (p, 0, 0, 2); /* { dg-warning "invalid forth argument to '__builtin_ia32_prefetch'; using zero" } */ +}
[gcc r15-4973] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.
https://gcc.gnu.org/g:8ac694ae67e24a798dce368587bed4c40b90fbc0 commit r15-4973-g8ac694ae67e24a798dce368587bed4c40b90fbc0 Author: Hu, Lin1 Date: Tue Nov 5 15:49:57 2024 +0800 i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions. gcc/ChangeLog: PR target/117304 * config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 512-bits instructions. gcc/testsuite/ChangeLog: PR target/117304 * gcc.target/i386/pr117304-1.c: New test. Diff: --- gcc/config/i386/i386-builtin.def | 10 +- gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index c484e6dc29e4..26c23780b1c6 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3357,11 +3357,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, 
(int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, 
UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c new file mode 100644 index ..fc1c5bfd3e35 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c @@ -0,0 +1,28 @@ +/* PR target/117304 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mno-evex512" } */ + +typedef __attribute__((__vector_size__(32))) int __v8si; +typedef __attribute__((__vector_size__(32))) unsigned int __v8su; +typedef __attribute__((__vector_size__(64))) double __v8df; +typedef __attribute__((__vector_size__(64))) int __v16si; +typedef __attribute__((__vector_size__(64))) unsigned int __v16su; +typedef __attribute__((__vector_size__(64))) float __v16sf; +typedef float __m512 __attribute__ ((__vector_size__ (6
[gcc r15-5184] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]
https://gcc.gnu.org/g:2272cd2508f1854c880082f792de15e76ec09a99 commit r15-5184-g2272cd2508f1854c880082f792de15e76ec09a99 Author: Hu, Lin1 Date: Wed Nov 6 15:42:13 2024 +0800 i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418] -maddress-mode=long makes Pmode = DImode, so zero-extend the 32-bit address to 64 bits and use a 64-bit register as the pointer to avoid raising an ICE. gcc/ChangeLog: PR target/117418 * config/i386/i386-expand.cc (ix86_expand_builtin): Convert pointer's mode according to Pmode. gcc/testsuite/ChangeLog: PR target/117418 * gcc.target/i386/pr117418-1.c: New test. Diff: --- gcc/config/i386/i386-expand.cc | 12 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 2 files changed, 36 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 5c4a8e07d621..a6e6e738a524 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -14064,6 +14064,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -14099,6 +14102,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -14123,6 +14129,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -14160,6 +14169,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, 
op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index ..4839b139b79a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ +__builtin_ia32_aesdec128kl_u8 (&a, a, &a); +__builtin_ia32_aesdec256kl_u8 (&a, a, &a); +__builtin_ia32_aesenc128kl_u8 (&a, a, &a); +__builtin_ia32_aesenc256kl_u8 (&a, a, &a); +__builtin_ia32_encodekey128_u32 (0, a, &a); +__builtin_ia32_encodekey256_u32 (0, a, a, &a); +}
[gcc r13-9183] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]
https://gcc.gnu.org/g:4758f8d410e961b09c8be619d6d0a71d5e7e4aa5 commit r13-9183-g4758f8d410e961b09c8be619d6d0a71d5e7e4aa5 Author: Hu, Lin1 Date: Wed Nov 6 15:42:13 2024 +0800 i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418] -maddress-mode=long let Pmode = DI_mode, so zero extend 32-bit address to 64-bit and uses a 64-bit register as a pointer for avoid raise an ICE. gcc/ChangeLog: PR target/117418 * config/i386/i386-expand.cc (ix86_expand_builtin): Convert pointer's mode according to Pmode. gcc/testsuite/ChangeLog: PR target/117418 * gcc.target/i386/pr117418-1.c: New test. (cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99) Diff: --- gcc/config/i386/i386-expand.cc | 12 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 2 files changed, 36 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index dc85103f3a81..aba810f3fa2d 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13096,6 +13096,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -13131,6 +13134,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -13155,6 +13161,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -13192,6 +13201,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, 
op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index ..4839b139b79a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ +__builtin_ia32_aesdec128kl_u8 (&a, a, &a); +__builtin_ia32_aesdec256kl_u8 (&a, a, &a); +__builtin_ia32_aesenc128kl_u8 (&a, a, &a); +__builtin_ia32_aesenc256kl_u8 (&a, a, &a); +__builtin_ia32_encodekey128_u32 (0, a, &a); +__builtin_ia32_encodekey256_u32 (0, a, a, &a); +}
[gcc r12-10813] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]
https://gcc.gnu.org/g:e41fdca8a290c4d72b1972af8cdfd1dd60af31df commit r12-10813-ge41fdca8a290c4d72b1972af8cdfd1dd60af31df Author: Hu, Lin1 Date: Wed Nov 6 15:42:13 2024 +0800 i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418] -maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to 64 bits and use a 64-bit register as the pointer to avoid an ICE. gcc/ChangeLog: PR target/117418 * config/i386/i386-expand.cc (ix86_expand_builtin): Convert pointer's mode according to Pmode. gcc/testsuite/ChangeLog: PR target/117418 * gcc.target/i386/pr117418-1.c: New test. (cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99) Diff: --- gcc/config/i386/i386-expand.cc | 12 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 2 files changed, 36 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 909c11e4195b..5c8d9c556af2 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12747,6 +12747,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -12782,6 +12785,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -12806,6 +12812,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -12843,6 +12852,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, 
op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index ..4839b139b79a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ +__builtin_ia32_aesdec128kl_u8 (&a, a, &a); +__builtin_ia32_aesdec256kl_u8 (&a, a, &a); +__builtin_ia32_aesenc128kl_u8 (&a, a, &a); +__builtin_ia32_aesenc256kl_u8 (&a, a, &a); +__builtin_ia32_encodekey128_u32 (0, a, &a); +__builtin_ia32_encodekey256_u32 (0, a, a, &a); +}
[gcc r15-3704] i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2
https://gcc.gnu.org/g:1cf1bf7899985df31e1ebccb5d6f1ca762991dcf commit r15-3704-g1cf1bf7899985df31e1ebccb5d6f1ca762991dcf Author: Hu, Lin1 Date: Wed Sep 11 10:10:40 2024 +0800 i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2 The memory attr of some instructions should be 'load', but it is currently 'none'. gcc/ChangeLog: * config/i386/i386.md: Add ssemov2, sseicvt2. * config/i386/sse.md (sse2_cvtsi2sd): Apply sseicvt2. (sse2_cvtsi2sdq): Ditto. (vec_set_0): Apply ssemov2 for 4, 6. Diff: --- gcc/config/i386/i386.md | 11 +++ gcc/config/i386/sse.md | 6 -- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c04415149490..9c2a0aa61126 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -539,10 +539,10 @@ str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint, - sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, + sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, - ssecvt,ssecvt1,sseicvt,sseins, + ssecvt,ssecvt1,sseicvt,sseicvt2,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg, lwp,mskmov,msklog, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" @@ -560,10 +560,10 @@ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint") (const_string "i387") -(eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, +(eq_attr "type" "sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, - ssecvt,ssecvt1,sseicvt,sseins, + ssecvt,ssecvt1,sseicvt,sseicvt2,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") @@ -858,6 +858,9 @@ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog") (match_operand 2 "memory_operand")) (const_string "load") +(and (eq_attr "type" "ssemov2,sseicvt2") + (match_operand 2 
"memory_operand")) + (const_string "load") (and (eq_attr "type" "icmov,ssemuladd,sse4arg") (match_operand 3 "memory_operand")) (const_string "load") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1ae61182d0cc..ff4f33b7b637 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8876,7 +8876,7 @@ cvtsi2sd{l}\t{%2, %0|%0, %2} vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") - (set_attr "type" "sseicvt") + (set_attr "type" "sseicvt2") (set_attr "athlon_decode" "double,direct,*") (set_attr "amdfam10_decode" "vector,double,*") (set_attr "bdver1_decode" "double,direct,*") @@ -8898,7 +8898,7 @@ cvtsi2sd{q}\t{%2, %0|%0, %2} vcvtsi2sd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") - (set_attr "type" "sseicvt") + (set_attr "type" "sseicvt2") (set_attr "athlon_decode" "double,direct,*") (set_attr "amdfam10_decode" "vector,double,*") (set_attr "bdver1_decode" "double,direct,*") @@ -11808,6 +11808,8 @@ (const_string "imov") (eq_attr "alternative" "14") (const_string "fmov") + (eq_attr "alternative" "4,6") + (const_string "ssemov2") ] (const_string "ssemov"))) (set (attr "addr")
[gcc r14-10895] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.
https://gcc.gnu.org/g:05fd99e3d5e9f00e4e23596ed15a3cec2aaba128 commit r14-10895-g05fd99e3d5e9f00e4e23596ed15a3cec2aaba128 Author: Hu, Lin1 Date: Tue Nov 5 15:49:57 2024 +0800 i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions. gcc/ChangeLog: PR target/117304 * config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 512-bits instructions. gcc/testsuite/ChangeLog: PR target/117304 * gcc.target/i386/pr117304-1.c: New test. (cherry picked from commit 8ac694ae67e24a798dce368587bed4c40b90fbc0) Diff: --- gcc/config/i386/i386-builtin.def | 10 +- gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index fdd9dba6e542..ee34e0a14979 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3065,11 +3065,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, 
"__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c new file mode 100644 index ..da26f4bd1b78 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c @@ -0,0 +1,28 @@ +/* PR target/117304 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mno-evex512 -mavx512vl" } */ + +typedef __attribute__((__vector_size__(32))) int __v8si; +typedef __attribute__((__vector_size__(32))) unsigned int __v8su; +typedef __attribute__((__vector_size__(64))) double __v8df; +typedef __attribute__((__vector_size__(64))) int __v16si; +typedef __attribute__((__vector_size__(64))) unsigned int __v16su; +typedef __attribute__((__vector_size__(64))) float __v16sf; +typedef float __m512 __attr
[gcc r14-10896] i386: Modify regexp of pr117304-1.c
https://gcc.gnu.org/g:6a0e143a6449bcc250af13642263f671f756500b commit r14-10896-g6a0e143a6449bcc250af13642263f671f756500b Author: Hu, Lin1 Date: Thu Nov 7 10:13:15 2024 +0800 i386: Modify regexp of pr117304-1.c Since the test doesn't care if the hint is correct, modify the regexp of the hint part to avoid future changes to the hint that would cause the test to fail. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117304-1.c: Modify regexp. (cherry picked from commit 4473cf8409f4db19ad91bd784e32dc54eccf02a3) Diff: --- gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c index da26f4bd1b78..4f00ff7c92a1 100644 --- a/gcc/testsuite/gcc.target/i386/pr117304-1.c +++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c @@ -20,9 +20,9 @@ volatile __v16su ui; void foo() { - hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ - hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ - ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" } */ - ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ - __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" 
} */ + hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ }
[gcc r15-5006] i386: Modify regexp of pr117304-1.c
https://gcc.gnu.org/g:4473cf8409f4db19ad91bd784e32dc54eccf02a3 commit r15-5006-g4473cf8409f4db19ad91bd784e32dc54eccf02a3 Author: Hu, Lin1 Date: Thu Nov 7 10:13:15 2024 +0800 i386: Modify regexp of pr117304-1.c Since the test doesn't care if the hint is correct, modify the regexp of the hint part to avoid future changes to the hint that would cause the test to fail. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117304-1.c: Modify regexp. Diff: --- gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c index fc1c5bfd3e35..ec75f271447a 100644 --- a/gcc/testsuite/gcc.target/i386/pr117304-1.c +++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c @@ -20,9 +20,9 @@ volatile __v16su ui; void foo() { - hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ - hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ - ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" } */ - ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ - __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" } */ + hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" 
} */ + hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ + __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ }
[gcc r14-10937] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]
https://gcc.gnu.org/g:8b4bb54e6c45411845ec559c49f594a6239c3969 commit r14-10937-g8b4bb54e6c45411845ec559c49f594a6239c3969 Author: Hu, Lin1 Date: Wed Nov 6 15:42:13 2024 +0800 i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418] -maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to 64 bits and use a 64-bit register as the pointer to avoid an ICE. gcc/ChangeLog: PR target/117418 * config/i386/i386-expand.cc (ix86_expand_builtin): Convert pointer's mode according to Pmode. gcc/testsuite/ChangeLog: PR target/117418 * gcc.target/i386/pr117418-1.c: New test. (cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99) Diff: --- gcc/config/i386/i386-expand.cc | 12 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 2 files changed, 36 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 7019116fcac1..8e9dde145cfb 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -13477,6 +13477,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = expand_normal (arg1); op2 = expand_normal (arg2); + if (GET_MODE (op1) != Pmode) + op1 = convert_to_mode (Pmode, op1, 1); + if (!address_operand (op2, VOIDmode)) { op2 = convert_memory_address (Pmode, op2); @@ -13512,6 +13515,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_label (ok_label); emit_insn (gen_rtx_SET (target, pat)); + if (GET_MODE (op0) != Pmode) + op0 = convert_to_mode (Pmode, op0, 1); + for (i = 0; i < 8; i++) { op = gen_rtx_MEM (V2DImode, @@ -13536,6 +13542,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, op0); + if (GET_MODE (op2) != Pmode) + op2 = convert_to_mode (Pmode, op2, 1); + op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); @@ -13573,6 +13582,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, if (!REG_P (op0)) op0 = copy_to_mode_reg (SImode, 
op0); + if (GET_MODE (op3) != Pmode) + op3 = convert_to_mode (Pmode, op3, 1); + /* Force to use xmm0, xmm1 for keylow, keyhi*/ op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); emit_move_insn (op, op1); diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c new file mode 100644 index ..4839b139b79a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c @@ -0,0 +1,24 @@ +/* PR target/117418 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ +/* { dg-require-effective-target maybe_x32 } */ +/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ +/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ +/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ + +typedef __attribute__((__vector_size__(16))) long long V; +V a; + +void +foo() +{ +__builtin_ia32_aesdec128kl_u8 (&a, a, &a); +__builtin_ia32_aesdec256kl_u8 (&a, a, &a); +__builtin_ia32_aesenc128kl_u8 (&a, a, &a); +__builtin_ia32_aesenc256kl_u8 (&a, a, &a); +__builtin_ia32_encodekey128_u32 (0, a, &a); +__builtin_ia32_encodekey256_u32 (0, a, a, &a); +}
[gcc r15-8876] i386: Fix AVX10.2 sat cvt intrinsic.
https://gcc.gnu.org/g:90ab42f92b876b74056db297557e8c3d51cdd773 commit r15-8876-g90ab42f92b876b74056db297557e8c3d51cdd773 Author: Hu, Lin1 Date: Tue Mar 25 09:24:59 2025 +0800 i386: Fix AVX10.2 sat cvt intrinsic. This patch applies the fix that was missed for the vcvttph2iubs testcase. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Modify testcase. Diff: --- .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c | 28 +- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c index d057c83831a0..1db5a891c216 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c @@ -9,6 +9,7 @@ #endif #include "avx10-helper.h" #include +#include #define SIZE (AVX512F_LEN / 16) #include "avx512f-mask-type.h" @@ -37,7 +38,7 @@ TEST (void) UNION_TYPE (AVX512F_LEN, h) s; UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; - short res_ref[SIZE] = { 0 }; + short res_ref[SIZE] = { 0 }, res_ref2[SIZE] = { 0 }; int i, sign = 1; for (i = 0; i < SIZE; i++) @@ -54,11 +55,7 @@ TEST (void) res3.x = INTRINSIC (_maskz_ipcvtts_ph_epu8) (mask, s.x); CALC (s.a, res_ref); - -#if AVX512F_LEN == 512 - res1.x = INTRINSIC (_ipcvtts_roundph_epu8) (s.x, 8); - res2.x = INTRINSIC (_mask_ipcvtts_roundph_epu8) (res2.x, mask, s.x, 8); - res3.x = INTRINSIC (_maskz_ipcvtts_roundph_epu8) (mask, s.x, 8); + memcpy(res_ref2, res_ref, sizeof(res_ref)); if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) abort (); @@ -70,5 +67,24 @@ TEST (void) MASK_ZERO (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) abort (); + +#if AVX512F_LEN == 512 + for (i = 0; i < SIZE; i++) +res2.a[i] = DEFAULT_VALUE; + + res1.x = INTRINSIC (_ipcvtts_roundph_epu8) (s.x, 8); + res2.x = INTRINSIC (_mask_ipcvtts_roundph_epu8) (res2.x, mask, s.x, 8); + res3.x = INTRINSIC 
(_maskz_ipcvtts_roundph_epu8) (mask, s.x, 8); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref2)) +abort (); + + MASK_MERGE (i_w) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref2)) +abort (); + + MASK_ZERO (i_w) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref2)) +abort (); #endif }
[gcc r15-8457] i386: Update Suffix for AVX10.2 SAT CVT Intrinsics
https://gcc.gnu.org/g:82bbc9da2c7a24a38916158eaff767cc82a7b6bf commit r15-8457-g82bbc9da2c7a24a38916158eaff767cc82a7b6bf Author: Hu, Lin1 Date: Tue Mar 18 10:03:22 2025 +0800 i386: Update Suffix for AVX10.2 SAT CVT Intrinsics The intrinsic names for *[i|u]bs instructions in AVX10.2 are missing the required _ep[i|u]8 suffix. This patch aims to fix the issue. gcc/ChangeLog: * config/i386/avx10_2-512satcvtintrin.h: Change *i[u]bs's type suffix of intrin name. * config/i386/avx10_2satcvtintrin.h: Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-satcvt-1.c: Modify intrin name. * gcc.target/i386/avx10_2-512-vcvtbf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttbf162ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttbf162iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-satcvt-1.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. 
Diff: --- gcc/config/i386/avx10_2-512satcvtintrin.h | 152 ++--- gcc/config/i386/avx10_2satcvtintrin.h | 236 ++--- .../gcc.target/i386/avx10_2-512-satcvt-1.c | 72 +++ .../gcc.target/i386/avx10_2-512-vcvtbf162ibs-2.c | 6 +- .../gcc.target/i386/avx10_2-512-vcvtbf162iubs-2.c | 6 +- .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 12 +- .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c| 12 +- .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c | 12 +- .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c| 12 +- .../gcc.target/i386/avx10_2-512-vcvttbf162ibs-2.c | 6 +- .../gcc.target/i386/avx10_2-512-vcvttbf162iubs-2.c | 6 +- .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c| 12 +- .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c | 12 +- .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c| 12 +- .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c | 12 +- gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c | 144 ++--- gcc/testsuite/gcc.target/i386/sse-14.c | 96 - gcc/testsuite/gcc.target/i386/sse-22.c | 96 - 18 files changed, 458 insertions(+), 458 deletions(-) diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h index 6e864a9a6f81..a08f98c92a0f 100644 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ b/gcc/config/i386/avx10_2-512satcvtintrin.h @@ -36,7 +36,7 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtbf16_epi16 (__m512bh __A) +_mm512_ipcvtbf16_epi8 (__m512bh __A) { return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, @@ -47,7 +47,7 @@ _mm512_ipcvtbf16_epi16 (__m512bh __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtbf16_epi16 (__m512i __W, __mmask32 __U, __m512bh __A) +_mm512_mask_ipcvtbf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) { return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, (__v32hi) __W, @@ -56,7 +56,7 @@ _mm512_mask_ipcvtbf16_epi16 (__m512i __W, __mmask32 __U, __m512bh 
__A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtbf16_epi16 (__mmask32 __U, __m512bh __A) +_mm512_maskz_ipcvtbf16_epi8 (__mmask32 __U, __m512bh __A) { return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, @@ -67,7 +67,7 @@ _mm512_maskz_ipcvtbf16_epi16 (__mmask32 __U, __m512bh __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtbf16_epu16 (__m512bh __A) +_mm512_ipcvtbf16_epu8 (__m512bh __A) { return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, @@ -78,7 +78,7 @@ _mm512_ipcvtbf16_epu16 (__m512bh __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtbf16_epu16 (__m512i __W, __mmask32 __U, __m512bh __A) +_mm512_mask_ipcvtbf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) { return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, (__v32hi) __W, @@ -87,7 +87,7 @@ _mm51
[gcc r15-8463] i386: Fix AVX10.2 SAT CVT testcases.
https://gcc.gnu.org/g:8d236c53c679ca920092ce9200785fcccd97d971 commit r15-8463-g8d236c53c679ca920092ce9200785fcccd97d971 Author: Hu, Lin1 Date: Thu Mar 20 11:55:49 2025 +0800 i386: Fix AVX10.2 SAT CVT testcases. Init res_ref2 for rounding control intrinsics. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Fix testcase. * gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto. 
Diff: --- .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c| 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c| 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c| 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c | 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c| 17 +++-- .../gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c| 17 +++-- 15 files changed, 165 insertions(+), 90 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c index 0c860b02046f..523b3f0a4cb6 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c @@ -9,6 +9,7 @@ #endif #include "avx10-helper.h" #include +#include #define SIZE (AVX512F_LEN / 16) #include "avx512f-mask-type.h" @@ -37,7 +38,7 @@ TEST (void) UNION_TYPE (AVX512F_LEN, h) s; UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; - short res_ref[SIZE] = { 0 }; + short res_ref[SIZE] = { 0 }, res_ref2[SIZE] = { 0 }; int i, sign = 1; for (i = 0; i < SIZE; i++) @@ -54,6 +55,7 @@ TEST (void) res3.x = INTRINSIC (_maskz_ipcvts_ph_epi8) (mask, s.x); CALC (s.a, res_ref); + memcpy(res_ref2, res_ref, sizeof(res_ref)); if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) abort (); @@ -67,19 +69,22 @@ TEST (void) abort (); #if AVX512F_LEN != 128 + for (i = 0; i < SIZE; i++) +res2.a[i] = DEFAULT_VALUE; + res1.x = INTRINSIC 
(_ipcvts_roundph_epi8) (s.x, 8); res2.x = INTRINSIC (_mask_ipcvts_roundph_epi8) (res2.x, mask, s.x, 8); res3.x = INTRINSIC (_maskz_ipcvts_roundph_epi8) (mask, s.x, 8); - if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref2)) abort (); - MASK_MERGE (i_w) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + MASK_MERGE (i_w) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref2)) abort (); - MASK_ZERO (i_w) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + MASK_ZERO (i_w) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref2)) abort (); #endif } diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c index 75e4e1141be8..a8f6e57d46ab 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c @@ -9,6 +9,7 @@ #endif #include "avx10-helper.h" #include +#include #define SIZE (AVX512F_LEN / 16) #include "avx512f-mask-type.h" @@ -37,7 +38,7 @@ TEST (void) UNION_TYPE (AVX512F_LEN, h) s; UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3; MA
[gcc r15-9117] i386: Add attr_isa for vaes patterns to sync with attr gpr16. [pr119473]
https://gcc.gnu.org/g:e5cfa7f797b79613e5483786484567b9ca72db06 commit r15-9117-ge5cfa7f797b79613e5483786484567b9ca72db06 Author: Hu, Lin1 Date: Wed Mar 26 16:15:52 2025 +0800 i386: Add attr_isa for vaes patterns to sync with attr gpr16. [pr119473] For vaes patterns with the jm constraint and the gpr16 attr, the "isa" attr is required to distinguish the avx/avx512 alternatives in ix86_memory_address_reg_class. Also add the missing type and mode attributes for those vaes patterns. gcc/ChangeLog: PR target/119473 * config/i386/sse.md (vaesdec_): Set attr "isa" as "avx,vaes_avx512vl", "type" as "sselog1", "mode" as "TI". (vaesdeclast_): Ditto. (vaesenc_): Ditto. (vaesenclast_): Ditto. gcc/testsuite/ChangeLog: PR target/119473 * gcc.target/i386/pr119473.c: New test. Co-authored-by: Hongyu Wang Diff: --- gcc/config/i386/sse.md | 20 gcc/testsuite/gcc.target/i386/pr119473.c | 26 ++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 92dc93cb6532..b280676eee6b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -30849,7 +30849,10 @@ else return "vaesdec\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesdeclast_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30864,7 +30867,10 @@ else return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesenc_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30879,7 +30885,10 @@ else return "vaesenc\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesenclast_" [(set 
(match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30894,7 +30903,10 @@ else return "vaesenclast\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vpclmulqdq_" [(set (match_operand:VI8_FVL 0 "register_operand" "=v") diff --git a/gcc/testsuite/gcc.target/i386/pr119473.c b/gcc/testsuite/gcc.target/i386/pr119473.c new file mode 100644 index ..574c9217ac9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119473.c @@ -0,0 +1,26 @@ +/* PR target/119473 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mapxf -m64 -mvaes" } */ + +typedef char __v32qi __attribute__ ((__vector_size__(32))); +typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32))); + +typedef union +{ + __v32qi qi[8]; +} tmp_u; + + +void foo () +{ + register tmp_u *tdst __asm__("%rdx"); + register tmp_u *src1 __asm__("%rcx"); + register tmp_u *src2 __asm__("%r26"); + + tdst->qi[0] = __builtin_ia32_vaesdec_v32qi(src1->qi[0], src2->qi[0]); + tdst->qi[0] = __builtin_ia32_vaesdeclast_v32qi(src1->qi[0], src2->qi[0]); + tdst->qi[0] = __builtin_ia32_vaesenc_v32qi(src1->qi[0], src2->qi[0]); + tdst->qi[0] = __builtin_ia32_vaesenclast_v32qi(src1->qi[0], src2->qi[0]); +} + +/* { dg-final { scan-assembler-not "\\\(%r26\\\), " } } */
[gcc r15-8458] i386: Add AVX10.2 SAT CVT Intrinsics without Rounding Control
https://gcc.gnu.org/g:e35327242317282a4ff5e2d933719828a0285e81 commit r15-8458-ge35327242317282a4ff5e2d933719828a0285e81 Author: Hu, Lin1 Date: Thu Mar 13 16:36:15 2025 +0800 i386: Add AVX10.2 SAT CVT Intrinsics without Rounding Control gcc/ChangeLog: * config/i386/avx10_2-512satcvtintrin.h: Add new intrinsics. * config/i386/avx10_2satcvtintrin.h: Ditto. * config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE (V32HI, V32HF, V32HI, USI), (V16SI, V16SF, V16SI, UHI), (V8DI, V8SF, V8DI, UQI), (V8DI, V8DF, V8DI, UQI), (V8SI, V8DF, V8SI, UQI). * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-expand.cc: Handle V16SI_FTYPE_V16SF_V16SI_UHI, V32HI_FTYPE_V32HF_V32HI_USI, V8DI_FTYPE_V8SF_V8DI_UQI, V8DI_FTYPE_V8DF_V8DI_UQI, V8SI_FTYPE_V8DF_V8SI_UQI. Diff: --- gcc/config/i386/avx10_2-512satcvtintrin.h | 496 ++ gcc/config/i386/avx10_2satcvtintrin.h | 560 ++ gcc/config/i386/i386-builtin-types.def| 5 + gcc/config/i386/i386-builtin.def | 32 ++ gcc/config/i386/i386-expand.cc| 5 + 5 files changed, 1098 insertions(+) diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h index a08f98c92a0f..1cef1dae0b12 100644 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ b/gcc/config/i386/avx10_2-512satcvtintrin.h @@ -157,6 +157,502 @@ _mm512_maskz_ipcvttbf16_epu8 (__mmask32 __U, __m512bh __A) (__mmask32) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtph_epi8 (__m512h __A) +{ + return +(__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, +(__v32hi) __W, +(__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_maskz_ipcvtph_epi8 (__mmask32 __U, __m512h __A) +{ + return +(__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtph_epu8 (__m512h __A) +{ + return +(__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, +(__v32hi) +_mm512_undefined_si512 (), +(__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtph_epu8 (__mmask32 __U, __m512h __A) +{ + return +(__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, +(__v32hi) +_mm512_setzero_si512 (), +(__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtps_epi8 (__m512 __A) +{ + return +(__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, +(__v16si) __W, +(__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtps_
[gcc r15-8915] i386: Add "s_" as Saturation for AVX10.2 Converting Intrinsics.
https://gcc.gnu.org/g:330df57938fe609a49c5cb047be443475cb9a3c3 commit r15-8915-g330df57938fe609a49c5cb047be443475cb9a3c3 Author: Hu, Lin1 Date: Fri Mar 21 10:43:10 2025 +0800 i386: Add "s_" as Saturation for AVX10.2 Converting Intrinsics. This patch aims to add "s_" after 'cvt' represent saturation. gcc/ChangeLog: * config/i386/avx10_2-512convertintrin.h (_mm512_mask_cvtx2ps_ph): Formatting fixes (_mm512_mask_cvtx_round2ps_ph): Ditto (_mm512_maskz_cvtx_round2ps_ph): Ditto (_mm512_cvtbiassph_bf8): Rename to _mm512_cvts_biasph_bf8. (_mm512_mask_cvtbiassph_bf8): Rename to _mm512_mask_cvts_biasph_bf8. (_mm512_maskz_cvtbiassph_bf8): Rename to _mm512_maskz_cvts_biasph_bf8. (_mm512_cvtbiassph_hf8): Rename to _mm512_cvts_biasph_hf8. (_mm512_mask_cvtbiassph_hf8): Rename to _mm512_mask_cvts_biasph_hf8. (_mm512_maskz_cvtbiassph_hf8): Rename to _mm512_maskz_cvts_biasph_hf8. (_mm512_cvts2ph_bf8): Rename to _mm512_cvts_2ph_bf8. (_mm512_mask_cvts2ph_bf8): Rename to _mm512_mask_cvts_2ph_bf8. (_mm512_maskz_cvts2ph_bf8): Rename to _mm512_maskz_cvts_2ph_bf8. (_mm512_cvts2ph_hf8): Rename to _mm512_cvts_2ph_hf8. (_mm512_mask_cvts2ph_hf8): Rename to _mm512_mask_cvts_2ph_hf8. (_mm512_maskz_cvts2ph_hf8): Rename to _mm512_maskz_cvts_2ph_hf8. (_mm512_cvtsph_bf8): Rename to _mm512_cvts_ph_bf8. (_mm512_mask_cvtsph_bf8): Rename to _mm512_mask_cvts_ph_bf8. (_mm512_maskz_cvtsph_bf8): Rename to _mm512_maskz_cvts_ph_bf8. (_mm512_cvtsph_hf8): Rename to _mm512_cvts_ph_hf8. (_mm512_mask_cvtsph_hf8): Rename to _mm512_mask_cvts_ph_hf8. (_mm512_maskz_cvtsph_hf8): Rename to _mm512_maskz_cvts_ph_hf8. * config/i386/avx10_2convertintrin.h (_mm_cvtbiassph_bf8): Rename to _mm_cvts_biasph_bf8. (_mm_mask_cvtbiassph_bf8): Rename to _mm_mask_cvts_biasph_bf8. (_mm_maskz_cvtbiassph_bf8): Rename to _mm_maskz_cvts_biasph_bf8. (_mm256_cvtbiassph_bf8): Rename to _mm256_cvts_biasph_bf8. (_mm256_mask_cvtbiassph_bf8): Rename to _mm256_mask_cvts_biasph_bf8. 
(_mm256_maskz_cvtbiassph_bf8): Rename to _mm256_maskz_cvts_biasph_bf8. (_mm_cvtbiassph_hf8): Rename to _mm_cvts_biasph_hf8. (_mm_mask_cvtbiassph_hf8): Rename to _mm_mask_cvts_biasph_hf8. (_mm_maskz_cvtbiassph_hf8): Rename to _mm_maskz_cvts_biasph_hf8. (_mm256_cvtbiassph_hf8): Rename to _mm256_cvts_biasph_hf8. (_mm256_mask_cvtbiassph_hf8): Rename to _mm256_mask_cvts_biasph_hf8. (_mm256_maskz_cvtbiassph_hf8): Rename to _mm256_maskz_cvts_biasph_hf8. (_mm_cvts2ph_bf8): Rename to _mm_cvts_2ph_bf8. (_mm_mask_cvts2ph_bf8): Rename to _mm_mask_cvts_2ph_bf8. (_mm_maskz_cvts2ph_bf8): Rename to _mm_maskz_cvts_2ph_bf8. (_mm256_cvts2ph_bf8): Rename to _mm256_cvts_2ph_bf8. (_mm256_mask_cvts2ph_bf8): Rename to _mm256_mask_cvts_2ph_bf8. (_mm256_maskz_cvts2ph_bf8): Rename to _mm256_maskz_cvts_2ph_bf8. (_mm_cvts2ph_hf8): Rename to _mm_cvts_2ph_hf8. (_mm_mask_cvts2ph_hf8): Rename to _mm_mask_cvts_2ph_hf8. (_mm_maskz_cvts2ph_hf8): Rename to _mm_maskz_cvts_2ph_hf8. (_mm256_cvts2ph_hf8): Rename to _mm256_cvts_2ph_hf8. (_mm256_mask_cvts2ph_hf8): Rename to _mm256_mask_cvts_2ph_hf8. (_mm256_maskz_cvts2ph_hf8): Rename to _mm256_maskz_cvts_2ph_hf8. (_mm_cvtsph_bf8): Rename to _mm_cvts_ph_bf8. (_mm_mask_cvtsph_bf8): Rename to _mm_mask_cvts_ph_bf8. (_mm_maskz_cvtsph_bf8): Rename to _mm_maskz_cvts_ph_bf8. (_mm256_cvtsph_bf8): Rename to _mm256_cvts_ph_bf8. (_mm256_mask_cvtsph_bf8): Rename to _mm256_mask_cvts_ph_bf8. (_mm256_maskz_cvtsph_bf8): Rename to _mm256_maskz_cvts_ph_bf8. (_mm_cvtsph_hf8): Rename to _mm_cvts_ph_hf8. (_mm_mask_cvtsph_hf8): Rename to _mm_mask_cvts_ph_hf8. (_mm_maskz_cvtsph_hf8): Rename to _mm_maskz_cvts_ph_hf8. (_mm256_cvtsph_hf8): Rename to _mm256_cvts_ph_hf8. (_mm256_mask_cvtsph_hf8): Rename to _mm256_mask_cvts_ph_hf8. (_mm256_maskz_cvtsph_hf8): Rename to _mm256_maskz_cvts_ph_hf8. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-convert-1.c: Modify function name to follow the latest version. * gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c: Ditto. 
* gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c: Ditto. * gcc.target/i386/avx
[gcc r15-8964] i386: Set attr "addr" as "gpr16" for constraint "jm". [PR 119425]
https://gcc.gnu.org/g:271745bafafbf3316d01ceb6430d67b894129a4c commit r15-8964-g271745bafafbf3316d01ceb6430d67b894129a4c Author: Hu, Lin1 Date: Mon Mar 24 15:36:13 2025 +0800 i386: Set attr "addr" as "gpr16" for constraint "jm". [PR 119425] Constraint "jm" should be used together with attr "addr" set to "gpr16"; otherwise an ICE may be raised in the reload pass. gcc/ChangeLog: PR target/119425 * config/i386/sse.md (vec_set_0): Set the alternative with constraint "jm"'s attribute "addr" to "gpr16". (avx512dq_shuf_64x2_1): Ditto. (avx512vl_shuf_32x4_1): Ditto. (avx2_pblendd): Ditto. (aesenc): Ditto. (aesenclast): Ditto. (aesdec): Ditto. (aesdeclast): Ditto. (vaesdec_): Ditto. (vaesdeclast_): Ditto. (vaesenc_): Ditto. (vaesenclast_): Ditto. (aesu8): Ditto. (*aesu8): Ditto. gcc/testsuite/ChangeLog: PR target/119425 * gcc.target/i386/pr119425.c: New test. Co-authored-by: Hongyu Wang Diff: --- gcc/config/i386/sse.md | 31 -- gcc/testsuite/gcc.target/i386/pr119425.c | 37 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ee2a482fb509..ed5ac1abe80d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11904,7 +11904,7 @@ ] (const_string "ssemov"))) (set (attr "addr") - (if_then_else (eq_attr "alternative" "8,9") + (if_then_else (eq_attr "alternative" "9,10") (const_string "gpr16") (const_string "*"))) (set (attr "prefix_extra") @@ -20173,6 +20173,7 @@ return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "addr" "gpr16,*") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "XI")]) @@ -20334,6 +20335,7 @@ return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") + (set_attr "addr" "gpr16,*") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -24076,6 +24078,7 @@ "TARGET_AVX2" "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemov") + (set_attr "addr" "gpr16") (set_attr "prefix_extra" "1") (set_attr 
"length_immediate" "1") (set_attr "prefix" "vex") @@ -27085,7 +27088,7 @@ vaesenc\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,gpr16,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,maybe_evex,evex") (set_attr "btver2_decode" "double,double,double") @@ -27103,7 +27106,7 @@ vaesenclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,gpr16,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,maybe_evex,evex") (set_attr "btver2_decode" "double,double,double") @@ -27121,7 +27124,7 @@ vaesdec\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,gpr16,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,maybe_evex,evex") (set_attr "btver2_decode" "double,double,double") @@ -27138,7 +27141,7 @@ * return TARGET_AES ? 
\"vaesdeclast\t{%2, %1, %0|%0, %1, %2}\" : \"%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}\"; vaesdeclast\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx,vaes_avx512vl") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,gpr16,*") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,maybe_evex,evex") @@ -30841,7 +30844,8 @@ return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}"; else return "vaesdec\t{%2, %1, %0|%0, %1, %2}"; -}) +} +[(set_attr "addr" "gpr16,*")]) (define_insn "vaesdeclast_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30855,7 +30859,8 @@ return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; else return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; -}) +} +[(set_attr "addr" "gpr16,*")]) (define_insn "vaesenc_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30869,7 +30874,8 @@ return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}"; else return "vaesenc\t{%2, %1, %0|%0, %1, %2}"; -}) +} +[(set_attr "addr" "gpr16,*")]) (define_insn "vaesenclast_" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30883,7 +30889,8 @@ return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}"; else return "vaesenclast\t{%2, %1, %0|%0
[gcc r15-8459] i386: Fix AVX10.2 SAT CVT testcases.
https://gcc.gnu.org/g:2e7a92a68aab3aaee8872c1a59e1391e07517b05 commit r15-8459-g2e7a92a68aab3aaee8872c1a59e1391e07517b05 Author: Hu, Lin1 Date: Fri Mar 14 11:16:14 2025 +0800 i386: Fix AVX10.2 SAT CVT testcases. Add missing testcases. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-satcvt-1.c: Add testcase. * gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto * gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto * gcc.target/i386/avx10_2-satcvt-1.c: Ditto * gcc.target/i386/avx10_2-vcvttsd2sis-2.c: Ditto * gcc.target/i386/avx10_2-vcvttsd2usis-2.c: Ditto * gcc.target/i386/avx10_2-vcvttss2sis-2.c: Ditto * gcc.target/i386/avx10_2-vcvttss2usis-2.c: Ditto Diff: --- .../gcc.target/i386/avx10_2-512-satcvt-1.c | 104 -- .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c| 21 +++- 
.../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c | 10 +- .../gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c| 21 +++- .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c| 22 ++-- .../gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c | 21 +++- .../gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c | 21 +++- gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c | 120 +++-- .../gcc.target/i386/avx10_2-vcvtps2iubs-2.c| 16 +++ .../gcc.target/i386/avx10_2-vcvttsd2sis-2.c| 24 + .../gcc.target/i386/avx10_2-vcvttsd2usis-2.c | 24 + .../gcc.target/i386/avx10_2-vcvttss2sis-2.c| 24 + .../gcc.target/i386/avx10_2-vcvttss2usis-2.c | 24 + 23 files changed, 565 insertions(+), 97 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c index 7f2f7caf4dbf..bf10ac2769c0 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c @@ -1,27 +1,43 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2ibs\[ \\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtph2iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-
[gcc r16-1125] i386: Fix vmovddup's mem attribute
https://gcc.gnu.org/g:24cbcc49277a0ac40fc2d82831f6db5e8d6d890d commit r16-1125-g24cbcc49277a0ac40fc2d82831f6db5e8d6d890d Author: Hu, Lin1 Date: Tue May 27 19:09:04 2025 +0800 i386: Fix vmovddup's mem attribute Some vmovddup patterns' type attribute is sselog1, so their mem attribute becomes "both". Change the type attribute to match the other vmovddup patterns. gcc/ChangeLog: * config/i386/sse.md (avx512f_movddup512): Change sselog1 to ssemov. (avx_movddup256): Ditto. (*vec_dupv2di): Change alternative 4's type attribute from sselog1 to ssemov. Diff: --- gcc/config/i386/sse.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index aea5e2cad7e1..c40b0fd49978 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -13418,7 +13418,7 @@ (const_int 6) (const_int 14)])))] "TARGET_AVX512F" "vmovddup\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -13449,7 +13449,7 @@ (const_int 2) (const_int 6)])))] "TARGET_AVX && " "vmovddup\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "") (set_attr "mode" "V4DF")]) @@ -27839,7 +27839,7 @@ %vmovddup\t{%1, %0|%0, %1} movlhps\t%0, %0" [(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx") - (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov") + (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov") (set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig") (set (attr "mode") (cond [(and (eq_attr "alternative" "2")
[gcc r16-1093] i386: Add more forms peephole2 for adc/sbb
https://gcc.gnu.org/g:31b887bcc898787a228672d417ec0b33a15b2fb2 commit r16-1093-g31b887bcc898787a228672d417ec0b33a15b2fb2 Author: Hu, Lin1 Date: Wed Feb 19 15:51:40 2025 +0800 i386: Add more forms peephole2 for adc/sbb Enabling -mapxf changes some adc/sbb patterns, so gcc emits an extra mov, e.g. movq 8(%rdi), %rax adcq %rax, 8(%rsi), %rax movq %rax, 8(%rdi) rather than movq 8(%rsi), %rax adcq %rax, 8(%rdi) The patch adds more peephole2 forms to eliminate the extra mov. gcc/ChangeLog: * config/i386/i386.md: Add 4 new peephole2 patterns by swapping the operand order of the original peephole2 patterns to support the new pattern. gcc/testsuite/ChangeLog: * gcc.target/i386/pr79173-13.c: New test. * gcc.target/i386/pr79173-14.c: Ditto. * gcc.target/i386/pr79173-15.c: Ditto. * gcc.target/i386/pr79173-16.c: Ditto. * gcc.target/i386/pr79173-17.c: Ditto. * gcc.target/i386/pr79173-18.c: Ditto. Diff: --- gcc/config/i386/i386.md| 186 + gcc/testsuite/gcc.target/i386/pr79173-13.c | 59 + gcc/testsuite/gcc.target/i386/pr79173-14.c | 59 + gcc/testsuite/gcc.target/i386/pr79173-15.c | 61 ++ gcc/testsuite/gcc.target/i386/pr79173-16.c | 61 ++ gcc/testsuite/gcc.target/i386/pr79173-17.c | 32 + gcc/testsuite/gcc.target/i386/pr79173-18.c | 33 + 7 files changed, 491 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b7a18d583da3..4c9cb81d5f9d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8719,6 +8719,34 @@ (set (match_dup 1) (minus:SWI (match_dup 1) (match_dup 0)))])]) +;; Under APX NDD, 'sub reg, mem, reg' is valid. 
+;; New format for +;; mov reg0, mem1 +;; sub reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sub mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI 0 "general_reg_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWI 2 "memory_operand") + (match_dup 0))) + (set (match_dup 0) + (minus:SWI (match_dup 2) (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) (match_dup 0))) + (set (match_dup 2) + (minus:SWI (match_dup 2) (match_dup 0)))])]) + ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into ;; subl $1, %eax; jnc .Lxx; (define_peephole2 @@ -9166,6 +9194,118 @@ (match_dup 1)) (match_dup 0)))])]) +;; Under APX NDD, 'adc reg, mem, reg' is valid. 
+;; +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem1, reg0 +;; to +;; mov reg0, mem2 +;; adc mem1, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC +(zero_extend: + (plus:SWI48 +(plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" +[(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) +(match_dup 0))) +(plus: + (match_operator: 4 "ix86_carry_flag_operator" +[(match_dup 3) (const_int 0)]) + (zero_extend: (match_dup 0) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 +[(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 1) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC +(zero_extend: + (plus:SWI48 +(plus:SWI48 +
[gcc r16-1094] i386: Add more peephole2 for APX NDD
https://gcc.gnu.org/g:102b21f9ce7d7a30cdee7c729a152e95c96107ac commit r16-1094-g102b21f9ce7d7a30cdee7c729a152e95c96107ac Author: Hu, Lin1 Date: Mon Mar 10 16:52:22 2025 +0800 i386: Add more peephole2 for APX NDD The patch aims to optimize movb (%rdi), %al movq %rdi, %rbx xorl %esi, %eax, %edx movb %dl, (%rdi) cmpb %sil, %al jne to xorb %sil, (%rdi) movq %rdi, %rbx jne This removes 2 mov instructions and 1 cmp instruction. Because APX NDD allows the destination register and the source register to be different, some of the original peephole2 patterns no longer apply. Add new peephole2 patterns for APX NDD. gcc/ChangeLog: * config/i386/i386.md (define_peephole2): Define some new peephole2 for APX NDD. gcc/testsuite/ChangeLog: * gcc.target/i386/pr49095-2.c: New test. Diff: --- gcc/config/i386/i386.md | 135 ++ gcc/testsuite/gcc.target/i386/pr49095-2.c | 73 2 files changed, 208 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4c9cb81d5f9d..40b43cf092ac 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -28398,6 +28398,41 @@ const0_rtx); }) +;; For APX NDD PLUS/MINUS/LOGIC +;; Like cmpelim optimized pattern. +;; Reduce an extra mov instruction like +;; decl (%rdi), %eax +;; mov %eax, (%rdi) +;; to +;; decl (%rdi) +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "memory_operand") + (match_operand:SWI 1 "")]) + (const_int 0))) + (set (match_operand:SWI 3 "register_operand") (match_dup 2))]) + (set (match_dup 0) (match_dup 3))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[0]) + && ix86_match_ccmode (peep2_next_insn (0), +(GET_CODE (operands[2]) == PLUS + || GET_CODE (operands[2]) == MINUS) +? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[5] += gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[0]), operands[1]); + operands[6] += gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -28491,6 +28526,54 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movq (%rdi), %rax +;; xorq %rsi, %rax, %rdx +;; movb %rdx, (%rdi) +;; cmpb %rsi, %rax +;; jne +;; to +;; xorb %rsi, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_operand:SWI 4 "register_operand") + (xor:SWI (match_operand:SWI 3 "register_operand") + (match_operand:SWI 2 ""))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 4)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI 5 "register_operand") +(match_operand:SWI 6 "")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && (rtx_equal_p (operands[0], operands[5]) + ? 
rtx_equal_p (operands[2], operands[6]) + : rtx_equal_p (operands[2], operands[5]) +&& rtx_equal_p (operands[0], operands[6])) + && peep2_reg_dead_p (3, operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode))" + [(parallel [(set (match_dup 7) (match_dup 9)) + (set (match_dup 1) (match_dup 8))])] +{ + operands[7] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[8] = gen_rtx_XOR (mode, copy_rtx (operands[1]), +operands[2]); + operands[9] += gen_rtx_COMPARE (GET_MODE (operands[7]), + copy_rtx (operands[8]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -28734,6 +28817,58 @@