[gcc r15-337] i386: Fix some intrinsics without alignment requirements.

2024-05-09 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:5967696c0f6300da4387fea5d102be5bc9f23233

commit r15-337-g5967696c0f6300da4387fea5d102be5bc9f23233
Author: Hu, Lin1 
Date:   Fri Jan 19 15:22:10 2024 +0800

i386: Fix some intrinsics without alignment requirements.

gcc/ChangeLog:

PR target/84508
* config/i386/emmintrin.h
(_mm_load_sd): Remove alignment requirement.
(_mm_store_sd): Ditto.
(_mm_loadh_pd): Ditto.
(_mm_loadl_pd): Ditto.
(_mm_storel_pd): Add alignment requirement.
* config/i386/xmmintrin.h
(_mm_loadh_pi): Remove alignment requirement.
(_mm_loadl_pi): Ditto.
(_mm_load_ss): Ditto.
(_mm_store_ss): Ditto.

gcc/testsuite/ChangeLog:

PR target/84508
* gcc.target/i386/pr84508-1.c: New test.
* gcc.target/i386/pr84508-2.c: Ditto.

Diff:
---
 gcc/config/i386/emmintrin.h   | 11 ++-
 gcc/config/i386/xmmintrin.h   |  9 +
 gcc/testsuite/gcc.target/i386/pr84508-1.c | 11 +++
 gcc/testsuite/gcc.target/i386/pr84508-2.c | 11 +++
 4 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 915a5234c38c..fa301103daf8 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -56,6 +56,7 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), 
__may_alias__));
 /* Unaligned version of the same types.  */
 typedef long long __m128i_u __attribute__ ((__vector_size__ (16), 
__may_alias__, __aligned__ (1)));
 typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, 
__aligned__ (1)));
+typedef double double_u __attribute__ ((__may_alias__, __aligned__ (1)));
 
 /* Create a selector for use with the SHUFPD instruction.  */
 #define _MM_SHUFFLE2(fp1,fp0) \
@@ -145,7 +146,7 @@ _mm_load1_pd (double const *__P)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_load_sd (double const *__P)
 {
-  return _mm_set_sd (*__P);
+  return __extension__ (__m128d) { *(double_u *)__P, 0.0 };
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -180,7 +181,7 @@ _mm_storeu_pd (double *__P, __m128d __A)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_store_sd (double *__P, __m128d __A)
 {
-  *__P = ((__v2df)__A)[0];
+  *(double_u *)__P = ((__v2df)__A)[0] ;
 }
 
 extern __inline double __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -192,7 +193,7 @@ _mm_cvtsd_f64 (__m128d __A)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_storel_pd (double *__P, __m128d __A)
 {
-  _mm_store_sd (__P, __A);
+  *__P = ((__v2df)__A)[0];
 }
 
 /* Stores the upper DPFP value.  */
@@ -973,13 +974,13 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_loadh_pd (__m128d __A, double const *__B)
 {
-  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+  return __extension__ (__m128d) { ((__v2df)__A)[0], *(double_u*)__B };
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_loadl_pd (__m128d __A, double const *__B)
 {
-  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+  return __extension__ (__m128d) { *(double_u*)__B, ((__v2df)__A)[1] };
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 71b9955b8438..9e20f262839b 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -73,6 +73,7 @@ typedef float __m128 __attribute__ ((__vector_size__ (16), 
__may_alias__));
 
 /* Unaligned version of the same type.  */
 typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, 
__aligned__ (1)));
+typedef float float_u __attribute__ ((__may_alias__, __aligned__ (1)));
 
 /* Internal data types for implementing the intrinsics.  */
 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
@@ -774,7 +775,7 @@ _mm_unpacklo_ps (__m128 __A, __m128 __B)
 /* Sets the upper two SPFP values with 64-bits of data loaded from P;
the lower two values are passed through from A.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_loadh_pi (__m128 __A, __m64 const *__P)
+_mm_loadh_pi (__m128 __A, __m64_u const *__P)
 {
   return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
 }
@@ -803,7 +804,7 @@ _mm_movelh_ps (__m128 __A, __m128 __B)
 /* Sets the lower two SPFP values with 64-bits of data loaded from P;
the upper two values are passed through from A.  */
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)

[gcc r15-3474] testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2

2024-09-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:811204f52a111af24ba4b00df9e947a44c4c1161

commit r15-3474-g811204f52a111af24ba4b00df9e947a44c4c1161
Author: Hu, Lin1 
Date:   Thu Sep 5 14:51:42 2024 +0800

testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2

These testcases fail with -march=x86-64-v2, so add -mno-sse4 to avoid
these unexpected failures.

gcc/testsuite/ChangeLog:

PR testsuite/116608
* gcc.target/i386/vect-double-2.c: Add extra option -mno-sse4
* gcc.target/i386/xorsign.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/i386/vect-double-2.c | 2 +-
 gcc/testsuite/gcc.target/i386/xorsign.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect-double-2.c 
b/gcc/testsuite/gcc.target/i386/vect-double-2.c
index eea53bfa6b1..065d2e5af08 100644
--- a/gcc/testsuite/gcc.target/i386/vect-double-2.c
+++ b/gcc/testsuite/gcc.target/i386/vect-double-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns 
-mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns 
-mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats -mno-sse4" } */
 
 extern void abort (void);
 
diff --git a/gcc/testsuite/gcc.target/i386/xorsign.c 
b/gcc/testsuite/gcc.target/i386/xorsign.c
index ebed5edccb6..f280dd20d7b 100644
--- a/gcc/testsuite/gcc.target/i386/xorsign.c
+++ b/gcc/testsuite/gcc.target/i386/xorsign.c
@@ -1,5 +1,5 @@
 /* { dg-do run { target sse2_runtime } } */
-/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize 
-fdump-tree-vect-details -save-temps" } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize 
-fdump-tree-vect-details -save-temps -mno-sse4" } */
 
 extern void abort ();


[gcc r15-1677] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:c320a7efcd35ba6c6be70dc9b2fe562a9673e363

commit r15-1677-gc320a7efcd35ba6c6be70dc9b2fe562a9673e363
Author: Hu, Lin1 
Date:   Thu Feb 1 15:15:01 2024 +0800

vect: generate suitable convert insn for int -> int, float -> float and int 
<-> float.

gcc/ChangeLog:

PR target/107432
* tree-vect-generic.cc
(expand_vector_conversion): Support convert for int -> int,
float -> float and int <-> float.
* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
indirect convert part.
(supportable_indirect_convert_operation): New function.
* tree-vectorizer.h (supportable_indirect_convert_operation):
Define the new function.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: New test.
* gcc.target/i386/pr107432-2.c: Ditto.
* gcc.target/i386/pr107432-3.c: Ditto.
* gcc.target/i386/pr107432-4.c: Ditto.
* gcc.target/i386/pr107432-5.c: Ditto.
* gcc.target/i386/pr107432-6.c: Ditto.
* gcc.target/i386/pr107432-7.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 
 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +
 gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +++
 gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +++
 gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 +
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 +
 gcc/testsuite/gcc.target/i386/pr107432-7.c | 150 ++
 gcc/tree-vect-generic.cc   |  29 +++-
 gcc/tree-vect-stmts.cc | 241 ++---
 gcc/tree-vectorizer.h  |   4 +
 10 files changed, 990 insertions(+), 95 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include 
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__m128imm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
+}
+
+__m256imm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__m128imm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi);
+}
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__m128imm256_cvtepi32_epi16_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v8si)a, __v8hi);
+}
+
+__m256imm5

[gcc r15-1678] vect: Support v4hi -> v4qi.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:e5f8a39941f6f0f25dac88bd71fd368fb284a10f

commit r15-1678-ge5f8a39941f6f0f25dac88bd71fd368fb284a10f
Author: Hu, Lin1 
Date:   Wed Feb 28 18:11:55 2024 +0800

vect: Support v4hi -> v4qi.

gcc/ChangeLog:

PR target/107432
* config/i386/mmx.md
(VI2_32_64): New mode iterator.
(mmxhalfmode): New mode attr.
(mmxhalfmodelower): Ditto.
(truncv2hiv2qi2): Extend to mode v4hi and change name from
truncv2hiv2qi2 to trunc<mode><mmxhalfmodelower>2.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: Modify test.
* gcc.target/i386/pr107432-6.c: Add test.
* gcc.target/i386/pr108938-3.c: This patch's support for
truncv4hiv4qi affects the bswap optimization, so I added
the -mno-avx option for now and opened a Bugzilla report.

Diff:
---
 gcc/config/i386/mmx.md | 17 +
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 13 -
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 29 ++---
 gcc/testsuite/gcc.target/i386/pr108938-3.c |  2 +-
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ea53f516cbb..24c0516726c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -67,6 +67,9 @@
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
+;; 8-byte and 4-byte HImode vector modes
+(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
+
 ;; 4-byte and 2-byte integer vector modes
 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 
@@ -106,6 +109,12 @@
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
+(define_mode_attr mmxhalfmode
+  [(V4HI "V4QI") (V2HI "V2QI")])
+
+(define_mode_attr mmxhalfmodelower
+  [(V4HI "v4qi") (V2HI "v2qi")])
+
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr mmxintvecmode
   [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
@@ -4880,10 +4889,10 @@
   DONE;
 })
 
-(define_insn "truncv2hiv2qi2"
-  [(set (match_operand:V2QI 0 "register_operand" "=v")
-   (truncate:V2QI
- (match_operand:V2HI 1 "register_operand" "v")))]
+(define_insn "trunc2"
+  [(set (match_operand: 0 "register_operand" "=v")
+   (truncate:
+ (match_operand:VI2_32_64 1 "register_operand" "v")))]
   "TARGET_AVX512VL && TARGET_AVX512BW"
   "vpmovwb\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
index a4f37447eb4..afdf367afe2 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -7,7 +7,8 @@
 /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include 
 
@@ -113,6 +114,11 @@ __v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi 
a)
   return __builtin_convertvector((__v2hi)a, __v2qi);
 }
 
+__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+  return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
 __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hi)a, __v8qi);
@@ -218,6 +224,11 @@ __v2qu mm32_cvtepu16_epu8_builtin_convertvector(__v2hu 
a)
   return __builtin_convertvector((__v2hu)a, __v2qu);
 }
 
+__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
+{
+  return __builtin_convertvector((__v4hu)a, __v4qu);
+}
+
 __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hu)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c 
b/gcc/testsuite/gcc.target/i386/pr107432-6.c
index 4a68a10b089..dd585b2a351 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -1,18 +1,15 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq 
-fno-trapping-math" } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } 
*/
-/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembl

[gcc r15-1679] vect: support direct conversion under x86-64-v3.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:4385dc97b0d28e54541eb2418d6e68fc672441d7

commit r15-1679-g4385dc97b0d28e54541eb2418d6e68fc672441d7
Author: Hu, Lin1 
Date:   Wed Mar 6 19:58:48 2024 +0800

vect: support direct conversion under x86-64-v3.

gcc/ChangeLog:

PR target/107432
* config/i386/i386-expand.cc 
(ix86_expand_trunc_with_avx2_noavx512f):
New function to generate a series of suitable insns.
* config/i386/i386-protos.h (ix86_expand_trunc_with_avx2_noavx512f):
Define new function.
* config/i386/sse.md: Extend trunc2 for x86-64-v3.
(ssebytemode) Add V8HI.
(PMOV_DST_MODE_2_AVX2): New mode iterator.
(PMOV_SRC_MODE_3_AVX2): Ditto.
* config/i386/mmx.md
(trunc2): Ditto.
(avx512vl_trunc2): Ditto.
(truncv2si2): Ditto.
(avx512vl_truncv2si2): Ditto.
(mmxbytemode): New mode attr.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-8.c: New test.
* gcc.target/i386/pr107432-9.c: Ditto.
* gcc.target/i386/pr92645-4.c: Modify test.

Diff:
---
 gcc/config/i386/i386-expand.cc |  44 --
 gcc/config/i386/i386-protos.h  |   3 +
 gcc/config/i386/mmx.md |  35 +++-
 gcc/config/i386/sse.md |  88 +++-
 gcc/testsuite/gcc.target/i386/pr107432-8.c |  94 +
 gcc/testsuite/gcc.target/i386/pr107432-9.c | 129 +
 gcc/testsuite/gcc.target/i386/pr92645-4.c  |   2 -
 7 files changed, 363 insertions(+), 32 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5dfa7d49f58..eccad080f7c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1898,10 +1898,6 @@ ix86_split_convert_uns_si_sse (rtx operands[])
   emit_insn (gen_xorv4si3 (value, value, large));
 }
 
-static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok,
-machine_mode mode, rtx target,
-rtx var, int one_var);
-
 /* Convert an unsigned DImode value into a DFmode, using only SSE.
Expects the 64-bit DImode to be supplied in a pair of integral
registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
@@ -16126,7 +16122,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
whose ONE_VAR element is VAR, and other elements are zero.  Return true
if successful.  */
 
-static bool
+bool
 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
 rtx target, rtx var, int one_var)
 {
@@ -26137,4 +26133,42 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx 
op1, rtx op2, int idx,
   return target;
 }
 
+/* Trunc a vector to a narrow vector, like v4di -> v4si.  */
+
+void
+ix86_expand_trunc_with_avx2_noavx512f (rtx output, rtx input, machine_mode 
cvt_mode)
+{
+  machine_mode out_mode = GET_MODE (output);
+  machine_mode in_mode = GET_MODE (input);
+  int len = GET_MODE_SIZE (in_mode);
+  gcc_assert (len == GET_MODE_SIZE (cvt_mode)
+ && GET_MODE_INNER (out_mode) == GET_MODE_INNER (cvt_mode)
+ && (REG_P (input) || SUBREG_P (input)));
+  scalar_mode inner_out_mode = GET_MODE_INNER (out_mode);
+  int in_innersize = GET_MODE_SIZE (GET_MODE_INNER (in_mode));
+  int out_innersize = GET_MODE_SIZE (inner_out_mode);
+
+  struct expand_vec_perm_d d;
+  d.target = gen_reg_rtx (cvt_mode);
+  d.op0 = lowpart_subreg (cvt_mode, force_reg(in_mode, input), in_mode);
+  d.op1 = d.op0;
+  d.vmode = cvt_mode;
+  d.nelt = GET_MODE_NUNITS (cvt_mode);
+  d.testing_p = false;
+  d.one_operand_p = true;
+
+  /* Init perm. Put the needed bits of input in order and
+ fill the rest of bits by default.  */
+  for (int i = 0; i < d.nelt; ++i)
+{
+  d.perm[i] = i;
+  if (i < GET_MODE_NUNITS (out_mode))
+   d.perm[i] = i * (in_innersize / out_innersize);
+}
+
+  bool ok = ix86_expand_vec_perm_const_1(&d);
+  gcc_assert (ok);
+  emit_move_insn (output, gen_lowpart (out_mode, d.target));
+}
+
 #include "gt-i386-expand.h"
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4f48dc0bf75..1a76090b9da 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -248,6 +248,7 @@ extern rtx ix86_gen_ccmp_first (rtx_insn **, rtx_insn **, 
enum rtx_code,
 extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
   enum rtx_code, tree, tree, enum rtx_code);
 extern int ix86_get_flags_cc (enum rtx_code);
+extern void ix86_expand_trunc_with_avx2_noavx512f (rtx, rtx, machine_mode);
 extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
 extern bool ix86_memtag_can_tag_addresses (void);
 
@@ -298,6 +299,8 @@ extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx,

[gcc r15-1680] i386: Refactor vcvttps2qq/vcvtqq2ps patterns.

2024-06-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:94495247341bc05b77536271fe3dd789dad62624

commit r15-1680-g94495247341bc05b77536271fe3dd789dad62624
Author: Hu, Lin1 
Date:   Tue Jun 25 18:25:59 2024 +0800

i386: Refactor vcvttps2qq/vcvtqq2ps patterns.

Refactor vcvttps2qq/vcvtqq2ps patterns to remove the redundant
round_*_modev8sf_condition.

gcc/ChangeLog:

* config/i386/sse.md
(float2
): Refactor the pattern.
(unspec_fix_trunc2
): Ditto.
(fix_trunc2
): Ditto.
* config/i386/subst.md (round_modev8sf_condition): Remove.
(round_saeonly_modev8sf_condition): Ditto.

Diff:
---
 gcc/config/i386/sse.md   | 51 +---
 gcc/config/i386/subst.md |  2 --
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 56ee7119e7c..a94ec3c441f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1158,6 +1158,9 @@
 (define_mode_attr ssePSmode2
   [(V8DI "V8SF") (V4DI "V4SF")])
 
+(define_mode_attr ssePSmode2lower
+  [(V8DI "v8sf") (V4DI "v4sf")])
+
 ;; Mapping of vector modes back to the scalar modes
 (define_mode_attr ssescalarmode
   [(V64QI "QI") (V32QI "QI") (V16QI "QI")
@@ -8862,27 +8865,17 @@
 
 ;; For float insn patterns
 (define_mode_attr qq2pssuff
-  [(V8SF "") (V4SF "{y}")])
-
-(define_mode_attr sselongvecmode
-  [(V8SF "V8DI") (V4SF  "V4DI")])
-
-(define_mode_attr sselongvecmodelower
-  [(V8SF "v8di") (V4SF  "v4di")])
-
-(define_mode_attr sseintvecmode3
-  [(V8SF "XI") (V4SF "OI")
-   (V8DF "OI") (V4DF "TI")])
+  [(V8DI "") (V4DI "{y}")])
 
-(define_insn 
"float2"
-  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
-(any_float:VF1_128_256VL
-  (match_operand: 1 "nonimmediate_operand" 
"")))]
-  "TARGET_AVX512DQ && "
+(define_insn 
"float2"
+  [(set (match_operand: 0 "register_operand" "=v")
+(any_float:
+  (match_operand:VI8_256_512 1 "nonimmediate_operand" 
"")))]
+  "TARGET_AVX512DQ && "
   "vcvtqq2ps\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
 (define_expand "avx512dq_floatv2div2sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
@@ -9417,26 +9410,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
-(define_insn 
"unspec_fix_trunc2"
-  [(set (match_operand: 0 "register_operand" "=v")
-   (unspec:
- [(match_operand:VF1_128_256VL 1 "" 
"")]
+(define_insn 
"unspec_fix_trunc2"
+  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+   (unspec:VI8_256_512
+ [(match_operand: 1 "" 
"")]
  UNSPEC_VCVTT_U))]
-  "TARGET_AVX512DQ && "
+  "TARGET_AVX512DQ && "
   "vcvttps2qq\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
-(define_insn 
"fix_trunc2"
-  [(set (match_operand: 0 "register_operand" "=v")
-   (any_fix:
- (match_operand:VF1_128_256VL 1 "" 
"")))]
-  "TARGET_AVX512DQ && "
+(define_insn 
"fix_trunc2"
+  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+   (any_fix:VI8_256_512
+ (match_operand: 1 "" 
"")))]
+  "TARGET_AVX512DQ && "
   "vcvttps2qq\t{%1, 
%0|%0, %1}"
   [(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
-   (set_attr "mode" "")])
+   (set_attr "mode" "")])
 
 (define_insn "unspec_avx512dq_fix_truncv2sfv2di2"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 7a9b697e0f6..40fb92094d2 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -211,7 +211,6 @@
  || mode == 
V16SImode
  || mode == 
V32HFmode)")
 
-(define_subst_attr "round_modev8sf_condition" "round" "1" "(mode == 
V8SFmode)")
 (define_subst_attr "round_modev4sf_condition" "round" "1" "(mode == 
V4SFmode)")
 (define_subst_attr "round_codefor" "round" "*" "")
 (define_subst_attr "round_opnum" "round" "5" "6")
@@ -257,7 +256,6 @@
  
|| mode == V16SImode
  
|| mode == V32HFmode)")
 
-(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" 
"(mode == V8SFmode)")
 
 (define_subst "round_saeonly"
   [(set (match_operand:SUBST_A 0)


[gcc r15-1830] vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME

2024-07-03 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:d1eeafe40f263acdb5eb1b57f777e064a11ced2b

commit r15-1830-gd1eeafe40f263acdb5eb1b57f777e064a11ced2b
Author: Hu, Lin1 
Date:   Wed Jul 3 10:07:02 2024 +0800

vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME

Need to check that the tree's code is SSA_NAME before using SSA_NAME_RANGE_INFO.

2024-07-03  Hu, Lin1 
Andrew Pinski 

gcc/ChangeLog:

PR tree-optimization/115753
* tree-vect-stmts.cc (supportable_indirect_convert_operation): Add
TREE_CODE check before SSA_NAME_RANGE_INFO.

gcc/testsuite/ChangeLog:

PR tree-optimization/115753
* gcc.dg/vect/pr115753-1.c: New test.
* gcc.dg/vect/pr115753-2.c: Ditto.
* gcc.dg/vect/pr115753-3.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr115753-1.c | 12 
 gcc/testsuite/gcc.dg/vect/pr115753-2.c | 20 
 gcc/testsuite/gcc.dg/vect/pr115753-3.c | 15 +++
 gcc/tree-vect-stmts.cc |  2 +-
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-1.c 
b/gcc/testsuite/gcc.dg/vect/pr115753-1.c
new file mode 100644
index 000..2c1b6e5df63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115753-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math" } */
+/* { dg-add-options float16  } */
+/* { dg-require-effective-target float16  } */
+
+void f(_Complex _Float16*);
+void
+foo1 (_Complex _Float16 *d)
+{
+_Complex _Float16 cf = 3967 + 3791 * 1i;
+f(&cf);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-2.c 
b/gcc/testsuite/gcc.dg/vect/pr115753-2.c
new file mode 100644
index 000..ceacada2a76
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115753-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math" } */
+/* { dg-add-options float16  } */
+/* { dg-require-effective-target float16  } */
+
+void f(_Float16*);
+void
+foo1 ()
+{
+  int t0 = 3967;
+  int t1 = 3969;
+  int t2 = 3971;
+  int t3 = 3973;
+  _Float16 tt[4];
+  tt[0] = t0;
+  tt[1] = t1;
+  tt[2] = t2;
+  tt[3] = t3;
+  f(&tt[0]);
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-3.c 
b/gcc/testsuite/gcc.dg/vect/pr115753-3.c
new file mode 100644
index 000..8e95445897c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115753-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -frounding-math" } */
+
+void f(float*);
+void
+foo1 ()
+{
+  long long t0 = __LONG_LONG_MAX__;
+  long long t1 = __LONG_LONG_MAX__ - 1;
+  float tt[2];
+  tt[0] = t0;
+  tt[1] = t1;
+  f(&tt[0]);
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 156c11fee82..fdcda0d2aba 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -14680,7 +14680,7 @@ supportable_indirect_convert_operation (code_helper 
code,
 In the future, if it is supported, changes may need to be made
 to this part, such as checking the RANGE of each element
 in the vector.  */
- if (!SSA_NAME_RANGE_INFO (op0)
+ if ((TREE_CODE (op0) == SSA_NAME && !SSA_NAME_RANGE_INFO (op0))
  || !vect_get_range_info (op0, &op_min_value, &op_max_value))
break;


[gcc r15-1853] i386: Refactor ssedoublemode

2024-07-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:319d3956b16b1270f27e9cbf749e881c4ff7dfb4

commit r15-1853-g319d3956b16b1270f27e9cbf749e881c4ff7dfb4
Author: Hu, Lin1 
Date:   Thu Jul 4 11:18:46 2024 +0800

i386: Refactor ssedoublemode

The "double" in ssedoublemode should mean a double-sized element type, like
SI -> DI.  Accordingly, refactor some patterns to use <ssedoublevecmode>
instead of <ssedoublemode>.

gcc/ChangeLog:

* config/i386/sse.md (ssedoublemode): Remove mappings to twice
the number of same-sized elements. Add mappings to the same
number of double-sized elements.
(define_split for vec_concat_minus_plus): Change mode_attr from
ssedoublemode to ssedoublevecmode.
(define_split for vec_concat_plus_minus): Ditto.
(avx512dq_shuf_64x2_1):
Ditto.
(avx512f_shuf_64x2_1): Ditto.
(avx512vl_shuf_32x4_1): Ditto.
(avx512f_shuf_32x4_1): Ditto.

Diff:
---
 gcc/config/i386/sse.md | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d71b0f2567e..bda66d5e121 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -808,13 +808,12 @@
(V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
(V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
 
+;; Map vector mode to the same number of double sized elements.
 (define_mode_attr ssedoublemode
-  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
-   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
+  [(V4SF "V4DF") (V8SF "V8DF") (V16SF "V16DF")
(V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
(V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
-   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
-   (V4DI "V8DI") (V8DI "V16DI")])
+   (V4SI "V4DI") (V8SI "V8DI") (V16SI "V16DI")])
 
 (define_mode_attr ssebytemode
   [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
@@ -3319,7 +3318,7 @@
 (define_split
   [(set (match_operand:VF_128_256 0 "register_operand")
(match_operator:VF_128_256 7 "addsub_vs_operator"
- [(vec_concat:
+ [(vec_concat:
 (minus:VF_128_256
   (match_operand:VF_128_256 1 "register_operand")
   (match_operand:VF_128_256 2 "vector_operand"))
@@ -3353,7 +3352,7 @@
 (define_split
   [(set (match_operand:VF_128_256 0 "register_operand")
(match_operator:VF_128_256 7 "addsub_vs_operator"
- [(vec_concat:
+ [(vec_concat:
 (plus:VF_128_256
   (match_operand:VF_128_256 1 "vector_operand")
   (match_operand:VF_128_256 2 "vector_operand"))
@@ -19869,7 +19868,7 @@
 (define_insn "avx512dq_shuf_64x2_1"
   [(set (match_operand:VI8F_256 0 "register_operand" "=x,v")
(vec_select:VI8F_256
- (vec_concat:
+ (vec_concat:
(match_operand:VI8F_256 1 "register_operand" "x,v")
(match_operand:VI8F_256 2 "nonimmediate_operand" "xjm,vm"))
  (parallel [(match_operand 3 "const_0_to_3_operand")
@@ -19922,7 +19921,7 @@
 (define_insn "avx512f_shuf_64x2_1"
   [(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_select:V8FI
- (vec_concat:
+ (vec_concat:
(match_operand:V8FI 1 "register_operand" "v")
(match_operand:V8FI 2 "nonimmediate_operand" "vm"))
  (parallel [(match_operand 3 "const_0_to_7_operand")
@@ -20020,7 +20019,7 @@
 (define_insn "avx512vl_shuf_32x4_1"
   [(set (match_operand:VI4F_256 0 "register_operand" "=x,v")
(vec_select:VI4F_256
- (vec_concat:
+ (vec_concat:
(match_operand:VI4F_256 1 "register_operand" "x,v")
(match_operand:VI4F_256 2 "nonimmediate_operand" "xjm,vm"))
  (parallel [(match_operand 3 "const_0_to_7_operand")
@@ -20091,7 +20090,7 @@
 (define_insn "avx512f_shuf_32x4_1"
   [(set (match_operand:V16FI 0 "register_operand" "=v")
(vec_select:V16FI
- (vec_concat:
+ (vec_concat:
(match_operand:V16FI 1 "register_operand" "v")
(match_operand:V16FI 2 "nonimmediate_operand" "vm"))
  (parallel [(match_operand 3 "const_0_to_15_operand")


[gcc r15-2052] i386: extend trunc{128}2{16,32,64}'s scope.

2024-07-15 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:a902e35396d68f10bd27477153fafa4f5ac9c319

commit r15-2052-ga902e35396d68f10bd27477153fafa4f5ac9c319
Author: Hu, Lin1 
Date:   Thu Jul 11 15:03:22 2024 +0800

i386: extend trunc{128}2{16,32,64}'s scope.

Based on actual usage, trunc{128}2{16,32,64} use some instructions from
sse/ssse3, so relax their enabling conditions to widen the scope of optimization.

gcc/ChangeLog:

PR target/107432
* config/i386/sse.md
(PMOV_SRC_MODE_3_AVX2): Add TARGET_AVX2 for V4DI and V8SI.
(PMOV_SRC_MODE_4): Add TARGET_AVX2 for V4DI.
(trunc<mode><pmov_dst_3_lower>2): Change constraint from TARGET_AVX2 to
TARGET_SSSE3.
(trunc<mode><pmov_dst_4_lower>2): Ditto.
(truncv2div2si2): Change constraint from TARGET_AVX2 to TARGET_SSE.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-10.c: New test.

Diff:
---
 gcc/config/i386/sse.md  | 11 
 gcc/testsuite/gcc.target/i386/pr107432-10.c | 41 +
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c134494cd200..e44822f705b4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15000,7 +15000,8 @@
   "TARGET_AVX512VL")
 
 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI 
"TARGET_AVX512BW")])
-(define_mode_iterator PMOV_SRC_MODE_3_AVX2 [V4DI V2DI V8SI V4SI V8HI])
+(define_mode_iterator PMOV_SRC_MODE_3_AVX2
+ [(V4DI "TARGET_AVX2") V2DI (V8SI "TARGET_AVX2") V4SI V8HI])
 (define_mode_attr pmov_dst_3_lower
   [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
 (define_mode_attr pmov_dst_3
@@ -15014,7 +15015,7 @@
   [(set (match_operand: 0 "register_operand")
(truncate:
  (match_operand:PMOV_SRC_MODE_3_AVX2 1 "register_operand")))]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   if (TARGET_AVX512VL
   && (mode != V8HImode || TARGET_AVX512BW))
@@ -15390,7 +15391,7 @@
  (match_dup 2)))]
   "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
 
-(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
+(define_mode_iterator PMOV_SRC_MODE_4 [(V4DI "TARGET_AVX2") V2DI V4SI])
 (define_mode_attr pmov_dst_4
   [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
 (define_mode_attr pmov_dst_4_lower
@@ -15404,7 +15405,7 @@
   [(set (match_operand: 0 "register_operand")
(truncate:
  (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   if (TARGET_AVX512VL)
 {
@@ -15659,7 +15660,7 @@
   [(set (match_operand:V2SI 0 "register_operand")
(truncate:V2SI
  (match_operand:V2DI 1 "register_operand")))]
-  "TARGET_AVX2"
+  "TARGET_SSE"
 {
   if (TARGET_AVX512VL)
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-10.c 
b/gcc/testsuite/gcc.target/i386/pr107432-10.c
new file mode 100644
index ..57edf7cfc781
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-10.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v2 -O2" } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-times "pshufb" 5 } } */
+
+#include 
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__v2di a)
+{
+  return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4qi);
+}
+
+__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v8hi)a, __v8qi);
+}


[gcc r15-974] i386: Optimize EQ/NE comparison between avx512 kmask and -1.

2024-06-02 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:bf7745f887c765e06f2e75508f263debb60aeb2e

commit r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e
Author: Hu, Lin1 
Date:   Thu May 9 09:29:07 2024 +0800

i386: Optimize EQ/NE comparison between avx512 kmask and -1.

Achieve EQ/NE comparison between avx512 kmask and -1 by using kortest
and checking CF.

gcc/ChangeLog:

PR target/113609
* config/i386/sse.md
(*kortest_cmp_setcc): New define_insn_and_split.
(*kortest_cmp_jcc): Ditto.

gcc/testsuite/ChangeLog:

PR target/113609
* gcc.target/i386/pr113609-1.c: New test.
* gcc.target/i386/pr113609-2.c: Ditto.

Diff:
---
 gcc/config/i386/sse.md |  67 ++
 gcc/testsuite/gcc.target/i386/pr113609-1.c | 194 +
 gcc/testsuite/gcc.target/i386/pr113609-2.c | 161 
 3 files changed, 422 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7cd912eeeb1..a5a7347f23a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2201,6 +2201,73 @@
  UNSPEC_KORTEST))]
   "TARGET_AVX512F")
 
+;; Optimize cmp + setcc with mask register by kortest + setcc.
+(define_insn_and_split "*kortest_cmp_setcc"
+   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
+(match_operator:QI 1 "bt_comparison_operator"
+   [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, 
")
+(const_int -1)]))
+  (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (MASK_REGNO_P (REGNO (operands[2])))
+{
+  emit_insn (gen_kortest_ccc (operands[2], operands[2]));
+  operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+}
+  else
+{
+  operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+  emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+  operands[2],
+  constm1_rtx)));
+}
+  ix86_expand_setcc (operands[0],
+GET_CODE (operands[1]),
+operands[4],
+const0_rtx);
+  DONE;
+})
+
+;; Optimize cmp + jcc with mask register by kortest + jcc.
+(define_insn_and_split "*kortest_cmp_jcc"
+   [(set (pc)
+  (if_then_else
+   (match_operator 0 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, ")
+  (const_int -1)])
+ (label_ref (match_operand 2))
+  (pc)))
+  (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512BW"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (MASK_REGNO_P (REGNO (operands[1])))
+{
+  emit_insn (gen_kortest_ccc (operands[1], operands[1]));
+  operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+}
+  else
+{
+  operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+  emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+  operands[1],
+  constm1_rtx)));
+}
+  ix86_expand_branch (GET_CODE (operands[0]),
+ operands[4],
+ const0_rtx,
+ operands[2]);
+  DONE;
+})
+
 (define_insn "kunpckhi"
   [(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c 
b/gcc/testsuite/gcc.target/i386/pr113609-1.c
new file mode 100644
index 000..f0639b8500a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c
@@ -0,0 +1,194 @@
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-not "^cmp" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } 
*/
+/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } 
*/
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } 
} */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } 
*/
+/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
+
+#include 
+
+unsigned int
+cmp_vector_sete_mask8(__m128i a, __m128i b)
+{
+__mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+if (k == (__mmask8) -1)
+  return 1;
+else
+  return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask16(__m128i a, __m128i b)
+{
+__mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+if (k 

[gcc r15-1370] i386: Refine all cvtt* instructions with UNSPEC instead of FIX/UNSIGNED_FIX.

2024-06-17 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:b5d3ad256afdfd891d37d8fdb126d599f150e78b

commit r15-1370-gb5d3ad256afdfd891d37d8fdb126d599f150e78b
Author: Hu, Lin1 
Date:   Wed Jun 12 16:25:34 2024 +0800

i386: Refine all cvtt* instructions with UNSPEC instead of FIX/UNSIGNED_FIX.

gcc/ChangeLog:

PR target/115161
* config/i386/i386-builtin.def: Change CODE_FOR_* for cvtt*'s 
builtins.
* config/i386/sse.md:
(unspec_avx512fp16_fix
_trunc2):
Use UNSPEC instead of FIX/UNSIGNED_FIX.
(unspec_avx512fp16_fix_trunc2):
Ditto.
(unspec_avx512fp16_fix_truncv2di2): 
Ditto.

(unspec_avx512fp16_fix_trunc2):
Ditto.
(unspec_sse_cvttps2pi): Ditto.
(unspec_sse_cvttss2si): Ditto.

(unspec_fix_truncv16sfv16si2):
Ditto.
(unspec_fix_truncv8sfv8si2): Ditto.
(unspec_fix_truncv4sfv4si2): Ditto.
(unspec_sse2_cvttpd2pi): Ditto.
(unspec_fixuns_truncv2dfv2si2): Ditto.
(unspec_avx512f_vcvttss2usi):
Ditto.
(unspec_avx512f_vcvttsd2usi):
Ditto.
(unspec_sse2_cvttsd2si): Ditto.

(unspec_fix_truncv8dfv8si2):
Ditto.
(*unspec_fixuns_truncv2dfv2si2): Ditto.
(unspec_fixuns_truncv2dfv2si2_mask): Ditto.
(unspec_fix_truncv4dfv4si2): Ditto.
(unspec_fixuns_truncv4dfv4si2): Ditto.
(unspec_fix
_trunc2):
Ditto.
(unspec_fix
_trunc2):
Ditto.
(unspec_avx512dq_fix_truncv2sfv2di2):
Ditto.

(unspec_fixuns_trunc2):
Ditto.
(unspec_sse2_cvttpd2dq): Ditto.

gcc/testsuite/ChangeLog:

PR target/115161
* gcc.target/i386/pr115161-1.c: New test.

Diff:
---
 gcc/config/i386/i386-builtin.def   | 128 +--
 gcc/config/i386/sse.md | 335 +
 gcc/testsuite/gcc.target/i386/pr115161-1.c |  65 ++
 3 files changed, 464 insertions(+), 64 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a28c48c75668..edb1d2f11b22 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -635,9 +635,9 @@ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_rcpv4sf2, 
"__builtin_ia32_rcpps", IX
 BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse_cvtps2pi, 
"__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) 
V2SI_FTYPE_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_cvtss2si, 
"__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF)
 BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtss2siq, 
"__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) 
INT64_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse_cvttps2pi, 
"__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) 
V2SI_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_cvttss2si, 
"__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) 
INT_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", 
IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, 0, 
CODE_FOR_unspec_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", 
IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_unspec_sse_cvttss2si, 
"__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) 
INT_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_unspec_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", 
IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF)
 
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", 
IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 
@@ -729,19 +729,19 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_floatv4siv4sf2, 
"__builtin_ia32_cvtdq2p
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvtpd2dq, 
"__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) 
V4SI_FTYPE_V2DF)
 BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse2_cvtpd2pi, 
"__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) 
V2SI_FTYPE_V2DF)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvtpd2ps, 
"__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) 
V4SF_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_cvttpd2dq, 
"__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) 
V4SI_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_sse2_cvttpd2pi, 
"__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) 
V2SI_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_unspec_sse2_cvttpd2dq

[gcc r15-1389] i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx

2024-06-17 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:7c6f79eea9febce3b21c5783bac9b0a36e08f003

commit r15-1389-g7c6f79eea9febce3b21c5783bac9b0a36e08f003
Author: Hu, Lin1 
Date:   Wed Mar 20 16:01:45 2024 +0800

i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into 
sse/sse2/avx

gcc/ChangeLog:

* config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h,
and move macros to xmmintrin.h
* config/i386/emmintrin.h: Add cmp[p|s]d intrins.
* config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d].
* config/i386/i386-expand.cc
(ix86_expand_args_builtin): Raise error when imm is in range of
[8, 32] without avx.
* config/i386/predicates.md (cmpps_imm_operand): New predicate.
* config/i386/sse.md (avx_cmp3): Modify define_insn.
(avx_vmcmp3): Ditto.
* config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2.
(_CMP_LT_OS): Ditto
(_CMP_LE_OS): Ditto
(_CMP_UNORD_Q): Ditto
(_CMP_NEQ_UQ): Ditto
(_CMP_NLT_US): Ditto
(_CMP_NLE_US): Ditto
(_CMP_ORD_Q): Ditto
(_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h
(_mm_cmp_ss): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/sse-cmp-1.c: New test.
* gcc.target/i386/sse-cmp-2.c: Ditto.
* gcc.target/i386/sse-cmp-error.c: Ditto.

Diff:
---
 gcc/config/i386/avxintrin.h   | 56 
 gcc/config/i386/emmintrin.h   | 22 ++
 gcc/config/i386/i386-builtin.def  | 10 +--
 gcc/config/i386/i386-expand.cc|  6 ++
 gcc/config/i386/predicates.md |  5 ++
 gcc/config/i386/sse.md| 42 +++-
 gcc/config/i386/xmmintrin.h   | 41 
 gcc/testsuite/gcc.target/i386/sse-cmp-1.c | 20 ++
 gcc/testsuite/gcc.target/i386/sse-cmp-2.c | 96 +++
 gcc/testsuite/gcc.target/i386/sse-cmp-error.c | 16 +
 10 files changed, 236 insertions(+), 78 deletions(-)

diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 802145408881..ec9b9905b5f6 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ 
(32),
 
 /* Compare predicates for scalar and packed compare intrinsics.  */
 
-/* Equal (ordered, non-signaling)  */
-#define _CMP_EQ_OQ 0x00
-/* Less-than (ordered, signaling)  */
-#define _CMP_LT_OS 0x01
-/* Less-than-or-equal (ordered, signaling)  */
-#define _CMP_LE_OS 0x02
-/* Unordered (non-signaling)  */
-#define _CMP_UNORD_Q   0x03
-/* Not-equal (unordered, non-signaling)  */
-#define _CMP_NEQ_UQ0x04
-/* Not-less-than (unordered, signaling)  */
-#define _CMP_NLT_US0x05
-/* Not-less-than-or-equal (unordered, signaling)  */
-#define _CMP_NLE_US0x06
-/* Ordered (nonsignaling)   */
-#define _CMP_ORD_Q 0x07
 /* Equal (unordered, non-signaling)  */
 #define _CMP_EQ_UQ 0x08
 /* Not-greater-than-or-equal (unordered, signaling)  */
@@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B)
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
 {
@@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
   return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
   __P);
 }
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
-{
-  return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
-{
-  return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
-}
 #else
-#define _mm_cmp_pd(X, Y, P)\
-  ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X),   \
-  (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ps(X, Y, P)\
-  ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P)))
-
 #define _mm

[gcc r15-4245] i386: Fix some patterns's mem attribute.

2024-10-10 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:9f2f108a8a68c7b7b2de5350439a8ab8e17a54da

commit r15-4245-g9f2f108a8a68c7b7b2de5350439a8ab8e17a54da
Author: Hu, Lin1 
Date:   Wed Oct 9 10:20:05 2024 +0800

i386: Fix some patterns's mem attribute.

Hi, all

This is another patch to modify some patterns' type attr from ssemov to
ssemov2.

Some ssemov patterns' mem attr should be load when their operand 2 is a
memory operand.

Bootstrapped and regtested on x86-64-linux-pc, OK for trunk?

BRs,
Lin

gcc/ChangeLog:

* config/i386/sse.md
(sse_movhlps): Change type attr from ssemov to ssemov2.
(sse_loadhps): Ditto.
(*vec_concat): Ditto.
(vec_setv2df_0): Ditto.
(sse_loadlps): Change attr from ssemov to ssemov2 except for 2, 3.
(sse2_loadhps): Change attr from ssemov to ssemov2 except for 0, 1.
(sse2_loadlpd): Change attr from ssemov to ssemov2 except for 0, 1,
2.
(sse2_movsd_): Change attr from ssemov to ssemov2 except for 
5.
(vec_concatv2df): Change attr from ssemov to ssemov2 except for 0, 
1,
2.
(*vec_concat): Change attr from ssemov to ssemov2 for 3, 4.
(vec_concatv2di): Change attr from ssemov to ssemov2 except for 0, 
1,
2, 3, 4, 5.

Diff:
---
 gcc/config/i386/sse.md | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ccef3e063eca..a45b50ad7324 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10995,7 +10995,7 @@
vmovlps\t{%H2, %1, %0|%0, %1, %H2}
%vmovhps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
@@ -11557,7 +11557,7 @@
vmovlhps\t{%2, %1, %0|%0, %1, %2}
%vmovlps\t{%2, %H0|%H0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
 
@@ -11610,7 +11610,7 @@
vmovlps\t{%2, %1, %0|%0, %1, %q2}
%vmovlps\t{%2, %0|%q0, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx,*")
-   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+   (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov")
(set (attr "length_immediate")
  (if_then_else (eq_attr "alternative" "0,1")
   (const_string "1")
@@ -11766,7 +11766,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
 
@@ -12214,7 +12214,7 @@
movlpd\t{%2, %0|%0, %2}
vmovlpd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,noavx,avx")
-   (set_attr "type" "ssemov")
+   (set_attr "type" "ssemov2")
(set_attr "mode" "DF")])
 
 (define_expand "vec_set"
@@ -14665,7 +14665,7 @@
#
#"
   [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
-   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+   (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov")
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "0")
   (const_string "1")
@@ -14735,6 +14735,8 @@
  (const_string "fmov")
(eq_attr "alternative" "10")
  (const_string "imov")
+   (eq_attr "alternative" "0,1,2")
+ (const_string "ssemov2")
   ]
   (const_string "ssemov")))
(set (attr "prefix_data16")
@@ -14787,7 +14789,7 @@
  (if_then_else
(eq_attr "alternative" "5")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
  (if_then_else
(and (eq_attr "alternative" "2,4")
@@ -14859,7 +14861,7 @@
  (if_then_else
(eq_attr "alternative" "0,1,2")
(const_string "sselog")
-   (const_string "ssemov")))
+   (const_string "ssemov2")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3")
  (const_string "1")
@@ -21545,7 +21547,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
   [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
-   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
+   (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
(set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
 
@@ -21653,7 +21655,7 @@
  (if_then_else
(eq_attr "alternative" "0,1,2,3,4,5")
(const_string "sselog")
-   (const_string "ssemov")))

[gcc r15-4952] i386: Handling exception input of __builtin_ia32_prefetch. [PR117416]

2024-11-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:ea46a216d48597b220ae69e79f6513c763f953be

commit r15-4952-gea46a216d48597b220ae69e79f6513c763f953be
Author: Hu, Lin1 
Date:   Mon Nov 4 14:52:56 2024 +0800

i386: Handling exception input of __builtin_ia32_prefetch. [PR117416]

op1 should be between 0 and 2; raise a warning and use zero when it is out
of range. op3 should be 0 or 1; raise a warning when it is an invalid value.

gcc/ChangeLog:

PR target/117416
* config/i386/i386-expand.cc (ix86_expand_builtin): Raise warning 
when
op1 isn't in range of [0, 2] and set op1 as const0_rtx, and raise
warning when op3 isn't in range of [0, 1].

gcc/testsuite/ChangeLog:

PR target/117416
* gcc.target/i386/pr117416-1.c: New test.
* gcc.target/i386/pr117416-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386-expand.cc | 11 +++
 gcc/testsuite/gcc.target/i386/pr117416-1.c | 13 +
 gcc/testsuite/gcc.target/i386/pr117416-2.c | 13 +
 3 files changed, 37 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6eef27f3fcda..ff07ab40848e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -14202,6 +14202,13 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
return const0_rtx;
  }
 
+   if (!IN_RANGE (INTVAL (op1), 0, 2))
+ {
+   warning (0, "invalid second argument to"
+" %<__builtin_ia32_prefetch%>; using zero");
+   op1 = const0_rtx;
+ }
+
if (INTVAL (op3) == 1)
  {
if (INTVAL (op2) < 2 || INTVAL (op2) > 3)
@@ -14224,6 +14231,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
  }
else
  {
+   if (INTVAL (op3) != 0)
+ warning (0, "invalid forth argument to"
+ " %<__builtin_ia32_prefetch%>; using zero");
+
if (!address_operand (op0, VOIDmode))
  {
op0 = convert_memory_address (Pmode, op0);
diff --git a/gcc/testsuite/gcc.target/i386/pr117416-1.c 
b/gcc/testsuite/gcc.target/i386/pr117416-1.c
new file mode 100644
index ..65788f268d9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117416-1.c
@@ -0,0 +1,13 @@
+/* PR target/117416 */
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+#include 
+
+void* p;
+
+void extern
+prefetch_test (void)
+{
+  __builtin_ia32_prefetch (p, 5, 0, 0); /* { dg-warning "invalid second 
argument to '__builtin_ia32_prefetch'; using zero" } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117416-2.c 
b/gcc/testsuite/gcc.target/i386/pr117416-2.c
new file mode 100644
index ..07799e36cfe1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117416-2.c
@@ -0,0 +1,13 @@
+/* PR target/117416 */
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+#include 
+
+void* p;
+
+void extern
+prefetch_test (void)
+{
+  __builtin_ia32_prefetch (p, 0, 0, 2); /* { dg-warning "invalid forth 
argument to '__builtin_ia32_prefetch'; using zero" } */
+}


[gcc r15-4973] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.

2024-11-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:8ac694ae67e24a798dce368587bed4c40b90fbc0

commit r15-4973-g8ac694ae67e24a798dce368587bed4c40b90fbc0
Author: Hu, Lin1 
Date:   Tue Nov 5 15:49:57 2024 +0800

i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.

gcc/ChangeLog:

PR target/117304
* config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for 
some
AVX512 512-bits instructions.

gcc/testsuite/ChangeLog:

PR target/117304
* gcc.target/i386/pr117304-1.c: New test.

Diff:
---
 gcc/config/i386/i386-builtin.def   | 10 +-
 gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index c484e6dc29e4..26c23780b1c6 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3357,11 +3357,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", 
IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, 
"__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, 
"__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, 
"__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) 
V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", 
IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", 
IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, 
"__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_UINT_INT)
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", 
IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT)
diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c 
b/gcc/testsuite/gcc.target/i386/pr117304-1.c
new file mode 100644
index ..fc1c5bfd3e35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c
@@ -0,0 +1,28 @@
+/* PR target/117304 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-evex512" } */
+
+typedef __attribute__((__vector_size__(32))) int __v8si;
+typedef __attribute__((__vector_size__(32))) unsigned int __v8su;
+typedef __attribute__((__vector_size__(64))) double __v8df;
+typedef __attribute__((__vector_size__(64))) int __v16si;
+typedef __attribute__((__vector_size__(64))) unsigned int __v16su;
+typedef __attribute__((__vector_size__(64))) float __v16sf;
+typedef float __m512 __attribute__ ((__vector_size__ (6

[gcc r15-5184] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]

2024-11-12 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:2272cd2508f1854c880082f792de15e76ec09a99

commit r15-5184-g2272cd2508f1854c880082f792de15e76ec09a99
Author: Hu, Lin1 
Date:   Wed Nov 6 15:42:13 2024 +0800

i386: Zero extend 32-bit address to 64-bit with option -mx32 
-maddress-mode=long. [PR 117418]

-maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to
64 bits and use a 64-bit register as the pointer to avoid an ICE.

gcc/ChangeLog:

PR target/117418
* config/i386/i386-expand.cc (ix86_expand_builtin): Convert
pointer's mode according to Pmode.

gcc/testsuite/ChangeLog:

PR target/117418
* gcc.target/i386/pr117418-1.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc | 12 
 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 
 2 files changed, 36 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5c4a8e07d621..a6e6e738a524 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -14064,6 +14064,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   op1 = expand_normal (arg1);
   op2 = expand_normal (arg2);
 
+  if (GET_MODE (op1) != Pmode)
+   op1 = convert_to_mode (Pmode, op1, 1);
+
   if (!address_operand (op2, VOIDmode))
{
  op2 = convert_memory_address (Pmode, op2);
@@ -14099,6 +14102,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   emit_label (ok_label);
   emit_insn (gen_rtx_SET (target, pat));
 
+  if (GET_MODE (op0) != Pmode)
+   op0 = convert_to_mode (Pmode, op0, 1);
+
   for (i = 0; i < 8; i++)
{
  op = gen_rtx_MEM (V2DImode,
@@ -14123,6 +14129,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
 
@@ -14160,6 +14169,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op3) != Pmode)
+ op3 = convert_to_mode (Pmode, op3, 1);
+
/* Force to use xmm0, xmm1 for keylow, keyhi*/
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c 
b/gcc/testsuite/gcc.target/i386/pr117418-1.c
new file mode 100644
index ..4839b139b79a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c
@@ -0,0 +1,24 @@
+/* PR target/117418 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */
+/* { dg-require-effective-target maybe_x32  } */
+/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey128" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey256" 1 } } */
+
+typedef __attribute__((__vector_size__(16))) long long V;
+V a;
+
+void
+foo()
+{
+__builtin_ia32_aesdec128kl_u8 (&a, a, &a);
+__builtin_ia32_aesdec256kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc128kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc256kl_u8 (&a, a, &a);
+__builtin_ia32_encodekey128_u32 (0, a, &a); 
+__builtin_ia32_encodekey256_u32 (0, a, a, &a); 
+}


[gcc r13-9183] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]

2024-11-12 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:4758f8d410e961b09c8be619d6d0a71d5e7e4aa5

commit r13-9183-g4758f8d410e961b09c8be619d6d0a71d5e7e4aa5
Author: Hu, Lin1 
Date:   Wed Nov 6 15:42:13 2024 +0800

i386: Zero extend 32-bit address to 64-bit with option -mx32 
-maddress-mode=long. [PR 117418]

-maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to
64 bits and use a 64-bit register as the pointer to avoid an ICE.

gcc/ChangeLog:

PR target/117418
* config/i386/i386-expand.cc (ix86_expand_builtin): Convert
pointer's mode according to Pmode.

gcc/testsuite/ChangeLog:

PR target/117418
* gcc.target/i386/pr117418-1.c: New test.

(cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99)

Diff:
---
 gcc/config/i386/i386-expand.cc | 12 
 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 
 2 files changed, 36 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index dc85103f3a81..aba810f3fa2d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -13096,6 +13096,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   op1 = expand_normal (arg1);
   op2 = expand_normal (arg2);
 
+  if (GET_MODE (op1) != Pmode)
+   op1 = convert_to_mode (Pmode, op1, 1);
+
   if (!address_operand (op2, VOIDmode))
{
  op2 = convert_memory_address (Pmode, op2);
@@ -13131,6 +13134,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   emit_label (ok_label);
   emit_insn (gen_rtx_SET (target, pat));
 
+  if (GET_MODE (op0) != Pmode)
+   op0 = convert_to_mode (Pmode, op0, 1);
+
   for (i = 0; i < 8; i++)
{
  op = gen_rtx_MEM (V2DImode,
@@ -13155,6 +13161,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
 
@@ -13192,6 +13201,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op3) != Pmode)
+ op3 = convert_to_mode (Pmode, op3, 1);
+
/* Force to use xmm0, xmm1 for keylow, keyhi*/
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c 
b/gcc/testsuite/gcc.target/i386/pr117418-1.c
new file mode 100644
index ..4839b139b79a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c
@@ -0,0 +1,24 @@
+/* PR target/117418 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */
+/* { dg-require-effective-target maybe_x32  } */
+/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey128" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey256" 1 } } */
+
+typedef __attribute__((__vector_size__(16))) long long V;
+V a;
+
+void
+foo()
+{
+__builtin_ia32_aesdec128kl_u8 (&a, a, &a);
+__builtin_ia32_aesdec256kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc128kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc256kl_u8 (&a, a, &a);
+__builtin_ia32_encodekey128_u32 (0, a, &a); 
+__builtin_ia32_encodekey256_u32 (0, a, a, &a); 
+}


[gcc r12-10813] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]

2024-11-12 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:e41fdca8a290c4d72b1972af8cdfd1dd60af31df

commit r12-10813-ge41fdca8a290c4d72b1972af8cdfd1dd60af31df
Author: Hu, Lin1 
Date:   Wed Nov 6 15:42:13 2024 +0800

i386: Zero extend 32-bit address to 64-bit with option -mx32 
-maddress-mode=long. [PR 117418]

-maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to
64 bits and use a 64-bit register as the pointer to avoid an ICE.

gcc/ChangeLog:

PR target/117418
* config/i386/i386-expand.cc (ix86_expand_builtin): Convert
pointer's mode according to Pmode.

gcc/testsuite/ChangeLog:

PR target/117418
* gcc.target/i386/pr117418-1.c: New test.

(cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99)

Diff:
---
 gcc/config/i386/i386-expand.cc | 12 
 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 
 2 files changed, 36 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 909c11e4195b..5c8d9c556af2 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12747,6 +12747,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   op1 = expand_normal (arg1);
   op2 = expand_normal (arg2);
 
+  if (GET_MODE (op1) != Pmode)
+   op1 = convert_to_mode (Pmode, op1, 1);
+
   if (!address_operand (op2, VOIDmode))
{
  op2 = convert_memory_address (Pmode, op2);
@@ -12782,6 +12785,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   emit_label (ok_label);
   emit_insn (gen_rtx_SET (target, pat));
 
+  if (GET_MODE (op0) != Pmode)
+   op0 = convert_to_mode (Pmode, op0, 1);
+
   for (i = 0; i < 8; i++)
{
  op = gen_rtx_MEM (V2DImode,
@@ -12806,6 +12812,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
 
@@ -12843,6 +12852,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op3) != Pmode)
+ op3 = convert_to_mode (Pmode, op3, 1);
+
/* Force to use xmm0, xmm1 for keylow, keyhi*/
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c 
b/gcc/testsuite/gcc.target/i386/pr117418-1.c
new file mode 100644
index ..4839b139b79a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c
@@ -0,0 +1,24 @@
+/* PR target/117418 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */
+/* { dg-require-effective-target maybe_x32  } */
+/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey128" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey256" 1 } } */
+
+typedef __attribute__((__vector_size__(16))) long long V;
+V a;
+
+void
+foo()
+{
+__builtin_ia32_aesdec128kl_u8 (&a, a, &a);
+__builtin_ia32_aesdec256kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc128kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc256kl_u8 (&a, a, &a);
+__builtin_ia32_encodekey128_u32 (0, a, &a); 
+__builtin_ia32_encodekey256_u32 (0, a, a, &a); 
+}


[gcc r15-3704] i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2

2024-09-18 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:1cf1bf7899985df31e1ebccb5d6f1ca762991dcf

commit r15-3704-g1cf1bf7899985df31e1ebccb5d6f1ca762991dcf
Author: Hu, Lin1 
Date:   Wed Sep 11 10:10:40 2024 +0800

i386: Add ssemov2, sseicvt2 for some load instructions that use memory on 
operand2

The memory attribute of some instructions should be 'load', but it is
currently 'none'.

gcc/ChangeLog:

* config/i386/i386.md: Add ssemov2, sseicvt2.
* config/i386/sse.md (sse2_cvtsi2sd): Apply sseicvt2.
(sse2_cvtsi2sdq): Ditto.
(vec_set_0): Apply ssemov2 for 4, 6.

Diff:
---
 gcc/config/i386/i386.md | 11 +++
 gcc/config/i386/sse.md  |  6 --
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c04415149490..9c2a0aa61126 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -539,10 +539,10 @@
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
fxch,fistp,fisttp,frndint,
-   sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+   sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
-   ssecvt,ssecvt1,sseicvt,sseins,
+   ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
@@ -560,10 +560,10 @@
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
  fxch,fistp,fisttp,frndint")
   (const_string "i387")
-(eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+(eq_attr "type" "sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
  ssemul,sseimul,ssediv,sselog,sselog1,
  sseishft,sseishft1,ssecmp,ssecomi,
- ssecvt,ssecvt1,sseicvt,sseins,
+ ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
  sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
   (const_string "sse")
 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
@@ -858,6 +858,9 @@
   mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
  (match_operand 2 "memory_operand"))
   (const_string "load")
+(and (eq_attr "type" "ssemov2,sseicvt2")
+ (match_operand 2 "memory_operand"))
+  (const_string "load")
 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
  (match_operand 3 "memory_operand"))
   (const_string "load")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ae61182d0cc..ff4f33b7b637 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8876,7 +8876,7 @@
cvtsi2sd{l}\t{%2, %0|%0, %2}
vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,noavx,avx")
-   (set_attr "type" "sseicvt")
+   (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -8898,7 +8898,7 @@
cvtsi2sd{q}\t{%2, %0|%0, %2}
vcvtsi2sd{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,noavx,avx")
-   (set_attr "type" "sseicvt")
+   (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -11808,6 +11808,8 @@
  (const_string "imov")
(eq_attr "alternative" "14")
  (const_string "fmov")
+   (eq_attr "alternative" "4,6")
+ (const_string "ssemov2")
   ]
   (const_string "ssemov")))
(set (attr "addr")


[gcc r14-10895] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.

2024-11-06 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:05fd99e3d5e9f00e4e23596ed15a3cec2aaba128

commit r14-10895-g05fd99e3d5e9f00e4e23596ed15a3cec2aaba128
Author: Hu, Lin1 
Date:   Tue Nov 5 15:49:57 2024 +0800

i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 instructions.

gcc/ChangeLog:

PR target/117304
* config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for 
some
AVX512 512-bits instructions.

gcc/testsuite/ChangeLog:

PR target/117304
* gcc.target/i386/pr117304-1.c: New test.

(cherry picked from commit 8ac694ae67e24a798dce368587bed4c40b90fbc0)

Diff:
---
 gcc/config/i386/i386-builtin.def   | 10 +-
 gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index fdd9dba6e542..ee34e0a14979 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3065,11 +3065,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", 
IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, 
"__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, 
"__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, 
UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, 
"__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) 
V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_fixuns_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, 
"__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) 
V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", 
IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", 
IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", 
IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_fixuns_truncv16sfv16si2_mask_round, 
"__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) 
V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", 
IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", 
IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, 
"__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_UINT_INT)
 BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, 
CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", 
IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT)
diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c 
b/gcc/testsuite/gcc.target/i386/pr117304-1.c
new file mode 100644
index ..da26f4bd1b78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c
@@ -0,0 +1,28 @@
+/* PR target/117304 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-evex512 -mavx512vl" } */
+
+typedef __attribute__((__vector_size__(32))) int __v8si;
+typedef __attribute__((__vector_size__(32))) unsigned int __v8su;
+typedef __attribute__((__vector_size__(64))) double __v8df;
+typedef __attribute__((__vector_size__(64))) int __v16si;
+typedef __attribute__((__vector_size__(64))) unsigned int __v16su;
+typedef __attribute__((__vector_size__(64))) float __v16sf;
+typedef float __m512 __attr

[gcc r14-10896] i386: Modify regexp of pr117304-1.c

2024-11-06 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:6a0e143a6449bcc250af13642263f671f756500b

commit r14-10896-g6a0e143a6449bcc250af13642263f671f756500b
Author: Hu, Lin1 
Date:   Thu Nov 7 10:13:15 2024 +0800

i386: Modify regexp of pr117304-1.c

Since the test doesn't care whether the hint is correct,
modify the regexp for the hint part so that future changes
to the hint do not cause the test to fail.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr117304-1.c: Modify regexp.

(cherry picked from commit 4473cf8409f4db19ad91bd784e32dc54eccf02a3)

Diff:
---
 gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c 
b/gcc/testsuite/gcc.target/i386/pr117304-1.c
index da26f4bd1b78..4f00ff7c92a1 100644
--- a/gcc/testsuite/gcc.target/i386/pr117304-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c
@@ -20,9 +20,9 @@ volatile __v16su ui;
 void
 foo()
 {
-  hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you 
mean '__builtin_ia32_cvttpd2dq128_mask'?" } */
-  hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you 
mean '__builtin_ia32_cvttpd2udq128_mask'?" } */
-  ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you 
mean '__builtin_ia32_cvttps2dq128_mask'?" } */
-  ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you 
mean '__builtin_ia32_cvttps2udq128_mask'?" } */
-  __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit 
declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean 
'__builtin_ia32_cvtudq2ps128_mask'?" } */
+  hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit 
declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean 
'__builtin_ia32_\[^\n\r]*'?" } */
 }


[gcc r15-5006] i386: Modify regexp of pr117304-1.c

2024-11-06 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:4473cf8409f4db19ad91bd784e32dc54eccf02a3

commit r15-5006-g4473cf8409f4db19ad91bd784e32dc54eccf02a3
Author: Hu, Lin1 
Date:   Thu Nov 7 10:13:15 2024 +0800

i386: Modify regexp of pr117304-1.c

Since the test doesn't care whether the hint is correct,
modify the regexp for the hint part so that future changes
to the hint do not cause the test to fail.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr117304-1.c: Modify regexp.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c 
b/gcc/testsuite/gcc.target/i386/pr117304-1.c
index fc1c5bfd3e35..ec75f271447a 100644
--- a/gcc/testsuite/gcc.target/i386/pr117304-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c
@@ -20,9 +20,9 @@ volatile __v16su ui;
 void
 foo()
 {
-  hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you 
mean '__builtin_ia32_cvttpd2dq128_mask'?" } */
-  hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you 
mean '__builtin_ia32_cvttpd2udq128_mask'?" } */
-  ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you 
mean '__builtin_ia32_cvttps2dq128_mask'?" } */
-  ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you 
mean '__builtin_ia32_cvttps2udq128_mask'?" } */
-  __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit 
declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean 
'__builtin_ia32_cvtudq2ps128_mask'?" } */
+  hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error 
"implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you 
mean '__builtin_ia32_\[^\n\r]*'?" } */
+  __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit 
declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean 
'__builtin_ia32_\[^\n\r]*'?" } */
 }


[gcc r14-10937] i386: Zero extend 32-bit address to 64-bit with option -mx32 -maddress-mode=long. [PR 117418]

2024-11-17 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:8b4bb54e6c45411845ec559c49f594a6239c3969

commit r14-10937-g8b4bb54e6c45411845ec559c49f594a6239c3969
Author: Hu, Lin1 
Date:   Wed Nov 6 15:42:13 2024 +0800

i386: Zero extend 32-bit address to 64-bit with option -mx32 
-maddress-mode=long. [PR 117418]

-maddress-mode=long makes Pmode DImode, so zero-extend the 32-bit address to
64 bits and use a 64-bit register as the pointer to avoid an ICE.

gcc/ChangeLog:

PR target/117418
* config/i386/i386-expand.cc (ix86_expand_builtin): Convert
pointer's mode according to Pmode.

gcc/testsuite/ChangeLog:

PR target/117418
* gcc.target/i386/pr117418-1.c: New test.

(cherry picked from commit 2272cd2508f1854c880082f792de15e76ec09a99)

Diff:
---
 gcc/config/i386/i386-expand.cc | 12 
 gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 
 2 files changed, 36 insertions(+)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7019116fcac1..8e9dde145cfb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -13477,6 +13477,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   op1 = expand_normal (arg1);
   op2 = expand_normal (arg2);
 
+  if (GET_MODE (op1) != Pmode)
+   op1 = convert_to_mode (Pmode, op1, 1);
+
   if (!address_operand (op2, VOIDmode))
{
  op2 = convert_memory_address (Pmode, op2);
@@ -13512,6 +13515,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
   emit_label (ok_label);
   emit_insn (gen_rtx_SET (target, pat));
 
+  if (GET_MODE (op0) != Pmode)
+   op0 = convert_to_mode (Pmode, op0, 1);
+
   for (i = 0; i < 8; i++)
{
  op = gen_rtx_MEM (V2DImode,
@@ -13536,6 +13542,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
 
@@ -13573,6 +13582,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx 
subtarget,
if (!REG_P (op0))
  op0 = copy_to_mode_reg (SImode, op0);
 
+   if (GET_MODE (op3) != Pmode)
+ op3 = convert_to_mode (Pmode, op3, 1);
+
/* Force to use xmm0, xmm1 for keylow, keyhi*/
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c 
b/gcc/testsuite/gcc.target/i386/pr117418-1.c
new file mode 100644
index ..4839b139b79a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c
@@ -0,0 +1,24 @@
+/* PR target/117418 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */
+/* { dg-require-effective-target maybe_x32  } */
+/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */
+/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey128" 1 } } */
+/* { dg-final { scan-assembler-times "encodekey256" 1 } } */
+
+typedef __attribute__((__vector_size__(16))) long long V;
+V a;
+
+void
+foo()
+{
+__builtin_ia32_aesdec128kl_u8 (&a, a, &a);
+__builtin_ia32_aesdec256kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc128kl_u8 (&a, a, &a);
+__builtin_ia32_aesenc256kl_u8 (&a, a, &a);
+__builtin_ia32_encodekey128_u32 (0, a, &a); 
+__builtin_ia32_encodekey256_u32 (0, a, a, &a); 
+}


[gcc r15-8876] i386: Fix AVX10.2 sat cvt intrinsic.

2025-03-24 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:90ab42f92b876b74056db297557e8c3d51cdd773

commit r15-8876-g90ab42f92b876b74056db297557e8c3d51cdd773
Author: Hu, Lin1 
Date:   Tue Mar 25 09:24:59 2025 +0800

i386: Fix AVX10.2 sat cvt intrinsic.

This patch applies the fix that was missed for the vcvttph2iubs testcase.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Modify testcase.

Diff:
---
 .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c   | 28 +-
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c
index d057c83831a0..1db5a891c216 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c
@@ -9,6 +9,7 @@
 #endif
 #include "avx10-helper.h"
 #include 
+#include <string.h>
 
 #define SIZE (AVX512F_LEN / 16)
 #include "avx512f-mask-type.h"
@@ -37,7 +38,7 @@ TEST (void)
   UNION_TYPE (AVX512F_LEN, h) s;
   UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
   MASK_TYPE mask = MASK_VALUE;
-  short res_ref[SIZE] = { 0 };
+  short res_ref[SIZE] = { 0 }, res_ref2[SIZE] = { 0 };
   int i, sign = 1;
 
   for (i = 0; i < SIZE; i++)
@@ -54,11 +55,7 @@ TEST (void)
   res3.x = INTRINSIC (_maskz_ipcvtts_ph_epu8) (mask, s.x);
 
   CALC (s.a, res_ref);
-
-#if AVX512F_LEN == 512
-  res1.x = INTRINSIC (_ipcvtts_roundph_epu8) (s.x, 8);
-  res2.x = INTRINSIC (_mask_ipcvtts_roundph_epu8) (res2.x, mask, s.x, 8);
-  res3.x = INTRINSIC (_maskz_ipcvtts_roundph_epu8) (mask, s.x, 8);
+  memcpy(res_ref2, res_ref, sizeof(res_ref));
 
   if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
 abort ();
@@ -70,5 +67,24 @@ TEST (void)
   MASK_ZERO (i_w) (res_ref, mask, SIZE);
   if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
 abort ();
+
+#if AVX512F_LEN == 512
+  for (i = 0; i < SIZE; i++)
+res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = INTRINSIC (_ipcvtts_roundph_epu8) (s.x, 8);
+  res2.x = INTRINSIC (_mask_ipcvtts_roundph_epu8) (res2.x, mask, s.x, 8);
+  res3.x = INTRINSIC (_maskz_ipcvtts_roundph_epu8) (mask, s.x, 8);
+
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref2))
+abort ();
+
+  MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref2))
+abort ();
+
+  MASK_ZERO (i_w) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref2))
+abort ();
 #endif
 }


[gcc r15-8457] i386: Update Suffix for AVX10.2 SAT CVT Intrinsics

2025-03-19 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:82bbc9da2c7a24a38916158eaff767cc82a7b6bf

commit r15-8457-g82bbc9da2c7a24a38916158eaff767cc82a7b6bf
Author: Hu, Lin1 
Date:   Tue Mar 18 10:03:22 2025 +0800

i386: Update Suffix for AVX10.2 SAT CVT Intrinsics

The intrinsic names for *[i|u]bs instructions in AVX10.2 are missing the
required _ep[i|u]8 suffix.

This patch aims to fix the issue.

gcc/ChangeLog:

* config/i386/avx10_2-512satcvtintrin.h: Change *i[u]bs's type 
suffix
of intrin name.
* config/i386/avx10_2satcvtintrin.h: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-satcvt-1.c: Modify intrin name.
* gcc.target/i386/avx10_2-512-vcvtbf162ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbf162iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttbf162ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttbf162iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-satcvt-1.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.

Diff:
---
 gcc/config/i386/avx10_2-512satcvtintrin.h  | 152 ++---
 gcc/config/i386/avx10_2satcvtintrin.h  | 236 ++---
 .../gcc.target/i386/avx10_2-512-satcvt-1.c |  72 +++
 .../gcc.target/i386/avx10_2-512-vcvtbf162ibs-2.c   |   6 +-
 .../gcc.target/i386/avx10_2-512-vcvtbf162iubs-2.c  |   6 +-
 .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c |  12 +-
 .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c|  12 +-
 .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c |  12 +-
 .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c|  12 +-
 .../gcc.target/i386/avx10_2-512-vcvttbf162ibs-2.c  |   6 +-
 .../gcc.target/i386/avx10_2-512-vcvttbf162iubs-2.c |   6 +-
 .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c|  12 +-
 .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c   |  12 +-
 .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c|  12 +-
 .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c   |  12 +-
 gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c   | 144 ++---
 gcc/testsuite/gcc.target/i386/sse-14.c |  96 -
 gcc/testsuite/gcc.target/i386/sse-22.c |  96 -
 18 files changed, 458 insertions(+), 458 deletions(-)

diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h 
b/gcc/config/i386/avx10_2-512satcvtintrin.h
index 6e864a9a6f81..a08f98c92a0f 100644
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ b/gcc/config/i386/avx10_2-512satcvtintrin.h
@@ -36,7 +36,7 @@
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtbf16_epi16 (__m512bh __A)
+_mm512_ipcvtbf16_epi8 (__m512bh __A)
 {
   return
 (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
@@ -47,7 +47,7 @@ _mm512_ipcvtbf16_epi16 (__m512bh __A)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtbf16_epi16 (__m512i __W, __mmask32 __U, __m512bh __A)
+_mm512_mask_ipcvtbf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
 {
   return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
   (__v32hi) __W,
@@ -56,7 +56,7 @@ _mm512_mask_ipcvtbf16_epi16 (__m512i __W, __mmask32 __U, 
__m512bh __A)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtbf16_epi16 (__mmask32 __U, __m512bh __A)
+_mm512_maskz_ipcvtbf16_epi8 (__mmask32 __U, __m512bh __A)
 {
   return
 (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
@@ -67,7 +67,7 @@ _mm512_maskz_ipcvtbf16_epi16 (__mmask32 __U, __m512bh __A)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtbf16_epu16 (__m512bh __A)
+_mm512_ipcvtbf16_epu8 (__m512bh __A)
 {
   return
 (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
@@ -78,7 +78,7 @@ _mm512_ipcvtbf16_epu16 (__m512bh __A)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtbf16_epu16 (__m512i __W, __mmask32 __U, __m512bh __A)
+_mm512_mask_ipcvtbf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
 {
   return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
(__v32hi) __W,
@@ -87,7 +87,7 @@ _mm51

[gcc r15-8463] i386: Fix AVX10.2 SAT CVT testcases.

2025-03-20 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:8d236c53c679ca920092ce9200785fcccd97d971

commit r15-8463-g8d236c53c679ca920092ce9200785fcccd97d971
Author: Hu, Lin1 
Date:   Thu Mar 20 11:55:49 2025 +0800

i386: Fix AVX10.2 SAT CVT testcases.

Init res_ref2 for rounding control intrinsics.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Fix testcase.
* gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto.

Diff:
---
 .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c  | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c  | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c| 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c| 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c| 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c | 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c| 17 +++--
 .../gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c| 17 +++--
 15 files changed, 165 insertions(+), 90 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c
index 0c860b02046f..523b3f0a4cb6 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c
@@ -9,6 +9,7 @@
 #endif
 #include "avx10-helper.h"
 #include 
+#include 
 
 #define SIZE (AVX512F_LEN / 16)
 #include "avx512f-mask-type.h"
@@ -37,7 +38,7 @@ TEST (void)
   UNION_TYPE (AVX512F_LEN, h) s;
   UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
   MASK_TYPE mask = MASK_VALUE;
-  short res_ref[SIZE] = { 0 };
+  short res_ref[SIZE] = { 0 }, res_ref2[SIZE] = { 0 };
   int i, sign = 1;
 
   for (i = 0; i < SIZE; i++)
@@ -54,6 +55,7 @@ TEST (void)
   res3.x = INTRINSIC (_maskz_ipcvts_ph_epi8) (mask, s.x);
 
   CALC (s.a, res_ref);
+  memcpy(res_ref2, res_ref, sizeof(res_ref));
 
   if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
 abort ();
@@ -67,19 +69,22 @@ TEST (void)
 abort ();
 
 #if AVX512F_LEN != 128
+  for (i = 0; i < SIZE; i++)
+res2.a[i] = DEFAULT_VALUE;
+
   res1.x = INTRINSIC (_ipcvts_roundph_epi8) (s.x, 8);
   res2.x = INTRINSIC (_mask_ipcvts_roundph_epi8) (res2.x, mask, s.x, 8);
   res3.x = INTRINSIC (_maskz_ipcvts_roundph_epi8) (mask, s.x, 8);
 
-  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref2))
 abort ();
 
-  MASK_MERGE (i_w) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+  MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref2))
 abort ();
 
-  MASK_ZERO (i_w) (res_ref, mask, SIZE);
-  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+  MASK_ZERO (i_w) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref2))
 abort ();
 #endif
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c
index 75e4e1141be8..a8f6e57d46ab 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c
@@ -9,6 +9,7 @@
 #endif
 #include "avx10-helper.h"
 #include 
+#include 
 
 #define SIZE (AVX512F_LEN / 16)
 #include "avx512f-mask-type.h"
@@ -37,7 +38,7 @@ TEST (void)
   UNION_TYPE (AVX512F_LEN, h) s;
   UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
   MA

[gcc r15-9117] i386: Add attr_isa for vaes patterns to sync with attr gpr16. [pr119473]

2025-04-04 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:e5cfa7f797b79613e5483786484567b9ca72db06

commit r15-9117-ge5cfa7f797b79613e5483786484567b9ca72db06
Author: Hu, Lin1 
Date:   Wed Mar 26 16:15:52 2025 +0800

i386: Add attr_isa for vaes patterns to sync with attr gpr16. [pr119473]

For vaes patterns with the jm constraint and the gpr16 attr, the "isa" attr
is required to distinguish the avx/avx512 alternatives in
ix86_memory_address_reg_class.
Also add the missing type and mode attributes for those vaes patterns.

gcc/ChangeLog:

PR target/119473
* config/i386/sse.md
(vaesdec_): Set attr "isa" as "avx,vaes_avx512vl", "type" as
"sselog1", "mode" as "TI".
(vaesdeclast_): Ditto.
(vaesenc_): Ditto.
(vaesenclast_): Ditto.

gcc/testsuite/ChangeLog:

PR target/119473
* gcc.target/i386/pr119473.c: New test.

Co-authored-by: Hongyu Wang 

Diff:
---
 gcc/config/i386/sse.md   | 20 
 gcc/testsuite/gcc.target/i386/pr119473.c | 26 ++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92dc93cb6532..b280676eee6b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30849,7 +30849,10 @@
   else
 return "vaesdec\t{%2, %1, %0|%0, %1, %2}";
 }
-[(set_attr "addr" "gpr16,*")])
+[(set_attr "isa" "avx,vaes_avx512vl")
+ (set_attr "type" "sselog1")
+ (set_attr "addr" "gpr16,*")
+ (set_attr "mode" "TI")])
 
 (define_insn "vaesdeclast_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30864,7 +30867,10 @@
   else
 return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
 }
-[(set_attr "addr" "gpr16,*")])
+[(set_attr "isa" "avx,vaes_avx512vl")
+ (set_attr "type" "sselog1")
+ (set_attr "addr" "gpr16,*")
+ (set_attr "mode" "TI")])
 
 (define_insn "vaesenc_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30879,7 +30885,10 @@
   else
 return "vaesenc\t{%2, %1, %0|%0, %1, %2}";
 }
-[(set_attr "addr" "gpr16,*")])
+[(set_attr "isa" "avx,vaes_avx512vl")
+ (set_attr "type" "sselog1")
+ (set_attr "addr" "gpr16,*")
+ (set_attr "mode" "TI")])
 
 (define_insn "vaesenclast_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30894,7 +30903,10 @@
   else
 return "vaesenclast\t{%2, %1, %0|%0, %1, %2}";
 }
-[(set_attr "addr" "gpr16,*")])
+[(set_attr "isa" "avx,vaes_avx512vl")
+ (set_attr "type" "sselog1")
+ (set_attr "addr" "gpr16,*")
+ (set_attr "mode" "TI")])
 
 (define_insn "vpclmulqdq_"
   [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
diff --git a/gcc/testsuite/gcc.target/i386/pr119473.c 
b/gcc/testsuite/gcc.target/i386/pr119473.c
new file mode 100644
index ..574c9217ac9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119473.c
@@ -0,0 +1,26 @@
+/* PR target/119473  */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapxf -m64 -mvaes" } */
+
+typedef char __v32qi __attribute__ ((__vector_size__(32)));
+typedef long long __m256i __attribute__((__vector_size__(32), 
__aligned__(32)));
+
+typedef union
+{
+  __v32qi qi[8];
+} tmp_u;
+
+
+void foo ()
+{
+  register tmp_u *tdst __asm__("%rdx");
+  register tmp_u *src1 __asm__("%rcx");
+  register tmp_u *src2 __asm__("%r26");
+
+  tdst->qi[0] = __builtin_ia32_vaesdec_v32qi(src1->qi[0], src2->qi[0]);
+  tdst->qi[0] = __builtin_ia32_vaesdeclast_v32qi(src1->qi[0], src2->qi[0]);
+  tdst->qi[0] = __builtin_ia32_vaesenc_v32qi(src1->qi[0], src2->qi[0]);
+  tdst->qi[0] = __builtin_ia32_vaesenclast_v32qi(src1->qi[0], src2->qi[0]);
+}
+
+/* { dg-final { scan-assembler-not "\\\(%r26\\\), " } } */


[gcc r15-8458] i386: Add AVX10.2 SAT CVT Intrinsics without Rounding Control

2025-04-05 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:e35327242317282a4ff5e2d933719828a0285e81

commit r15-8458-ge35327242317282a4ff5e2d933719828a0285e81
Author: Hu, Lin1 
Date:   Thu Mar 13 16:36:15 2025 +0800

i386: Add AVX10.2 SAT CVT Intrinsics without Rounding Control

gcc/ChangeLog:

* config/i386/avx10_2-512satcvtintrin.h: Add new intrinsics.
* config/i386/avx10_2satcvtintrin.h: Ditto.
* config/i386/i386-builtin-types.def:
Add DEF_FUNCTION_TYPE (V32HI, V32HF, V32HI, USI),
(V16SI, V16SF, V16SI, UHI), (V8DI, V8SF, V8DI, UQI),
(V8DI, V8DF, V8DI, UQI), (V8SI, V8DF, V8SI, UQI).
* config/i386/i386-builtin.def: Add new builtins.
* config/i386/i386-expand.cc: Handle V16SI_FTYPE_V16SF_V16SI_UHI,
V32HI_FTYPE_V32HF_V32HI_USI, V8DI_FTYPE_V8SF_V8DI_UQI,
V8DI_FTYPE_V8DF_V8DI_UQI, V8SI_FTYPE_V8DF_V8SI_UQI.

Diff:
---
 gcc/config/i386/avx10_2-512satcvtintrin.h | 496 ++
 gcc/config/i386/avx10_2satcvtintrin.h | 560 ++
 gcc/config/i386/i386-builtin-types.def|   5 +
 gcc/config/i386/i386-builtin.def  |  32 ++
 gcc/config/i386/i386-expand.cc|   5 +
 5 files changed, 1098 insertions(+)

diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h 
b/gcc/config/i386/avx10_2-512satcvtintrin.h
index a08f98c92a0f..1cef1dae0b12 100644
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ b/gcc/config/i386/avx10_2-512satcvtintrin.h
@@ -157,6 +157,502 @@ _mm512_maskz_ipcvttbf16_epu8 (__mmask32 __U, __m512bh __A)
  (__mmask32) __U);
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtph_epi8 (__m512h __A)
+{
+  return
+(__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+   (__v32hi)
+   _mm512_undefined_si512 (),
+   (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+(__v32hi) __W,
+(__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtph_epi8 (__mmask32 __U, __m512h __A)
+{
+  return
+(__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+   (__v32hi)
+   _mm512_setzero_si512 (),
+   (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtph_epu8 (__m512h __A)
+{
+  return
+(__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+(__v32hi)
+_mm512_undefined_si512 (),
+(__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+  return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtph_epu8 (__mmask32 __U, __m512h __A)
+{
+  return
+(__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+(__v32hi)
+_mm512_setzero_si512 (),
+(__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtps_epi8 (__m512 __A)
+{
+  return
+(__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+   (__v16si)
+   _mm512_undefined_si512 (),
+   (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+  return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+(__v16si) __W,
+(__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtps_

[gcc r15-8915] i386: Add "s_" as Saturation for AVX10.2 Converting Intrinsics.

2025-03-25 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:330df57938fe609a49c5cb047be443475cb9a3c3

commit r15-8915-g330df57938fe609a49c5cb047be443475cb9a3c3
Author: Hu, Lin1 
Date:   Fri Mar 21 10:43:10 2025 +0800

i386: Add "s_" as Saturation for AVX10.2 Converting Intrinsics.

This patch adds "s_" after 'cvt' to represent saturation.

gcc/ChangeLog:

* config/i386/avx10_2-512convertintrin.h (_mm512_mask_cvtx2ps_ph): 
Formatting fixes
(_mm512_mask_cvtx_round2ps_ph): Ditto
(_mm512_maskz_cvtx_round2ps_ph): Ditto
(_mm512_cvtbiassph_bf8): Rename to _mm512_cvts_biasph_bf8.
(_mm512_mask_cvtbiassph_bf8): Rename to _mm512_mask_cvts_biasph_bf8.
(_mm512_maskz_cvtbiassph_bf8): Rename to 
_mm512_maskz_cvts_biasph_bf8.
(_mm512_cvtbiassph_hf8): Rename to _mm512_cvts_biasph_hf8.
(_mm512_mask_cvtbiassph_hf8): Rename to _mm512_mask_cvts_biasph_hf8.
(_mm512_maskz_cvtbiassph_hf8): Rename to 
_mm512_maskz_cvts_biasph_hf8.
(_mm512_cvts2ph_bf8): Rename to _mm512_cvts_2ph_bf8.
(_mm512_mask_cvts2ph_bf8): Rename to _mm512_mask_cvts_2ph_bf8.
(_mm512_maskz_cvts2ph_bf8): Rename to _mm512_maskz_cvts_2ph_bf8.
(_mm512_cvts2ph_hf8): Rename to _mm512_cvts_2ph_hf8.
(_mm512_mask_cvts2ph_hf8): Rename to _mm512_mask_cvts_2ph_hf8.
(_mm512_maskz_cvts2ph_hf8): Rename to _mm512_maskz_cvts_2ph_hf8.
(_mm512_cvtsph_bf8): Rename to _mm512_cvts_ph_bf8.
(_mm512_mask_cvtsph_bf8): Rename to _mm512_mask_cvts_ph_bf8.
(_mm512_maskz_cvtsph_bf8): Rename to _mm512_maskz_cvts_ph_bf8.
(_mm512_cvtsph_hf8): Rename to _mm512_cvts_ph_hf8.
(_mm512_mask_cvtsph_hf8): Rename to _mm512_mask_cvts_ph_hf8.
(_mm512_maskz_cvtsph_hf8): Rename to _mm512_maskz_cvts_ph_hf8.
* config/i386/avx10_2convertintrin.h
(_mm_cvtbiassph_bf8): Rename to _mm_cvts_biasph_bf8.
(_mm_mask_cvtbiassph_bf8): Rename to _mm_mask_cvts_biasph_bf8.
(_mm_maskz_cvtbiassph_bf8): Rename to _mm_maskz_cvts_biasph_bf8.
(_mm256_cvtbiassph_bf8): Rename to _mm256_cvts_biasph_bf8.
(_mm256_mask_cvtbiassph_bf8): Rename to _mm256_mask_cvts_biasph_bf8.
(_mm256_maskz_cvtbiassph_bf8): Rename to 
_mm256_maskz_cvts_biasph_bf8.
(_mm_cvtbiassph_hf8): Rename to _mm_cvts_biasph_hf8.
(_mm_mask_cvtbiassph_hf8): Rename to _mm_mask_cvts_biasph_hf8.
(_mm_maskz_cvtbiassph_hf8): Rename to _mm_maskz_cvts_biasph_hf8.
(_mm256_cvtbiassph_hf8): Rename to _mm256_cvts_biasph_hf8.
(_mm256_mask_cvtbiassph_hf8): Rename to _mm256_mask_cvts_biasph_hf8.
(_mm256_maskz_cvtbiassph_hf8): Rename to 
_mm256_maskz_cvts_biasph_hf8.
(_mm_cvts2ph_bf8): Rename to _mm_cvts_2ph_bf8.
(_mm_mask_cvts2ph_bf8): Rename to _mm_mask_cvts_2ph_bf8.
(_mm_maskz_cvts2ph_bf8): Rename to _mm_maskz_cvts_2ph_bf8.
(_mm256_cvts2ph_bf8): Rename to _mm256_cvts_2ph_bf8.
(_mm256_mask_cvts2ph_bf8): Rename to _mm256_mask_cvts_2ph_bf8.
(_mm256_maskz_cvts2ph_bf8): Rename to _mm256_maskz_cvts_2ph_bf8.
(_mm_cvts2ph_hf8): Rename to _mm_cvts_2ph_hf8.
(_mm_mask_cvts2ph_hf8): Rename to _mm_mask_cvts_2ph_hf8.
(_mm_maskz_cvts2ph_hf8): Rename to _mm_maskz_cvts_2ph_hf8.
(_mm256_cvts2ph_hf8): Rename to _mm256_cvts_2ph_hf8.
(_mm256_mask_cvts2ph_hf8): Rename to _mm256_mask_cvts_2ph_hf8.
(_mm256_maskz_cvts2ph_hf8): Rename to _mm256_maskz_cvts_2ph_hf8.
(_mm_cvtsph_bf8): Rename to _mm_cvts_ph_bf8.
(_mm_mask_cvtsph_bf8): Rename to _mm_mask_cvts_ph_bf8.
(_mm_maskz_cvtsph_bf8): Rename to _mm_maskz_cvts_ph_bf8.
(_mm256_cvtsph_bf8): Rename to _mm256_cvts_ph_bf8.
(_mm256_mask_cvtsph_bf8): Rename to _mm256_mask_cvts_ph_bf8.
(_mm256_maskz_cvtsph_bf8): Rename to _mm256_maskz_cvts_ph_bf8.
(_mm_cvtsph_hf8): Rename to _mm_cvts_ph_hf8.
(_mm_mask_cvtsph_hf8): Rename to _mm_mask_cvts_ph_hf8.
(_mm_maskz_cvtsph_hf8): Rename to _mm_maskz_cvts_ph_hf8.
(_mm256_cvtsph_hf8): Rename to _mm256_cvts_ph_hf8.
(_mm256_mask_cvtsph_hf8): Rename to _mm256_mask_cvts_ph_hf8.
(_mm256_maskz_cvtsph_hf8): Rename to _mm256_maskz_cvts_ph_hf8.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-convert-1.c: Modify function name
to follow the latest version.
* gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c: Ditto.
* gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c: Ditto.
* gcc.target/i386/avx

[gcc r15-8964] i386: Set attr "addr" as "gpr16" for constraint "jm". [PR 119425]

2025-03-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:271745bafafbf3316d01ceb6430d67b894129a4c

commit r15-8964-g271745bafafbf3316d01ceb6430d67b894129a4c
Author: Hu, Lin1 
Date:   Mon Mar 24 15:36:13 2025 +0800

i386: Set attr "addr" as "gpr16" for constraint "jm". [PR 119425]

Alternatives using the "jm" constraint should have attr "addr" set to
"gpr16"; otherwise an ICE may be raised in the reload pass.

gcc/ChangeLog:

PR target/119425
* config/i386/sse.md:
(vec_set_0): Set the alternative with constraint "jm"'s
attribute "addr" to "gpr16".
(avx512dq_shuf_64x2_1):
Ditto.
(avx512vl_shuf_32x4_1): Ditto.
(avx2_pblendd): Ditto.
(aesenc): Ditto.
(aesenclast): Ditto.
(aesdec): Ditto.
(aesdeclast): Ditto.
(vaesdec_): Ditto.
(vaesdeclast_): Ditto.
(vaesenc_): Ditto.
(vaesenclast_): Ditto.
(aesu8): Ditto.
(*aesu8): Ditto.

gcc/testsuite/ChangeLog:

PR target/119425
* gcc.target/i386/pr119425.c: New test.

Co-authored-by: Hongyu Wang 

Diff:
---
 gcc/config/i386/sse.md   | 31 --
 gcc/testsuite/gcc.target/i386/pr119425.c | 37 
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee2a482fb509..ed5ac1abe80d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11904,7 +11904,7 @@
   ]
   (const_string "ssemov")))
(set (attr "addr")
- (if_then_else (eq_attr "alternative" "8,9")
+ (if_then_else (eq_attr "alternative" "9,10")
   (const_string "gpr16")
   (const_string "*")))
(set (attr "prefix_extra")
@@ -20173,6 +20173,7 @@
   return "vshuf64x2\t{%3, %2, %1, 
%0|%0, %1, %2, %3}";
 }
   [(set_attr "type" "sselog")
+   (set_attr "addr" "gpr16,*")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -20334,6 +20335,7 @@
   return "vshuf32x4\t{%3, %2, %1, 
%0|%0, %1, %2, %3}";
 }
   [(set_attr "type" "sselog")
+   (set_attr "addr" "gpr16,*")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "")])
@@ -24076,6 +24078,7 @@
   "TARGET_AVX2"
   "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemov")
+   (set_attr "addr" "gpr16")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
@@ -27085,7 +27088,7 @@
vaesenc\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,vaes_avx512vl")
(set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,gpr16,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "btver2_decode" "double,double,double")
@@ -27103,7 +27106,7 @@
vaesenclast\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,vaes_avx512vl")
(set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,gpr16,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "btver2_decode" "double,double,double")
@@ -27121,7 +27124,7 @@
vaesdec\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,vaes_avx512vl")
(set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,gpr16,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "btver2_decode" "double,double,double") 
@@ -27138,7 +27141,7 @@
* return TARGET_AES ? \"vaesdeclast\t{%2, %1, %0|%0, %1, %2}\" : \"%{evex%} 
vaesdeclast\t{%2, %1, %0|%0, %1, %2}\";
vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "noavx,avx,vaes_avx512vl")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,gpr16,*")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,maybe_evex,evex")
@@ -30841,7 +30844,8 @@
 return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}";
   else
 return "vaesdec\t{%2, %1, %0|%0, %1, %2}";
-})
+}
+[(set_attr "addr" "gpr16,*")])
 
 (define_insn "vaesdeclast_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30855,7 +30859,8 @@
 return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
   else
 return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
-})
+}
+[(set_attr "addr" "gpr16,*")])
 
 (define_insn "vaesenc_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30869,7 +30874,8 @@
 return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}";
   else
 return "vaesenc\t{%2, %1, %0|%0, %1, %2}";
-})
+}
+[(set_attr "addr" "gpr16,*")])
 
 (define_insn "vaesenclast_"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
@@ -30883,7 +30889,8 @@
 return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}";
   else
 return "vaesenclast\t{%2, %1, %0|%0

[gcc r15-8459] i386: Fix AVX10.2 SAT CVT testcases.

2025-03-27 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:2e7a92a68aab3aaee8872c1a59e1391e07517b05

commit r15-8459-g2e7a92a68aab3aaee8872c1a59e1391e07517b05
Author: Hu, Lin1 
Date:   Fri Mar 14 11:16:14 2025 +0800

i386: Fix AVX10.2 SAT CVT testcases.

Add missing testcases.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-512-satcvt-1.c: Add testcase.
* gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto
* gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto
* gcc.target/i386/avx10_2-satcvt-1.c: Ditto
* gcc.target/i386/avx10_2-vcvttsd2sis-2.c: Ditto
* gcc.target/i386/avx10_2-vcvttsd2usis-2.c: Ditto
* gcc.target/i386/avx10_2-vcvttss2sis-2.c: Ditto
* gcc.target/i386/avx10_2-vcvttss2usis-2.c: Ditto

Diff:
---
 .../gcc.target/i386/avx10_2-512-satcvt-1.c | 104 --
 .../gcc.target/i386/avx10_2-512-vcvtph2ibs-2.c |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvtph2iubs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvtps2ibs-2.c |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvtps2iubs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c   |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c   |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttph2ibs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttph2iubs-2.c   |  10 +-
 .../gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttps2ibs-2.c|  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttps2iubs-2.c   |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c|  22 ++--
 .../gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c   |  21 +++-
 .../gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c   |  21 +++-
 gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c   | 120 +++--
 .../gcc.target/i386/avx10_2-vcvtps2iubs-2.c|  16 +++
 .../gcc.target/i386/avx10_2-vcvttsd2sis-2.c|  24 +
 .../gcc.target/i386/avx10_2-vcvttsd2usis-2.c   |  24 +
 .../gcc.target/i386/avx10_2-vcvttss2sis-2.c|  24 +
 .../gcc.target/i386/avx10_2-vcvttss2usis-2.c   |  24 +
 23 files changed, 565 insertions(+), 97 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
index 7f2f7caf4dbf..bf10ac2769c0 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
@@ -1,27 +1,43 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */
-/* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1  }  } */
+/* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2  }  } */
+/* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[
 \\t\]+#)" 1  }  } */
+/* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[
 \\t\]+#)" 1  }  } */
 /* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\{rn-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[
 \\t\]+#)" 1  }  } */
 /* { dg-final { scan-assembler-times "vcvtph2ibs\[ 
\\t\]+\{rz-sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[
 \\t\]+#)" 1  }  } */
-/* { dg-final { scan-assembler-times "vcvtph2iubs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1  }  } */
+/* { dg-final { scan-assembler-times "vcvtph2iubs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2  }  } */
+/* { dg-final { scan-assembler-times "vcvtph2iubs\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[
 \\t\]+#)" 1  }  } */
+/* { dg-final { scan-

[gcc r16-1125] i386: Fix vmovvdup's mem attribute

2025-06-04 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:24cbcc49277a0ac40fc2d82831f6db5e8d6d890d

commit r16-1125-g24cbcc49277a0ac40fc2d82831f6db5e8d6d890d
Author: Hu, Lin1 
Date:   Tue May 27 19:09:04 2025 +0800

i386: Fix vmovvdup's mem attribute

Some vmovddup patterns have type attribute sselog1, which makes their mem
attribute "both". Change the type attribute to match the other vmovddup
patterns.

gcc/ChangeLog:

* config/i386/sse.md
(avx512f_movddup512): Change sselog1 to ssemov.
(avx_movddup256): Ditto.
(*vec_dupv2di): Change alternative 4's type attribute from sselog1
to ssemov.

Diff:
---
 gcc/config/i386/sse.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index aea5e2cad7e1..c40b0fd49978 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -13418,7 +13418,7 @@
 (const_int 6) (const_int 14)])))]
   "TARGET_AVX512F"
   "vmovddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog1")
+  [(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
 
@@ -13449,7 +13449,7 @@
 (const_int 2) (const_int 6)])))]
   "TARGET_AVX && "
   "vmovddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog1")
+  [(set_attr "type" "ssemov")
(set_attr "prefix" "")
(set_attr "mode" "V4DF")])
 
@@ -27839,7 +27839,7 @@
%vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
   [(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx")
-   (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov")
+   (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov")
(set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig")
(set (attr "mode")
(cond [(and (eq_attr "alternative" "2")


[gcc r16-1093] i386: Add more forms peephole2 for adc/sbb

2025-06-03 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:31b887bcc898787a228672d417ec0b33a15b2fb2

commit r16-1093-g31b887bcc898787a228672d417ec0b33a15b2fb2
Author: Hu, Lin1 
Date:   Wed Feb 19 15:51:40 2025 +0800

i386: Add more forms peephole2 for adc/sbb

Enabling -mapxf changes some of the adc/sbb patterns.

As a result, GCC emits an extra mov, e.g.
 movq    8(%rdi), %rax
 adcq    %rax, 8(%rsi), %rax
 movq    %rax, 8(%rdi)
rather than
 movq    8(%rsi), %rax
 adcq    %rax, 8(%rdi)

The patch adds more kinds of peephole2 to eliminate the extra mov.

gcc/ChangeLog:

* config/i386/i386.md: Add 4 new peephole2 patterns that swap the
operand order of the original peephole2 patterns to support the new pattern.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr79173-13.c: New test.
* gcc.target/i386/pr79173-14.c: Ditto.
* gcc.target/i386/pr79173-15.c: Ditto.
* gcc.target/i386/pr79173-16.c: Ditto.
* gcc.target/i386/pr79173-17.c: Ditto.
* gcc.target/i386/pr79173-18.c: Ditto.

Diff:
---
 gcc/config/i386/i386.md| 186 +
 gcc/testsuite/gcc.target/i386/pr79173-13.c |  59 +
 gcc/testsuite/gcc.target/i386/pr79173-14.c |  59 +
 gcc/testsuite/gcc.target/i386/pr79173-15.c |  61 ++
 gcc/testsuite/gcc.target/i386/pr79173-16.c |  61 ++
 gcc/testsuite/gcc.target/i386/pr79173-17.c |  32 +
 gcc/testsuite/gcc.target/i386/pr79173-18.c |  33 +
 7 files changed, 491 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b7a18d583da3..4c9cb81d5f9d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8719,6 +8719,34 @@
  (set (match_dup 1)
   (minus:SWI (match_dup 1) (match_dup 0)))])])
 
+;; Under APX NDD, 'sub reg, mem, reg' is valid.
+;; New format for
+;; mov reg0, mem1
+;; sub reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sub mem2, reg0
+(define_peephole2
+  [(set (match_operand:SWI 0 "general_reg_operand")
+   (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (reg:CC FLAGS_REG)
+  (compare:CC (match_operand:SWI 2 "memory_operand")
+  (match_dup 0)))
+ (set (match_dup 0)
+  (minus:SWI (match_dup 2) (match_dup 0)))])
+   (set (match_dup 2) (match_dup 0))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 1))
+   (parallel [(set (reg:CC FLAGS_REG)
+  (compare:CC (match_dup 2) (match_dup 0)))
+ (set (match_dup 2)
+  (minus:SWI (match_dup 2) (match_dup 0)))])])
+
 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
 ;; subl $1, %eax; jnc .Lxx;
 (define_peephole2
@@ -9166,6 +9194,118 @@
   (match_dup 1))
   (match_dup 0)))])])
 
+;; Under APX NDD, 'adc reg, mem, reg' is valid.
+;;
+;; New format for
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem1, reg0
+;; to
+;; mov reg0, mem2
+;; adc mem1, reg0
+(define_peephole2
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+   (match_operand:SWI48 1 "memory_operand"))
+   (parallel [(set (reg:CCC FLAGS_REG)
+  (compare:CCC
+(zero_extend:<DWI>
+  (plus:SWI48
+(plus:SWI48
+  (match_operator:SWI48 5 "ix86_carry_flag_operator"
+[(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+  (match_operand:SWI48 2 "memory_operand"))
+(match_dup 0)))
+(plus:<DWI>
+  (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+[(match_dup 3) (const_int 0)])
+  (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+  (plus:SWI48 (plus:SWI48 (match_op_dup 5
+[(match_dup 3) (const_int 0)])
+  (match_dup 2))
+  (match_dup 0)))])
+   (set (match_dup 1) (match_dup 0))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel [(set (reg:CCC FLAGS_REG)
+  (compare:CCC
+(zero_extend:<DWI>
+  (plus:SWI48
+(plus:SWI48
+  

[gcc r16-1094] i386: Add more peephole2 for APX NDD

2025-06-03 Thread Hu via Gcc-cvs
https://gcc.gnu.org/g:102b21f9ce7d7a30cdee7c729a152e95c96107ac

commit r16-1094-g102b21f9ce7d7a30cdee7c729a152e95c96107ac
Author: Hu, Lin1 
Date:   Mon Mar 10 16:52:22 2025 +0800

i386: Add more peephole2 for APX NDD

The patch aims to optimize
 movb (%rdi), %al
 movq %rdi, %rbx
 xorl %esi, %eax, %edx
 movb %dl, (%rdi)
 cmpb %sil, %al
 jne
to
 xorb %sil, (%rdi)
 movq %rdi, %rbx
 jne

Reduce 2 mov and 1 cmp instructions.

Because APX NDD allows the destination register and the source register to be
different, some original peephole2 patterns are invalid. Add new peephole2
patterns for APX NDD.

gcc/ChangeLog:

* config/i386/i386.md (define_peephole2): Define some new peephole2 for
APX NDD.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr49095-2.c: New test.

Diff:
---
 gcc/config/i386/i386.md   | 135 ++
 gcc/testsuite/gcc.target/i386/pr49095-2.c |  73 
 2 files changed, 208 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c9cb81d5f9d..40b43cf092ac 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -28398,6 +28398,41 @@
   const0_rtx);
 })
 
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like cmpelim optimized pattern.
+;; Reduce an extra mov instruction like
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; to
+;; decl (%rdi)
+(define_peephole2
+  [(parallel [(set (reg FLAGS_REG)
+  (compare (match_operator:SWI 2 "plusminuslogic_operator"
+ [(match_operand:SWI 0 "memory_operand")
+  (match_operand:SWI 1 "<nonmemory_operand>")])
+   (const_int 0)))
+ (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+   (set (match_dup 0) (match_dup 3))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (2, operands[3])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   && ix86_match_ccmode (peep2_next_insn (0),
+(GET_CODE (operands[2]) == PLUS
+ || GET_CODE (operands[2]) == MINUS)
+? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 6))
+ (set (match_dup 0) (match_dup 5))])]
+{
+  operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+  operands[5]
+= gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ copy_rtx (operands[0]), operands[1]);
+  operands[6]
+= gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+  const0_rtx);
+})
+
 ;; Likewise for instances where we have a lea pattern.
 (define_peephole2
   [(set (match_operand:SWI 0 "register_operand")
@@ -28491,6 +28526,54 @@
   const0_rtx);
 })
 
+;; For APX NDD XOR
+;; Reduce 2 mov and 1 cmp instruction.
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movb %rdx, (%rdi)
+;; cmpb %rsi, %rax
+;; jne
+;; to
+;; xorb %rsi, (%rdi)
+;; jne
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+   (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_operand:SWI 4 "register_operand")
+  (xor:SWI (match_operand:SWI 3 "register_operand")
+   (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))
+   (set (reg:CCZ FLAGS_REG)
+   (compare:CCZ (match_operand:SWI 5 "register_operand")
+(match_operand:SWI 6 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && (rtx_equal_p (operands[0], operands[5])
+   ? rtx_equal_p (operands[2], operands[6])
+   : rtx_equal_p (operands[2], operands[5])
+&& rtx_equal_p (operands[0], operands[6]))
+   && peep2_reg_dead_p (3, operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && (<MODE>mode != QImode
+   || immediate_operand (operands[2], QImode)
+   || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 1) (match_dup 8))])]
+{
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+operands[2]);
+  operands[9]
+= gen_rtx_COMPARE (GET_MODE (operands[7]),
+  copy_rtx (operands[8]),
+  const0_rtx);
+})
+
 (define_peephole2
   [(set (match_operand:SWI12 0 "register_operand")
(match_operand:SWI12 1 "memory_operand"))
@@ -28734,6 +28817,58 @@