https://gcc.gnu.org/g:919f073ae5f45e9cc328be8a914cd80b3a0bc12d
commit r16-1620-g919f073ae5f45e9cc328be8a914cd80b3a0bc12d Author: H.J. Lu <hjl.to...@gmail.com> Date: Fri Jun 20 16:07:18 2025 +0800 x86: Don't use vmovdqu16/vmovdqu8 with non-EVEX registers Don't use vmovdqu16/vmovdqu8 with non-EVEX register operands just because AVX512BW is available. gcc/ PR target/120728 * config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8 only with EVEX register operands. gcc/testsuite/ PR target/120728 * gcc.target/i386/avx512bw-vmovdqu16-1.c: Scan vmovdqu for non-EVEX register operands. * gcc.target/i386/avx512bw-vmovdqu8-1.c: Likewise. * gcc.target/i386/avx512fp16-13.c: Likewise. * gcc.target/i386/pr100865-10b.c: Likewise. * gcc.target/i386/pr100865-3.c: Likewise. * gcc.target/i386/pr100865-4b.c: Likewise. * gcc.target/i386/pr100865-5b.c: Likewise. * gcc.target/i386/pr90773-15.c: Likewise. * gcc.target/i386/pr90773-16.c: Likewise. * gcc.target/i386/pr90773-17.c: Likewise. * gcc.target/i386/pr95483-5.c: Likewise. * gcc.target/i386/pr120728.c: New test. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> Diff: --- gcc/config/i386/i386.cc | 6 ++--- .../gcc.target/i386/avx512bw-vmovdqu16-1.c | 8 +++---- .../gcc.target/i386/avx512bw-vmovdqu8-1.c | 4 ++-- gcc/testsuite/gcc.target/i386/avx512fp16-13.c | 10 ++++---- gcc/testsuite/gcc.target/i386/pr100865-10b.c | 2 +- gcc/testsuite/gcc.target/i386/pr100865-3.c | 2 +- gcc/testsuite/gcc.target/i386/pr100865-4b.c | 2 +- gcc/testsuite/gcc.target/i386/pr100865-5b.c | 2 +- gcc/testsuite/gcc.target/i386/pr120728.c | 27 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr90773-15.c | 2 +- gcc/testsuite/gcc.target/i386/pr90773-16.c | 2 +- gcc/testsuite/gcc.target/i386/pr90773-17.c | 2 +- gcc/testsuite/gcc.target/i386/pr95483-5.c | 4 ++-- 13 files changed, 50 insertions(+), 23 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 77853297a2fa..fc3105919f45 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -5703,7 +5703,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -5745,7 +5745,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu8" : "%vmovdqu") : "%vmovdqa"); @@ -5765,7 +5765,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c index 8603a1909c79..ee8e5cfc81ad 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu16|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu16|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c index d1e33926c81f..4c4cddb17f1c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c @@ -16,9 +16,9 @@ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c index 2416c67de53a..92ac197e1066 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c @@ -71,7 +71,7 @@ load256u_ph (void const *p) return _mm256_loadu_ph (p); } -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */ __m128h __attribute__ ((noinline, noclone)) @@ -80,7 +80,7 @@ load128u_ph (void const *p) return _mm_loadu_ph (p); } -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */ void __attribute__ ((noinline, noclone)) @@ -89,7 +89,7 @@ store512u_ph (void *p, __m512h a) return _mm512_storeu_ph (p, a); } -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */ void __attribute__ ((noinline, noclone)) @@ -98,7 +98,7 @@ store256u_ph (void *p, __m256h a) return _mm256_storeu_ph (p, a); } -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */ void __attribute__ ((noinline, noclone)) @@ -107,7 +107,7 @@ storeu_ph (void *p, __m128h a) return _mm_storeu_ph (p, a); } -/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */ __m512h __attribute__ ((noinline, noclone)) diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c index f60d1bfd2c78..17847ac97272 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c @@ -4,4 +4,4 @@ #include "pr100865-10a.c" /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-3.c b/gcc/testsuite/gcc.target/i386/pr100865-3.c index 433fd81cb0db..caa083c8b8a1 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-3.c @@ -11,6 +11,6 @@ foo (void) } /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c index 6fd703e8049f..7017de9032d9 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c @@ -5,7 +5,7 @@ #include "pr100865-4a.c" /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */ /* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-5b.c b/gcc/testsuite/gcc.target/i386/pr100865-5b.c index 6c2b33d6c69d..8b8ed9356df4 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-5b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-5b.c @@ -5,6 +5,6 @@ #include "pr100865-5a.c" /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu16\[\\t \]%ymm\[0-9\]+, " 4 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 4 } } */ /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr120728.c b/gcc/testsuite/gcc.target/i386/pr120728.c new file mode 100644 index 000000000000..93d2cd07e2fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120728.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v4" } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+, " 3 } } */ +/* { dg-final { scan-assembler-not "vmovdqu8" } } */ +/* { dg-final { scan-assembler-not "vmovdqu16" } } */ + +typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef char __v32qi_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef short __v16hi_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); +typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); +typedef _Float16 __v16hf_u __attribute__ ((__vector_size__ (32), + __aligned__ (1))); + +extern __v32qi_u v1; +extern __v16hi_u v2; +extern __v16hf_u v3; + +void +foo (__v32qi x1, __v16hi x2, __v16hf x3) +{ + v1 = x1; + v2 = x2; + v3 = x3; +} diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c index 403cdb248a20..880f71d1567b 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-15.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c @@ -10,5 +10,5 @@ foo (int c) } /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%.*, %xmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ /* { dg-final { scan-assembler-times "movb\[\\t \]+%.*, 16\\(%\[\^,\]+\\)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c index bb0aadbc77e9..77d584018b53 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-16.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c @@ -10,5 +10,5 @@ foo (void) } /* { dg-final { scan-assembler-times "(?:vpcmpeqd|vpternlogd)" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ /* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c index 61b2bfd7485a..68ff7e091e59 100644 --- a/gcc/testsuite/gcc.target/i386/pr90773-17.c +++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c @@ -11,5 +11,5 @@ foo (void) } /* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */ /* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr95483-5.c b/gcc/testsuite/gcc.target/i386/pr95483-5.c index b52e39dff79f..a21ad01b15d2 100644 --- a/gcc/testsuite/gcc.target/i386/pr95483-5.c +++ b/gcc/testsuite/gcc.target/i386/pr95483-5.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu8|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "(?:vmovdqu8|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h>