https://gcc.gnu.org/g:919f073ae5f45e9cc328be8a914cd80b3a0bc12d

commit r16-1620-g919f073ae5f45e9cc328be8a914cd80b3a0bc12d
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Fri Jun 20 16:07:18 2025 +0800

    x86: Don't use vmovdqu16/vmovdqu8 with non-EVEX registers
    
    Don't use vmovdqu16/vmovdqu8 with non-EVEX register operands just because
    AVX512BW is available.
    
    gcc/
    
            PR target/120728
            * config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8
            only with EVEX register operands.
    
    gcc/testsuite/
    
            PR target/120728
            * gcc.target/i386/avx512bw-vmovdqu16-1.c: Scan vmovdqu for
            non-EVEX register operands.
            * gcc.target/i386/avx512bw-vmovdqu8-1.c: Likewise.
            * gcc.target/i386/avx512fp16-13.c: Likewise.
            * gcc.target/i386/pr100865-10b.c: Likewise.
            * gcc.target/i386/pr100865-3.c: Likewise.
            * gcc.target/i386/pr100865-4b.c: Likewise.
            * gcc.target/i386/pr100865-5b.c: Likewise.
            * gcc.target/i386/pr90773-15.c: Likewise.
            * gcc.target/i386/pr90773-16.c: Likewise.
            * gcc.target/i386/pr90773-17.c: Likewise.
            * gcc.target/i386/pr95483-5.c: Likewise.
            * gcc.target/i386/pr120728.c: New test.
    
    Signed-off-by: H.J. Lu <hjl.to...@gmail.com>

Diff:
---
 gcc/config/i386/i386.cc                            |  6 ++---
 .../gcc.target/i386/avx512bw-vmovdqu16-1.c         |  8 +++----
 .../gcc.target/i386/avx512bw-vmovdqu8-1.c          |  4 ++--
 gcc/testsuite/gcc.target/i386/avx512fp16-13.c      | 10 ++++----
 gcc/testsuite/gcc.target/i386/pr100865-10b.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-3.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-4b.c        |  2 +-
 gcc/testsuite/gcc.target/i386/pr100865-5b.c        |  2 +-
 gcc/testsuite/gcc.target/i386/pr120728.c           | 27 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr90773-15.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-16.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-17.c         |  2 +-
 gcc/testsuite/gcc.target/i386/pr95483-5.c          |  4 ++--
 13 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 77853297a2fa..fc3105919f45 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5703,7 +5703,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu16"
                         : "%vmovdqu")
                      : "%vmovdqa");
@@ -5745,7 +5745,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu8"
                         : "%vmovdqu")
                      : "%vmovdqa");
@@ -5765,7 +5765,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu16"
                         : "%vmovdqu")
                      : "%vmovdqa");
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
index 8603a1909c79..ee8e5cfc81ad 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
@@ -16,11 +16,11 @@
 /* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vinserti128)\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vextracti128)\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
index d1e33926c81f..4c4cddb17f1c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
@@ -16,9 +16,9 @@
 /* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
index 2416c67de53a..92ac197e1066 100644
--- a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
@@ -71,7 +71,7 @@ load256u_ph (void const *p)
   return _mm256_loadu_ph (p);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */
 
 __m128h
 __attribute__ ((noinline, noclone))
@@ -80,7 +80,7 @@ load128u_ph (void const *p)
   return _mm_loadu_ph (p);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ 
\\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ 
\\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -89,7 +89,7 @@ store512u_ph (void *p, __m512h a)
   return _mm512_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], 
*\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], 
*\[^,\]*" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -98,7 +98,7 @@ store256u_ph (void *p, __m256h a)
   return _mm256_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], 
*\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], 
*\[^,\]*" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -107,7 +107,7 @@ storeu_ph (void *p, __m128h a)
   return _mm_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], 
*\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], 
*\[^,\]*" 1 } } */
 
 __m512h
 __attribute__ ((noinline, noclone))
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
index f60d1bfd2c78..17847ac97272 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
@@ -4,4 +4,4 @@
 #include "pr100865-10a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, 
%ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-3.c b/gcc/testsuite/gcc.target/i386/pr100865-3.c
index 433fd81cb0db..caa083c8b8a1 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-3.c
@@ -11,6 +11,6 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, 
%xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, 
%xmm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
index 6fd703e8049f..7017de9032d9 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
@@ -5,7 +5,7 @@
 #include "pr100865-4a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, 
%ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
 /* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, 
%ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-5b.c b/gcc/testsuite/gcc.target/i386/pr100865-5b.c
index 6c2b33d6c69d..8b8ed9356df4 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-5b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-5b.c
@@ -5,6 +5,6 @@
 #include "pr100865-5a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, 
%ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[\\t \]%ymm\[0-9\]+, " 4 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 4 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, 
%ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr120728.c b/gcc/testsuite/gcc.target/i386/pr120728.c
new file mode 100644
index 000000000000..93d2cd07e2fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120728.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+, " 3 } } */
+/* { dg-final { scan-assembler-not "vmovdqu8" } } */
+/* { dg-final { scan-assembler-not "vmovdqu16" } } */
+
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi_u __attribute__ ((__vector_size__ (32),
+                                      __aligned__ (1)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi_u __attribute__ ((__vector_size__ (32),
+                                          __aligned__ (1)));
+typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
+typedef _Float16 __v16hf_u __attribute__ ((__vector_size__ (32),
+                                          __aligned__ (1)));
+
+extern __v32qi_u v1;
+extern __v16hi_u v2;
+extern __v16hf_u v3;
+
+void
+foo (__v32qi x1, __v16hi x2, __v16hf x3)
+{
+  v1 = x1;
+  v2 = x2;
+  v3 = x3;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-15.c b/gcc/testsuite/gcc.target/i386/pr90773-15.c
index 403cdb248a20..880f71d1567b 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-15.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-15.c
@@ -10,5 +10,5 @@ foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%.*, %xmm\[0-9\]+" 
1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "movb\[\\t \]+%.*, 16\\(%\[\^,\]+\\)" 1 } 
} */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-16.c b/gcc/testsuite/gcc.target/i386/pr90773-16.c
index bb0aadbc77e9..77d584018b53 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-16.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-16.c
@@ -10,5 +10,5 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "(?:vpcmpeqd|vpternlogd)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 
1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-17.c b/gcc/testsuite/gcc.target/i386/pr90773-17.c
index 61b2bfd7485a..68ff7e091e59 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-17.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-17.c
@@ -11,5 +11,5 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, 
\\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 
16\\(%\[\^,\]+\\)" 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95483-5.c b/gcc/testsuite/gcc.target/i386/pr95483-5.c
index b52e39dff79f..a21ad01b15d2 100644
--- a/gcc/testsuite/gcc.target/i386/pr95483-5.c
+++ b/gcc/testsuite/gcc.target/i386/pr95483-5.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vinserti128)\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vextracti128)\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ 
\\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ 
\\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>

Reply via email to