https://gcc.gnu.org/g:b4bc34db3f2948e37ad55a09870635e88c54c7d3

commit r12-10682-gb4bc34db3f2948e37ad55a09870635e88c54c7d3
Author: liuhongt <hongtao....@intel.com>
Date:   Thu Aug 15 12:54:07 2024 +0800

    Align ix86_{move_max,store_max} with vectorizer.
    
    When none of -mprefer-vector-width, avx256_optimal/avx128_optimal, or
    avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC sets
    ix86_{move_max,store_max} to the maximum available vector length,
    except when AVX is enabled without AVX512F, where it still falls back
    to 128 bits:
    
                  if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
                      && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
                    opts->x_ix86_move_max = PVW_AVX512;
                  else
                    opts->x_ix86_move_max = PVW_AVX128;
    
    So for -mavx2, the vectorizer chooses 256-bit vectors, but 128-bit
    moves are used for struct copies.  This width mismatch could cause a
    store-to-load forwarding (STLF) issue.
    
    This patch fixes that by using 256-bit moves and stores when AVX is
    enabled.
    
    gcc/ChangeLog:
    
            * config/i386/i386-options.cc (ix86_option_override_internal):
            Set ix86_{move_max,store_max} to PVW_AVX256 instead of
            PVW_AVX128 when TARGET_AVX.
    
    gcc/testsuite/ChangeLog:
            * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
            * gcc.target/i386/pieces-memcpy-6.c: Ditto.
            * gcc.target/i386/pieces-memset-38.c: Ditto.
            * gcc.target/i386/pieces-memset-40.c: Ditto.
            * gcc.target/i386/pieces-memset-41.c: Ditto.
            * gcc.target/i386/pieces-memset-42.c: Ditto.
            * gcc.target/i386/pieces-memset-43.c: Ditto.
            * gcc.target/i386/pieces-strcpy-2.c: Ditto.
            * gcc.target/i386/pieces-memcpy-22.c: New test.
            * gcc.target/i386/pieces-memset-51.c: New test.
            * gcc.target/i386/pieces-strcpy-3.c: New test.
    
    (cherry picked from commit aea374238cec1a1e53fb79575d2f998e16926999)

Diff:
---
 gcc/config/i386/i386-options.cc                  |  6 ++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-38.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-40.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-41.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-42.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-43.c |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c  | 15 +++++++++++++++
 12 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 318f6c614551..ad496ea5a8eb 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2766,6 +2766,9 @@ ix86_option_override_internal (bool main_args_p,
            {
              if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
                opts->x_ix86_move_max = PVW_AVX512;
+             /* Align with vectorizer to avoid potential STLF issue.  */
+             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_move_max = PVW_AVX256;
              else
                opts->x_ix86_move_max = PVW_AVX128;
            }
@@ -2787,6 +2790,9 @@ ix86_option_override_internal (bool main_args_p,
            {
              if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
                opts->x_ix86_store_max = PVW_AVX512;
+             /* Align with vectorizer to avoid potential STLF issue.  */
+             else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_store_max = PVW_AVX256;
              else
                opts->x_ix86_store_max = PVW_AVX128;
            }
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
index 5faee21f9b99..53ad0b3be443 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
new file mode 100644
index 000000000000..605b3623ffc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
index 5f99cc98c472..cfd2a86cf33b 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst, *src;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
index ed4a24a54fda..ddd194debd57 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
index 4eda73ead592..9c206465d465 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
index 93df8101e4d0..b0756182e355 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
index df0c122aae71..103da699ae52 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
index 2f2179c2df9e..f1494e176105 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *dst;
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
new file mode 100644
index 000000000000..192ec0d1647d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+  __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
index 90446edb4f35..9bb94b7419b3 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
 
 extern char *strcpy (char *, const char *);
 
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
new file mode 100644
index 000000000000..df7571b547fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+  strcpy (s,
+         "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+         "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */

Reply via email to