https://gcc.gnu.org/g:d073bb6cfc219d4b6c283a0b527ee88b42e640e0

commit r16-1643-gd073bb6cfc219d4b6c283a0b527ee88b42e640e0
Author: H.J. Lu <hjl.to...@gmail.com>
Date:   Thu Mar 18 18:43:10 2021 -0700

    x86: Update memcpy/memset inline strategies for -mtune=generic
    
    Update memcpy and memset inline strategies for -mtune=generic:
    
    1. Don't align memory.
    2. For known sizes, prefer vector loop, unroll loop with 4 moves or
       stores per iteration without aligning the loop, up to 256 bytes.
    3. For unknown sizes, use memcpy/memset.
    4. Since each loop iteration has 4 stores and zeroing with the unrolled
       loop may need 8 stores, change CLEAR_RATIO to 10 so that zeroing up
       to 72 bytes is fully unrolled with 9 stores without SSE.
    
    gcc/
    
            PR target/70308
            PR target/101366
            PR target/102294
            PR target/108585
            PR target/118276
            PR target/119596
            PR target/119703
            PR target/119704
            * config/i386/x86-tune-costs.h (generic_memcpy): Updated.
            (generic_memset): Likewise.
            (generic_cost): Change CLEAR_RATIO to 10.
    
    gcc/testsuite/
    
            PR target/70308
            PR target/101366
            PR target/102294
            PR target/108585
            PR target/118276
            PR target/119596
            PR target/119703
            PR target/119704
            * g++.target/i386/memset-pr101366-1.C: New test.
            * g++.target/i386/memset-pr101366-2.C: Likewise.
            * g++.target/i386/memset-pr108585-1a.C: Likewise.
            * g++.target/i386/memset-pr108585-1b.C: Likewise.
            * g++.target/i386/memset-pr118276-1a.C: Likewise.
            * g++.target/i386/memset-pr118276-1b.C: Likewise.
            * g++.target/i386/memset-pr118276-1c.C: Likewise.
            * gcc.target/i386/memcpy-strategy-12.c: Likewise.
            * gcc.target/i386/memcpy-strategy-13.c: Likewise.
            * gcc.target/i386/memset-pr70308-1a.c: Likewise.
            * gcc.target/i386/memset-pr70308-1b.c: Likewise.
            * gcc.target/i386/memset-strategy-25.c: Likewise.
            * gcc.target/i386/memset-strategy-26.c: Likewise.
            * gcc.target/i386/memset-strategy-27.c: Likewise.
            * gcc.target/i386/memset-strategy-28.c: Likewise.
            * gcc.target/i386/memset-strategy-29.c: Likewise.
            * gcc.target/i386/memset-strategy-30.c: Likewise.
            * gcc.target/i386/memset-strategy-31.c: Likewise.
            * gcc.target/i386/auto-init-padding-3.c: Expect XMM stores.
            * gcc.target/i386/auto-init-padding-9.c: Likewise.
            * gcc.target/i386/mvc17.c: Fail with "rep mov".
            * gcc.target/i386/pr111657-1.c: Scan for unrolled loop.  Fail
            with "rep mov".
            * gcc.target/i386/shrink_wrap_1.c: Also pass
            -mmemset-strategy=rep_8byte:-1:align.
            * gcc.target/i386/sw-1.c: Also pass -mstringop-strategy=rep_byte.
    
    Signed-off-by: H.J. Lu <hjl.to...@gmail.com>

Diff:
---
 gcc/config/i386/x86-tune-costs.h                   | 39 ++++++++++----
 gcc/testsuite/g++.target/i386/memset-pr101366-1.C  | 30 +++++++++++
 gcc/testsuite/g++.target/i386/memset-pr101366-2.C  | 26 +++++++++
 gcc/testsuite/g++.target/i386/memset-pr108585-1a.C | 43 +++++++++++++++
 gcc/testsuite/g++.target/i386/memset-pr108585-1b.C | 43 +++++++++++++++
 gcc/testsuite/g++.target/i386/memset-pr118276-1a.C | 35 +++++++++++++
 gcc/testsuite/g++.target/i386/memset-pr118276-1b.C | 24 +++++++++
 gcc/testsuite/g++.target/i386/memset-pr118276-1c.C | 24 +++++++++
 .../gcc.target/i386/auto-init-padding-3.c          |  7 +--
 .../gcc.target/i386/auto-init-padding-9.c          | 25 +++++++--
 gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c | 34 ++++++++++++
 gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c | 11 ++++
 gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c  | 46 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c  | 61 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-25.c | 29 ++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-26.c | 15 ++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-27.c | 11 ++++
 gcc/testsuite/gcc.target/i386/memset-strategy-28.c | 29 ++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-29.c | 30 +++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-30.c | 30 +++++++++++
 gcc/testsuite/gcc.target/i386/memset-strategy-31.c | 30 +++++++++++
 gcc/testsuite/gcc.target/i386/mvc17.c              |  2 +-
 gcc/testsuite/gcc.target/i386/pr111657-1.c         | 24 ++++++++-
 gcc/testsuite/gcc.target/i386/shrink_wrap_1.c      |  2 +-
 gcc/testsuite/gcc.target/i386/sw-1.c               |  2 +-
 25 files changed, 626 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index b08081e37cfb..a5b99d1f9629 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -4065,19 +4065,36 @@ struct processor_costs shijidadao_cost = {
 
 
 
-/* Generic should produce code tuned for Core-i7 (and newer chips)
-   and btver1 (and newer chips).  */
+/* Generic should produce code tuned for Haswell (and newer chips)
+   and znver1 (and newer chips):
+   1. Don't align memory.
+   2. For known sizes, prefer vector loop, unroll loop with 4 moves or
+      stores per iteration without aligning the loop, up to 256 bytes.
+   3. For unknown sizes, use memcpy/memset.
+   4. Since each loop iteration has 4 stores and 8 stores for zeroing
+      with unroll loop may be needed, change CLEAR_RATIO to 10 so that
+      zeroing up to 72 bytes are fully unrolled with 9 stores without
+      SSE.
+ */
 
 static stringop_algs generic_memcpy[2] = {
-  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
-             {-1, libcall, false}}},
-  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
-             {-1, libcall, false}}}};
+  {libcall,
+   {{256, vector_loop, true},
+    {256, unrolled_loop, true},
+    {-1, libcall, true}}},
+  {libcall,
+   {{256, vector_loop, true},
+    {256, unrolled_loop, true},
+    {-1, libcall, true}}}};
 static stringop_algs generic_memset[2] = {
-  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
-             {-1, libcall, false}}},
-  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
-             {-1, libcall, false}}}};
+  {libcall,
+   {{256, vector_loop, true},
+    {256, unrolled_loop, true},
+    {-1, libcall, true}}},
+  {libcall,
+   {{256, vector_loop, true},
+    {256, unrolled_loop, true},
+    {-1, libcall, true}}}};
 static const
 struct processor_costs generic_cost = {
   {
@@ -4134,7 +4151,7 @@ struct processor_costs generic_cost = {
   COSTS_N_INSNS (1),                   /* cost of movzx */
   8,                                   /* "large" insn */
   17,                                  /* MOVE_RATIO */
-  6,                                   /* CLEAR_RATIO */
+  10,                                  /* CLEAR_RATIO */
   {6, 6, 6},                           /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
diff --git a/gcc/testsuite/g++.target/i386/memset-pr101366-1.C b/gcc/testsuite/g++.target/i386/memset-pr101366-1.C
new file mode 100644
index 000000000000..96d529b7c819
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr101366-1.C
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64-v2 -std=gnu++17" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z4TestPc:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     movzbl  -1\(%rdi\), %eax
+**     pxor    %xmm1, %xmm1
+**     movd    %eax, %xmm0
+**     pshufb  %xmm1, %xmm0
+**     movups  %xmm0, \(%rdi\)
+**     movups  %xmm0, 16\(%rdi\)
+**     movups  %xmm0, 32\(%rdi\)
+**     movups  %xmm0, 48\(%rdi\)
+**     ret
+**...
+*/
+
+#include <x86intrin.h>
+
+void 
+Test (char* dst)
+{
+  __m128i pattern = _mm_set1_epi8(dst[-1]);
+  for (int i = 0; i < 4; i++)
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + 16 * i), pattern);
+}
diff --git a/gcc/testsuite/g++.target/i386/memset-pr101366-2.C b/gcc/testsuite/g++.target/i386/memset-pr101366-2.C
new file mode 100644
index 000000000000..b966d1f841dd
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr101366-2.C
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64-v2 -std=gnu++17" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z4TestPc:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     movsbl  -1\(%rdi\), %eax
+**     pxor    %xmm1, %xmm1
+**     movd    %eax, %xmm0
+**     pshufb  %xmm1, %xmm0
+**     movups  %xmm0, \(%rdi\)
+**     movups  %xmm0, 16\(%rdi\)
+**     movups  %xmm0, 32\(%rdi\)
+**     movups  %xmm0, 48\(%rdi\)
+**     ret
+**...
+*/
+
+void 
+Test (char*s)
+{
+  __builtin_memset (s, s[-1], 64);
+}
diff --git a/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C b/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C
new file mode 100644
index 000000000000..464f61c70503
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64 -std=c++20 -DUSE_CHAR" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z6squarei:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     subq    \$104, %rsp
+**     .cfi_def_cfa_offset 112
+**     pxor    %xmm0, %xmm0
+**     movq    %rsp, %rdi
+**     movaps  %xmm0, \(%rsp\)
+**     movaps  %xmm0, 16\(%rsp\)
+**     movaps  %xmm0, 32\(%rsp\)
+**     movaps  %xmm0, 48\(%rsp\)
+**     movaps  %xmm0, 64\(%rsp\)
+**     movaps  %xmm0, 80\(%rsp\)
+**     call    _Z3fooPc
+**     addq    \$104, %rsp
+**     .cfi_def_cfa_offset 8
+**     ret
+**...
+*/
+
+#include <cstddef>
+
+#ifdef USE_CHAR
+# define TYPE char
+#else
+# define TYPE std::byte
+#endif
+
+extern int foo(TYPE *arr);
+
+int square(int num)
+{
+    TYPE arr[96] = {};
+    return foo(arr);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C b/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C
new file mode 100644
index 000000000000..c14a110fa713
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64 -std=c++20" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z6squarei:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     subq    \$104, %rsp
+**     .cfi_def_cfa_offset 112
+**     pxor    %xmm0, %xmm0
+**     movq    %rsp, %rdi
+**     movaps  %xmm0, \(%rsp\)
+**     movaps  %xmm0, 16\(%rsp\)
+**     movaps  %xmm0, 32\(%rsp\)
+**     movaps  %xmm0, 48\(%rsp\)
+**     movaps  %xmm0, 64\(%rsp\)
+**     movaps  %xmm0, 80\(%rsp\)
+**     call    _Z3fooPSt4byte
+**     addq    \$104, %rsp
+**     .cfi_def_cfa_offset 8
+**     ret
+**...
+*/
+
+#include <cstddef>
+
+#ifdef USE_CHAR
+# define TYPE char
+#else
+# define TYPE std::byte
+#endif
+
+extern int foo(TYPE *arr);
+
+int square(int num)
+{
+    TYPE arr[96] = {};
+    return foo(arr);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C
new file mode 100644
index 000000000000..1df6646957e6
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=0" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z22makeDefaultConstructedv:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     pxor    %xmm0, %xmm0
+**     movq    \$0, 80\(%rdi\)
+**     movq    %rdi, %rax
+**     movups  %xmm0, \(%rdi\)
+**     movups  %xmm0, 16\(%rdi\)
+**     movups  %xmm0, 32\(%rdi\)
+**     movups  %xmm0, 48\(%rdi\)
+**     movups  %xmm0, 64\(%rdi\)
+**     ret
+**...
+*/
+
+struct S {
+    long int c[10] = {};
+    int x{};
+#if MODE == 0
+#elif MODE == 1
+    S() = default;
+#elif MODE == 2
+    S() noexcept {}
+#endif
+};
+
+S makeDefaultConstructed() { return S{}; }
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C
new file mode 100644
index 000000000000..9fd77df15fe3
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=1" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z22makeDefaultConstructedv:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     pxor    %xmm0, %xmm0
+**     movq    \$0, 80\(%rdi\)
+**     movq    %rdi, %rax
+**     movups  %xmm0, \(%rdi\)
+**     movups  %xmm0, 16\(%rdi\)
+**     movups  %xmm0, 32\(%rdi\)
+**     movups  %xmm0, 48\(%rdi\)
+**     movups  %xmm0, 64\(%rdi\)
+**     ret
+**...
+*/
+
+#include "memset-pr118276-1a.C"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C
new file mode 100644
index 000000000000..624c785f49b6
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**_Z22makeDefaultConstructedv:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     pxor    %xmm0, %xmm0
+**     movl    \$0, 80\(%rdi\)
+**     movq    %rdi, %rax
+**     movups  %xmm0, \(%rdi\)
+**     movups  %xmm0, 16\(%rdi\)
+**     movups  %xmm0, 32\(%rdi\)
+**     movups  %xmm0, 48\(%rdi\)
+**     movups  %xmm0, 64\(%rdi\)
+**     ret
+**...
+*/
+
+#include "memset-pr118276-1a.C"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c b/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c
index 7c20a28508ff..a12069a039d4 100644
--- a/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c
+++ b/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c
@@ -23,8 +23,5 @@ int foo ()
   return var.four.internal1;
 }
 
-/* { dg-final { scan-assembler "movl\t\\\$0," } } */
-/* { dg-final { scan-assembler "movl\t\\\$16," { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler "rep stosq" { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler "movl\t\\\$32," { target ia32 } } } */
-/* { dg-final { scan-assembler "rep stosl" { target ia32 } } } */
+/* { dg-final { scan-assembler-times "pxor\t%xmm0, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-times "movaps\t%xmm0, " 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c b/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c
index a87b68b255b0..102217ce2c1b 100644
--- a/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c
+++ b/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c
@@ -2,6 +2,25 @@
    padding.  */ 
 /* { dg-do compile } */
 /* { dg-options "-ftrivial-auto-var-init=zero -march=x86-64" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**...
+**     pxor    %xmm0, %xmm0
+**...
+**.L[0-9]+:
+**     movl    %esi, %ecx
+**     movaps  %xmm0, \(%rdx,%rcx\)
+**     movaps  %xmm0, 16\(%rdx,%rcx\)
+**     movaps  %xmm0, 32\(%rdx,%rcx\)
+**     movaps  %xmm0, 48\(%rdx,%rcx\)
+**     addl    \$64, %esi
+**     cmpl    %edi, %esi
+**     jb      .L[0-9]+
+**...
+*/
 
 struct test_trailing_hole {
         int one;
@@ -18,8 +37,4 @@ int foo ()
   return var[2].four;
 }
 
-/* { dg-final { scan-assembler "movl\t\\\$0," } } */
-/* { dg-final { scan-assembler "movl\t\\\$20," { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler "rep stosq" { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler "movl\t\\\$40," { target ia32} } } */
-/* { dg-final { scan-assembler "rep stosl" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c
new file mode 100644
index 000000000000..d0316efc8eea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**...
+**     xorl    %edx, %edx
+**...
+**.L[0-9]+:
+**     movl    %edx, %eax
+**     addl    \$32, %edx
+**     movq    \(%rsi,%rax\), %r10
+**     movq    8\(%rsi,%rax\), %r9
+**     movq    16\(%rsi,%rax\), %r8
+**     movq    24\(%rsi,%rax\), %rcx
+**     movq    %r10, \(%rdi,%rax\)
+**     movq    %r9, 8\(%rdi,%rax\)
+**     movq    %r8, 16\(%rdi,%rax\)
+**     movq    %rcx, 24\(%rdi,%rax\)
+**     cmpl    \$224, %edx
+**     jb      .L[0-9]+
+**...
+*/
+
+void
+foo (char *dest, char *src)
+{
+  __builtin_memcpy (dest, src, 253);
+}
+
+/* { dg-final { scan-assembler-not "rep mov" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c
new file mode 100644
index 000000000000..109bd675a51b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-avx" } */
+/* { dg-final { scan-assembler "jmp\tmemcpy" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\tmemcpy" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
+
+void
+foo (char *dest, char *src)
+{
+  __builtin_memcpy (dest, src, 257);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c b/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c
new file mode 100644
index 000000000000..e3494413b2d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     subq    \$16, %rsp
+**     .cfi_def_cfa_offset 24
+**     pxor    %xmm0, %xmm0
+**     movaps  %xmm0, -120\(%rsp\)
+**     movaps  %xmm0, -104\(%rsp\)
+**     movaps  %xmm0, -88\(%rsp\)
+**     movaps  %xmm0, -72\(%rsp\)
+**     movaps  %xmm0, -56\(%rsp\)
+**     movaps  %xmm0, -40\(%rsp\)
+**     movaps  %xmm0, -24\(%rsp\)
+**     movaps  %xmm0, -8\(%rsp\)
+**     xorl    %eax, %eax
+**     addq    \$16, %rsp
+**     .cfi_def_cfa_offset 8
+**     ret
+**...
+*/
+
+extern int scanf (const char *, ...);
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+int
+foo (void)
+{
+  char buf[128];
+
+#if USE_SCANF
+  if (scanf("%s", buf) != 1)
+    return 42;
+#endif
+
+  memset (buf,0, sizeof (buf));
+  asm volatile("": : :"memory");
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c b/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c
new file mode 100644
index 000000000000..52f7b8ce242f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -DUSE_SCANF" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     subq    \$136, %rsp
+**     .cfi_def_cfa_offset 144
+**     xorl    %eax, %eax
+**     movl    \$.LC[0-9]+, %edi
+**     movq    %rsp, %rsi
+**     call    scanf
+**     cmpl    \$1, %eax
+**     je      .L[0-9]+
+**     movl    \$42, %eax
+**     addq    \$136, %rsp
+**     .cfi_remember_state
+**     .cfi_def_cfa_offset 8
+**     ret
+**     .p2align 4,,10
+**     .p2align 3
+**.L[0-9]+:
+**     .cfi_restore_state
+**     pxor    %xmm0, %xmm0
+**     movaps  %xmm0, \(%rsp\)
+**     movaps  %xmm0, 16\(%rsp\)
+**     movaps  %xmm0, 32\(%rsp\)
+**     movaps  %xmm0, 48\(%rsp\)
+**     movaps  %xmm0, 64\(%rsp\)
+**     movaps  %xmm0, 80\(%rsp\)
+**     movaps  %xmm0, 96\(%rsp\)
+**     movaps  %xmm0, 112\(%rsp\)
+**     xorl    %eax, %eax
+**     addq    \$136, %rsp
+**     .cfi_def_cfa_offset 8
+**     ret
+**...
+*/
+
+extern int scanf (const char *, ...);
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+int
+foo (void)
+{
+  char buf[128];
+
+#if USE_SCANF
+  if (scanf("%s", buf) != 1)
+    return 42;
+#endif
+
+  memset (buf,0, sizeof (buf));
+  asm volatile("": : :"memory");
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-25.c b/gcc/testsuite/gcc.target/i386/memset-strategy-25.c
new file mode 100644
index 000000000000..040439d1671c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-25.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     xorl    %eax, %eax
+**.L[0-9]+:
+**     movl    %eax, %edx
+**     addl    \$32, %eax
+**     movq    \$0, \(%rdi,%rdx\)
+**     movq    \$0, 8\(%rdi,%rdx\)
+**     movq    \$0, 16\(%rdi,%rdx\)
+**     movq    \$0, 24\(%rdi,%rdx\)
+**     cmpl    \$224, %eax
+**     jb      .L[0-9]+
+**...
+*/
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 253);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-26.c b/gcc/testsuite/gcc.target/i386/memset-strategy-26.c
new file mode 100644
index 000000000000..c53bce52e178
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-26.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* { dg-final { scan-assembler-not "jmp\tmemset" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
+
+struct foo
+{
+  char buf[41];
+};
+
+void
+zero(struct foo *f)
+{
+  __builtin_memset(f->buf, 0, sizeof(f->buf));
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-27.c b/gcc/testsuite/gcc.target/i386/memset-strategy-27.c
new file mode 100644
index 000000000000..685d6e5a5c21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-27.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-avx" } */
+/* { dg-final { scan-assembler "jmp\tmemset" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "call\tmemset" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 257);
+}
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-28.c b/gcc/testsuite/gcc.target/i386/memset-strategy-28.c
new file mode 100644
index 000000000000..1d173edf9306
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-28.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     movq    \$0, \(%rdi\)
+**     movq    \$0, 8\(%rdi\)
+**     movq    \$0, 16\(%rdi\)
+**     movq    \$0, 24\(%rdi\)
+**     movq    \$0, 32\(%rdi\)
+**     movq    \$0, 40\(%rdi\)
+**     movq    \$0, 48\(%rdi\)
+**     movq    \$0, 56\(%rdi\)
+**     movb    \$0, 64\(%rdi\)
+**     ret
+**...
+*/
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 65);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-29.c b/gcc/testsuite/gcc.target/i386/memset-strategy-29.c
new file mode 100644
index 000000000000..50470eaba6d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-29.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**...
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     xorl    %eax, %eax
+**.L[0-9]+:
+**     movl    %eax, %edx
+**     addl    \$32, %eax
+**     movq    \$0, \(%rdi,%rdx\)
+**     movq    \$0, 8\(%rdi,%rdx\)
+**     movq    \$0, 16\(%rdi,%rdx\)
+**     movq    \$0, 24\(%rdi,%rdx\)
+**     cmpl    \$64, %eax
+**     jb      .L[0-9]+
+**...
+*/
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 81);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-30.c b/gcc/testsuite/gcc.target/i386/memset-strategy-30.c
new file mode 100644
index 000000000000..ef32a9ce4cdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-30.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-sse" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**...
+**.LFB[0-9]+:
+**     .cfi_startproc
+**     xorl    %eax, %eax
+**.L[0-9]+:
+**     movl    %eax, %edx
+**     addl    \$32, %eax
+**     movq    \$0, \(%rdi,%rdx\)
+**     movq    \$0, 8\(%rdi,%rdx\)
+**     movq    \$0, 16\(%rdi,%rdx\)
+**     movq    \$0, 24\(%rdi,%rdx\)
+**     cmpl    \$64, %eax
+**     jb      .L[0-9]+
+**...
+*/
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 95);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-31.c b/gcc/testsuite/gcc.target/i386/memset-strategy-31.c
new file mode 100644
index 000000000000..17a4df25bb28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memset-strategy-31.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -mno-avx -msse2" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**...
+**     pxor    %xmm0, %xmm0
+**     xorl    %eax, %eax
+**.L[0-9]+:
+**     movl    %eax, %edx
+**     addl    \$64, %eax
+**     movups  %xmm0, \(%rdi,%rdx\)
+**     movups  %xmm0, 16\(%rdi,%rdx\)
+**     movups  %xmm0, 32\(%rdi,%rdx\)
+**     movups  %xmm0, 48\(%rdi,%rdx\)
+**     cmpl    \$192, %eax
+**     jb      .L[0-9]+
+**...
+*/
+
+void
+foo (char *dest)
+{
+  __builtin_memset (dest, 0, 254);
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c
index 8b83c1aecb36..dbf35ac36dc4 100644
--- a/gcc/testsuite/gcc.target/i386/mvc17.c
+++ b/gcc/testsuite/gcc.target/i386/mvc17.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-ifunc "" } */
 /* { dg-options "-O2 -march=x86-64" } */
-/* { dg-final { scan-assembler-times "rep mov" 1 } } */
+/* { dg-final { scan-assembler-not "rep mov" } } */
 
 __attribute__((target_clones("default","arch=icelake-server")))
 void
diff --git a/gcc/testsuite/gcc.target/i386/pr111657-1.c b/gcc/testsuite/gcc.target/i386/pr111657-1.c
index a4ba21073f52..fa9f4cfe5c53 100644
--- a/gcc/testsuite/gcc.target/i386/pr111657-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr111657-1.c
@@ -1,5 +1,26 @@
 /* { dg-do assemble } */
 /* { dg-options "-O2 -mno-sse -mtune=generic -save-temps" } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */
+
+/*
+**bar:
+**...
+**.L[0-9]+:
+**     movl    %edx, %eax
+**     addl    \$32, %edx
+**     movq    %gs:m\(%rax\), %r9
+**     movq    %gs:m\+8\(%rax\), %r8
+**     movq    %gs:m\+16\(%rax\), %rsi
+**     movq    %gs:m\+24\(%rax\), %rcx
+**     movq    %r9, \(%rdi,%rax\)
+**     movq    %r8, 8\(%rdi,%rax\)
+**     movq    %rsi, 16\(%rdi,%rax\)
+**     movq    %rcx, 24\(%rdi,%rax\)
+**     cmpl    \$224, %edx
+**     jb      .L[0-9]+
+**...
+*/
 
 typedef unsigned long uword __attribute__ ((mode (word)));
 
@@ -8,5 +29,4 @@ struct a { uword arr[30]; };
 __seg_gs struct a m;
 void bar (struct a *dst) { *dst = m; }
 
-/* { dg-final { scan-assembler "gs\[ \t\]+rep\[; \t\]+movs(l|q)" { target { ! x32 } } } } */
-/* { dg-final { scan-assembler-not "gs\[ \t\]+rep\[; \t\]+movs(l|q)" { target x32 } } } */
+/* { dg-final { scan-assembler-not "rep movs" } } */
diff --git a/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c
index 4b286671e90b..30b82ab695aa 100644
--- a/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c
+++ b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */
+/* { dg-options "-O2 -mmemset-strategy=rep_8byte:-1:align -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */
 
 enum machine_mode
 {
diff --git a/gcc/testsuite/gcc.target/i386/sw-1.c b/gcc/testsuite/gcc.target/i386/sw-1.c
index b0432279644a..14db3cee206a 100644
--- a/gcc/testsuite/gcc.target/i386/sw-1.c
+++ b/gcc/testsuite/gcc.target/i386/sw-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mtune=generic -fshrink-wrap -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */
+/* { dg-options "-O2 -mtune=generic -mstringop-strategy=rep_byte -fshrink-wrap -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */
 /* { dg-additional-options "-mno-avx" { target ia32 } } */
 /* { dg-skip-if "No shrink-wrapping preformed" { x86_64-*-mingw* } } */

Reply via email to