https://gcc.gnu.org/g:d073bb6cfc219d4b6c283a0b527ee88b42e640e0
commit r16-1643-gd073bb6cfc219d4b6c283a0b527ee88b42e640e0 Author: H.J. Lu <hjl.to...@gmail.com> Date: Thu Mar 18 18:43:10 2021 -0700 x86: Update memcpy/memset inline strategies for -mtune=generic Update memcpy and memset inline strategies for -mtune=generic: 1. Don't align memory. 2. For known sizes, prefer vector loop, unroll loop with 4 moves or stores per iteration without aligning the loop, up to 256 bytes. 3. For unknown sizes, use memcpy/memset. 4. Since each loop iteration has 4 stores, and zeroing with the unrolled loop may need 8 stores, change CLEAR_RATIO to 10 so that zeroing up to 72 bytes is fully unrolled with 9 stores without SSE. gcc/ PR target/70308 PR target/101366 PR target/102294 PR target/108585 PR target/118276 PR target/119596 PR target/119703 PR target/119704 * config/i386/x86-tune-costs.h (generic_memcpy): Updated. (generic_memset): Likewise. (generic_cost): Change CLEAR_RATIO to 10. gcc/testsuite/ PR target/70308 PR target/101366 PR target/102294 PR target/108585 PR target/118276 PR target/119596 PR target/119703 PR target/119704 * g++.target/i386/memset-pr101366-1.C: New test. * g++.target/i386/memset-pr101366-2.C: Likewise. * g++.target/i386/memset-pr108585-1a.C: Likewise. * g++.target/i386/memset-pr108585-1b.C: Likewise. * g++.target/i386/memset-pr118276-1a.C: Likewise. * g++.target/i386/memset-pr118276-1b.C: Likewise. * g++.target/i386/memset-pr118276-1c.C: Likewise. * gcc.target/i386/memcpy-strategy-12.c: Likewise. * gcc.target/i386/memcpy-strategy-13.c: Likewise. * gcc.target/i386/memset-pr70308-1a.c: Likewise. * gcc.target/i386/memset-pr70308-1b.c: Likewise. * gcc.target/i386/memset-strategy-25.c: Likewise. * gcc.target/i386/memset-strategy-26.c: Likewise. * gcc.target/i386/memset-strategy-27.c: Likewise. * gcc.target/i386/memset-strategy-28.c: Likewise. * gcc.target/i386/memset-strategy-29.c: Likewise. * gcc.target/i386/memset-strategy-30.c: Likewise. * gcc.target/i386/memset-strategy-31.c: Likewise. 
* gcc.target/i386/auto-init-padding-3.c: Expect XMM stores. * gcc.target/i386/auto-init-padding-9.c: Likewise. * gcc.target/i386/mvc17.c: Fail with "rep mov" * gcc.target/i386/pr111657-1.c: Scan for unrolled loop. Fail with "rep mov". * gcc.target/i386/shrink_wrap_1.c: Also pass -mmemset-strategy=rep_8byte:-1:align. * gcc.target/i386/sw-1.c: Also pass -mstringop-strategy=rep_byte. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> Diff: --- gcc/config/i386/x86-tune-costs.h | 39 ++++++++++---- gcc/testsuite/g++.target/i386/memset-pr101366-1.C | 30 +++++++++++ gcc/testsuite/g++.target/i386/memset-pr101366-2.C | 26 +++++++++ gcc/testsuite/g++.target/i386/memset-pr108585-1a.C | 43 +++++++++++++++ gcc/testsuite/g++.target/i386/memset-pr108585-1b.C | 43 +++++++++++++++ gcc/testsuite/g++.target/i386/memset-pr118276-1a.C | 35 +++++++++++++ gcc/testsuite/g++.target/i386/memset-pr118276-1b.C | 24 +++++++++ gcc/testsuite/g++.target/i386/memset-pr118276-1c.C | 24 +++++++++ .../gcc.target/i386/auto-init-padding-3.c | 7 +-- .../gcc.target/i386/auto-init-padding-9.c | 25 +++++++-- gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c | 34 ++++++++++++ gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c | 11 ++++ gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c | 46 ++++++++++++++++ gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c | 61 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/memset-strategy-25.c | 29 ++++++++++ gcc/testsuite/gcc.target/i386/memset-strategy-26.c | 15 ++++++ gcc/testsuite/gcc.target/i386/memset-strategy-27.c | 11 ++++ gcc/testsuite/gcc.target/i386/memset-strategy-28.c | 29 ++++++++++ gcc/testsuite/gcc.target/i386/memset-strategy-29.c | 30 +++++++++++ gcc/testsuite/gcc.target/i386/memset-strategy-30.c | 30 +++++++++++ gcc/testsuite/gcc.target/i386/memset-strategy-31.c | 30 +++++++++++ gcc/testsuite/gcc.target/i386/mvc17.c | 2 +- gcc/testsuite/gcc.target/i386/pr111657-1.c | 24 ++++++++- gcc/testsuite/gcc.target/i386/shrink_wrap_1.c | 2 +- 
gcc/testsuite/gcc.target/i386/sw-1.c | 2 +- 25 files changed, 626 insertions(+), 26 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index b08081e37cfb..a5b99d1f9629 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -4065,19 +4065,36 @@ struct processor_costs shijidadao_cost = { -/* Generic should produce code tuned for Core-i7 (and newer chips) - and btver1 (and newer chips). */ +/* Generic should produce code tuned for Haswell (and newer chips) + and znver1 (and newer chips): + 1. Don't align memory. + 2. For known sizes, prefer vector loop, unroll loop with 4 moves or + stores per iteration without aligning the loop, up to 256 bytes. + 3. For unknown sizes, use memcpy/memset. + 4. Since each loop iteration has 4 stores and 8 stores for zeroing + with unroll loop may be needed, change CLEAR_RATIO to 10 so that + zeroing up to 72 bytes are fully unrolled with 9 stores without + SSE. + */ static stringop_algs generic_memcpy[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static stringop_algs generic_memset[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static const struct processor_costs generic_cost = { { @@ -4134,7 +4151,7 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" 
insn */ 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ + 10, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ diff --git a/gcc/testsuite/g++.target/i386/memset-pr101366-1.C b/gcc/testsuite/g++.target/i386/memset-pr101366-1.C new file mode 100644 index 000000000000..96d529b7c819 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr101366-1.C @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64-v2 -std=gnu++17" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z4TestPc: +**.LFB[0-9]+: +** .cfi_startproc +** movzbl -1\(%rdi\), %eax +** pxor %xmm1, %xmm1 +** movd %eax, %xmm0 +** pshufb %xmm1, %xmm0 +** movups %xmm0, \(%rdi\) +** movups %xmm0, 16\(%rdi\) +** movups %xmm0, 32\(%rdi\) +** movups %xmm0, 48\(%rdi\) +** ret +**... +*/ + +#include <x86intrin.h> + +void +Test (char* dst) +{ + __m128i pattern = _mm_set1_epi8(dst[-1]); + for (int i = 0; i < 4; i++) + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst + 16 * i), pattern); +} diff --git a/gcc/testsuite/g++.target/i386/memset-pr101366-2.C b/gcc/testsuite/g++.target/i386/memset-pr101366-2.C new file mode 100644 index 000000000000..b966d1f841dd --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr101366-2.C @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64-v2 -std=gnu++17" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z4TestPc: +**.LFB[0-9]+: +** .cfi_startproc +** movsbl -1\(%rdi\), %eax +** pxor %xmm1, %xmm1 +** movd %eax, %xmm0 +** pshufb %xmm1, %xmm0 +** movups %xmm0, \(%rdi\) +** movups %xmm0, 16\(%rdi\) +** movups %xmm0, 32\(%rdi\) +** movups %xmm0, 48\(%rdi\) +** ret +**... 
+*/ + +void +Test (char*s) +{ + __builtin_memset (s, s[-1], 64); +} diff --git a/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C b/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C new file mode 100644 index 000000000000..464f61c70503 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr108585-1a.C @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64 -std=c++20 -DUSE_CHAR" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z6squarei: +**.LFB[0-9]+: +** .cfi_startproc +** subq \$104, %rsp +** .cfi_def_cfa_offset 112 +** pxor %xmm0, %xmm0 +** movq %rsp, %rdi +** movaps %xmm0, \(%rsp\) +** movaps %xmm0, 16\(%rsp\) +** movaps %xmm0, 32\(%rsp\) +** movaps %xmm0, 48\(%rsp\) +** movaps %xmm0, 64\(%rsp\) +** movaps %xmm0, 80\(%rsp\) +** call _Z3fooPc +** addq \$104, %rsp +** .cfi_def_cfa_offset 8 +** ret +**... +*/ + +#include <cstddef> + +#ifdef USE_CHAR +# define TYPE char +#else +# define TYPE std::byte +#endif + +extern int foo(TYPE *arr); + +int square(int num) +{ + TYPE arr[96] = {}; + return foo(arr); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C b/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C new file mode 100644 index 000000000000..c14a110fa713 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr108585-1b.C @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64 -std=c++20" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). 
*/ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z6squarei: +**.LFB[0-9]+: +** .cfi_startproc +** subq \$104, %rsp +** .cfi_def_cfa_offset 112 +** pxor %xmm0, %xmm0 +** movq %rsp, %rdi +** movaps %xmm0, \(%rsp\) +** movaps %xmm0, 16\(%rsp\) +** movaps %xmm0, 32\(%rsp\) +** movaps %xmm0, 48\(%rsp\) +** movaps %xmm0, 64\(%rsp\) +** movaps %xmm0, 80\(%rsp\) +** call _Z3fooPSt4byte +** addq \$104, %rsp +** .cfi_def_cfa_offset 8 +** ret +**... +*/ + +#include <cstddef> + +#ifdef USE_CHAR +# define TYPE char +#else +# define TYPE std::byte +#endif + +extern int foo(TYPE *arr); + +int square(int num) +{ + TYPE arr[96] = {}; + return foo(arr); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C new file mode 100644 index 000000000000..1df6646957e6 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1a.C @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=0" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z22makeDefaultConstructedv: +**.LFB[0-9]+: +** .cfi_startproc +** pxor %xmm0, %xmm0 +** movq \$0, 80\(%rdi\) +** movq %rdi, %rax +** movups %xmm0, \(%rdi\) +** movups %xmm0, 16\(%rdi\) +** movups %xmm0, 32\(%rdi\) +** movups %xmm0, 48\(%rdi\) +** movups %xmm0, 64\(%rdi\) +** ret +**... 
+*/ + +struct S { + long int c[10] = {}; + int x{}; +#if MODE == 0 +#elif MODE == 1 + S() = default; +#elif MODE == 2 + S() noexcept {} +#endif +}; + +S makeDefaultConstructed() { return S{}; } + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C new file mode 100644 index 000000000000..9fd77df15fe3 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1b.C @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=1" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z22makeDefaultConstructedv: +**.LFB[0-9]+: +** .cfi_startproc +** pxor %xmm0, %xmm0 +** movq \$0, 80\(%rdi\) +** movq %rdi, %rax +** movups %xmm0, \(%rdi\) +** movups %xmm0, 16\(%rdi\) +** movups %xmm0, 32\(%rdi\) +** movups %xmm0, 48\(%rdi\) +** movups %xmm0, 64\(%rdi\) +** ret +**... +*/ + +#include "memset-pr118276-1a.C" + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C b/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C new file mode 100644 index 000000000000..624c785f49b6 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/memset-pr118276-1c.C @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64 -std=c++17 -DMODE=2" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**_Z22makeDefaultConstructedv: +**.LFB[0-9]+: +** .cfi_startproc +** pxor %xmm0, %xmm0 +** movl \$0, 80\(%rdi\) +** movq %rdi, %rax +** movups %xmm0, \(%rdi\) +** movups %xmm0, 16\(%rdi\) +** movups %xmm0, 32\(%rdi\) +** movups %xmm0, 48\(%rdi\) +** movups %xmm0, 64\(%rdi\) +** ret +**... 
+*/ + +#include "memset-pr118276-1a.C" + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c b/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c index 7c20a28508ff..a12069a039d4 100644 --- a/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c +++ b/gcc/testsuite/gcc.target/i386/auto-init-padding-3.c @@ -23,8 +23,5 @@ int foo () return var.four.internal1; } -/* { dg-final { scan-assembler "movl\t\\\$0," } } */ -/* { dg-final { scan-assembler "movl\t\\\$16," { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler "rep stosq" { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler "movl\t\\\$32," { target ia32 } } } */ -/* { dg-final { scan-assembler "rep stosl" { target ia32 } } } */ +/* { dg-final { scan-assembler-times "pxor\t%xmm0, %xmm0" 1 } } */ +/* { dg-final { scan-assembler-times "movaps\t%xmm0, " 8 } } */ diff --git a/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c b/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c index a87b68b255b0..102217ce2c1b 100644 --- a/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c +++ b/gcc/testsuite/gcc.target/i386/auto-init-padding-9.c @@ -2,6 +2,25 @@ padding. */ /* { dg-do compile } */ /* { dg-options "-ftrivial-auto-var-init=zero -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**... +** pxor %xmm0, %xmm0 +**... +**.L[0-9]+: +** movl %esi, %ecx +** movaps %xmm0, \(%rdx,%rcx\) +** movaps %xmm0, 16\(%rdx,%rcx\) +** movaps %xmm0, 32\(%rdx,%rcx\) +** movaps %xmm0, 48\(%rdx,%rcx\) +** addl \$64, %esi +** cmpl %edi, %esi +** jb .L[0-9]+ +**... +*/ struct test_trailing_hole { int one; @@ -18,8 +37,4 @@ int foo () return var[2].four; } -/* { dg-final { scan-assembler "movl\t\\\$0," } } */ -/* { dg-final { scan-assembler "movl\t\\\$20," { target { ! 
ia32 } } } } */ -/* { dg-final { scan-assembler "rep stosq" { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler "movl\t\\\$40," { target ia32} } } */ -/* { dg-final { scan-assembler "rep stosl" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c new file mode 100644 index 000000000000..d0316efc8eea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-12.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +**... +** xorl %edx, %edx +**... +**.L[0-9]+: +** movl %edx, %eax +** addl \$32, %edx +** movq \(%rsi,%rax\), %r10 +** movq 8\(%rsi,%rax\), %r9 +** movq 16\(%rsi,%rax\), %r8 +** movq 24\(%rsi,%rax\), %rcx +** movq %r10, \(%rdi,%rax\) +** movq %r9, 8\(%rdi,%rax\) +** movq %r8, 16\(%rdi,%rax\) +** movq %rcx, 24\(%rdi,%rax\) +** cmpl \$224, %edx +** jb .L[0-9]+ +**... +*/ + +void +foo (char *dest, char *src) +{ + __builtin_memcpy (dest, src, 253); +} + +/* { dg-final { scan-assembler-not "rep mov" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c new file mode 100644 index 000000000000..109bd675a51b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-13.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-avx" } */ +/* { dg-final { scan-assembler "jmp\tmemcpy" { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler "call\tmemcpy" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep movsb" } } */ + +void +foo (char *dest, char *src) +{ + __builtin_memcpy (dest, src, 257); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c b/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c new file mode 100644 index 000000000000..e3494413b2d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-pr70308-1a.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +** .cfi_startproc +** subq \$16, %rsp +** .cfi_def_cfa_offset 24 +** pxor %xmm0, %xmm0 +** movaps %xmm0, -120\(%rsp\) +** movaps %xmm0, -104\(%rsp\) +** movaps %xmm0, -88\(%rsp\) +** movaps %xmm0, -72\(%rsp\) +** movaps %xmm0, -56\(%rsp\) +** movaps %xmm0, -40\(%rsp\) +** movaps %xmm0, -24\(%rsp\) +** movaps %xmm0, -8\(%rsp\) +** xorl %eax, %eax +** addq \$16, %rsp +** .cfi_def_cfa_offset 8 +** ret +**... +*/ + +extern int scanf (const char *, ...); +extern void *memset (void *, int, __SIZE_TYPE__); + +int +foo (void) +{ + char buf[128]; + +#if USE_SCANF + if (scanf("%s", buf) != 1) + return 42; +#endif + + memset (buf,0, sizeof (buf)); + asm volatile("": : :"memory"); + return 0; +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c b/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c new file mode 100644 index 000000000000..52f7b8ce242f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-pr70308-1b.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -DUSE_SCANF" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). 
*/ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +** .cfi_startproc +** subq \$136, %rsp +** .cfi_def_cfa_offset 144 +** xorl %eax, %eax +** movl \$.LC[0-9]+, %edi +** movq %rsp, %rsi +** call scanf +** cmpl \$1, %eax +** je .L[0-9]+ +** movl \$42, %eax +** addq \$136, %rsp +** .cfi_remember_state +** .cfi_def_cfa_offset 8 +** ret +** .p2align 4,,10 +** .p2align 3 +**.L[0-9]+: +** .cfi_restore_state +** pxor %xmm0, %xmm0 +** movaps %xmm0, \(%rsp\) +** movaps %xmm0, 16\(%rsp\) +** movaps %xmm0, 32\(%rsp\) +** movaps %xmm0, 48\(%rsp\) +** movaps %xmm0, 64\(%rsp\) +** movaps %xmm0, 80\(%rsp\) +** movaps %xmm0, 96\(%rsp\) +** movaps %xmm0, 112\(%rsp\) +** xorl %eax, %eax +** addq \$136, %rsp +** .cfi_def_cfa_offset 8 +** ret +**... +*/ + +extern int scanf (const char *, ...); +extern void *memset (void *, int, __SIZE_TYPE__); + +int +foo (void) +{ + char buf[128]; + +#if USE_SCANF + if (scanf("%s", buf) != 1) + return 42; +#endif + + memset (buf,0, sizeof (buf)); + asm volatile("": : :"memory"); + return 0; +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-25.c b/gcc/testsuite/gcc.target/i386/memset-strategy-25.c new file mode 100644 index 000000000000..040439d1671c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-25.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +** .cfi_startproc +** xorl %eax, %eax +**.L[0-9]+: +** movl %eax, %edx +** addl \$32, %eax +** movq \$0, \(%rdi,%rdx\) +** movq \$0, 8\(%rdi,%rdx\) +** movq \$0, 16\(%rdi,%rdx\) +** movq \$0, 24\(%rdi,%rdx\) +** cmpl \$224, %eax +** jb .L[0-9]+ +**... 
+*/ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 253); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-26.c b/gcc/testsuite/gcc.target/i386/memset-strategy-26.c new file mode 100644 index 000000000000..c53bce52e178 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-26.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* { dg-final { scan-assembler-not "jmp\tmemset" } } */ +/* { dg-final { scan-assembler-not "rep stosb" } } */ + +struct foo +{ + char buf[41]; +}; + +void +zero(struct foo *f) +{ + __builtin_memset(f->buf, 0, sizeof(f->buf)); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-27.c b/gcc/testsuite/gcc.target/i386/memset-strategy-27.c new file mode 100644 index 000000000000..685d6e5a5c21 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-27.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-avx" } */ +/* { dg-final { scan-assembler "jmp\tmemset" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "call\tmemset" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep stosb" } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 257); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-28.c b/gcc/testsuite/gcc.target/i386/memset-strategy-28.c new file mode 100644 index 000000000000..1d173edf9306 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-28.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). 
*/ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +** .cfi_startproc +** movq \$0, \(%rdi\) +** movq \$0, 8\(%rdi\) +** movq \$0, 16\(%rdi\) +** movq \$0, 24\(%rdi\) +** movq \$0, 32\(%rdi\) +** movq \$0, 40\(%rdi\) +** movq \$0, 48\(%rdi\) +** movq \$0, 56\(%rdi\) +** movb \$0, 64\(%rdi\) +** ret +**... +*/ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 65); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-29.c b/gcc/testsuite/gcc.target/i386/memset-strategy-29.c new file mode 100644 index 000000000000..50470eaba6d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-29.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**... +**.LFB[0-9]+: +** .cfi_startproc +** xorl %eax, %eax +**.L[0-9]+: +** movl %eax, %edx +** addl \$32, %eax +** movq \$0, \(%rdi,%rdx\) +** movq \$0, 8\(%rdi,%rdx\) +** movq \$0, 16\(%rdi,%rdx\) +** movq \$0, 24\(%rdi,%rdx\) +** cmpl \$64, %eax +** jb .L[0-9]+ +**... +*/ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 81); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-30.c b/gcc/testsuite/gcc.target/i386/memset-strategy-30.c new file mode 100644 index 000000000000..ef32a9ce4cdc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-30.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-sse" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**... 
+**.LFB[0-9]+: +** .cfi_startproc +** xorl %eax, %eax +**.L[0-9]+: +** movl %eax, %edx +** addl \$32, %eax +** movq \$0, \(%rdi,%rdx\) +** movq \$0, 8\(%rdi,%rdx\) +** movq \$0, 16\(%rdi,%rdx\) +** movq \$0, 24\(%rdi,%rdx\) +** cmpl \$64, %eax +** jb .L[0-9]+ +**... +*/ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 95); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-31.c b/gcc/testsuite/gcc.target/i386/memset-strategy-31.c new file mode 100644 index 000000000000..17a4df25bb28 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-31.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=generic -mno-avx -msse2" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**foo: +**.LFB[0-9]+: +**... +** pxor %xmm0, %xmm0 +** xorl %eax, %eax +**.L[0-9]+: +** movl %eax, %edx +** addl \$64, %eax +** movups %xmm0, \(%rdi,%rdx\) +** movups %xmm0, 16\(%rdi,%rdx\) +** movups %xmm0, 32\(%rdi,%rdx\) +** movups %xmm0, 48\(%rdi,%rdx\) +** cmpl \$192, %eax +** jb .L[0-9]+ +**... 
+*/ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 254); +} + +/* { dg-final { scan-assembler-not "rep stos" } } */ diff --git a/gcc/testsuite/gcc.target/i386/mvc17.c b/gcc/testsuite/gcc.target/i386/mvc17.c index 8b83c1aecb36..dbf35ac36dc4 100644 --- a/gcc/testsuite/gcc.target/i386/mvc17.c +++ b/gcc/testsuite/gcc.target/i386/mvc17.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-ifunc "" } */ /* { dg-options "-O2 -march=x86-64" } */ -/* { dg-final { scan-assembler-times "rep mov" 1 } } */ +/* { dg-final { scan-assembler-not "rep mov" } } */ __attribute__((target_clones("default","arch=icelake-server"))) void diff --git a/gcc/testsuite/gcc.target/i386/pr111657-1.c b/gcc/testsuite/gcc.target/i386/pr111657-1.c index a4ba21073f52..fa9f4cfe5c53 100644 --- a/gcc/testsuite/gcc.target/i386/pr111657-1.c +++ b/gcc/testsuite/gcc.target/i386/pr111657-1.c @@ -1,5 +1,26 @@ /* { dg-do assemble } */ /* { dg-options "-O2 -mno-sse -mtune=generic -save-temps" } */ +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } {^\t?\.} } } */ + +/* +**bar: +**... +**.L[0-9]+: +** movl %edx, %eax +** addl \$32, %edx +** movq %gs:m\(%rax\), %r9 +** movq %gs:m\+8\(%rax\), %r8 +** movq %gs:m\+16\(%rax\), %rsi +** movq %gs:m\+24\(%rax\), %rcx +** movq %r9, \(%rdi,%rax\) +** movq %r8, 8\(%rdi,%rax\) +** movq %rsi, 16\(%rdi,%rax\) +** movq %rcx, 24\(%rdi,%rax\) +** cmpl \$224, %edx +** jb .L[0-9]+ +**... +*/ typedef unsigned long uword __attribute__ ((mode (word))); @@ -8,5 +29,4 @@ struct a { uword arr[30]; }; __seg_gs struct a m; void bar (struct a *dst) { *dst = m; } -/* { dg-final { scan-assembler "gs\[ \t\]+rep\[; \t\]+movs(l|q)" { target { ! 
x32 } } } } */ -/* { dg-final { scan-assembler-not "gs\[ \t\]+rep\[; \t\]+movs(l|q)" { target x32 } } } */ +/* { dg-final { scan-assembler-not "rep movs" } } */ diff --git a/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c index 4b286671e90b..30b82ab695aa 100644 --- a/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c +++ b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */ +/* { dg-options "-O2 -mmemset-strategy=rep_8byte:-1:align -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */ enum machine_mode { diff --git a/gcc/testsuite/gcc.target/i386/sw-1.c b/gcc/testsuite/gcc.target/i386/sw-1.c index b0432279644a..14db3cee206a 100644 --- a/gcc/testsuite/gcc.target/i386/sw-1.c +++ b/gcc/testsuite/gcc.target/i386/sw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mtune=generic -fshrink-wrap -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */ +/* { dg-options "-O2 -mtune=generic -mstringop-strategy=rep_byte -fshrink-wrap -fdump-rtl-pro_and_epilogue -fno-stack-protector" } */ /* { dg-additional-options "-mno-avx" { target ia32 } } */ /* { dg-skip-if "No shrink-wrapping preformed" { x86_64-*-mingw* } } */