On x86, both stores with 32-bit immediate and register are supported: 0: 48 c7 40 10 00 00 00 00 movq $0x0,0x10(%rax) 8: 48 89 50 10 movq %rdx,0x10(%rax)
But store with 32-bit immediate is 4 bytes longer. Add UNSPEC_STORE_BY_PIECES to x86 backend for register store to avoid store with 32-bit immediate for shorter encoding and add a target hook to select the store instruction used by the store by_pieces infrastructure so that a target can choose a specific instruction for shorter encoding. When optimizing on x86, we choose register store: 1. If length-changing prefix (LCP) stall is avoided with 16-bit register store. Or 2. If more than 2 stores with 32-bit immediate will be used. gcc/ * expr.cc (store_by_pieces_d::prepare_mode): Call targetm.store_by_pieces_icode to get store by_pieces insn code. * target.def (store_by_pieces_icode): New hook. * targhooks.cc (default_store_by_pieces_icode): New. * targhooks.h (default_store_by_pieces_icode): Likewise. * config/i386/i386.cc (ix86_store_by_pieces_icode): New. (TARGET_STORE_BY_PIECES_ICODE): Likewise. * config/i386/i386.md (UNSPEC_STORE_BY_PIECES): New. (store_by_pieces_mov<mode>): Likewise. (store_by_pieces_mov<mode>_1): Likewise. * config/i386/x86-tune.def (X86_TUNE_USE_REGISTER_STORE_BY_PIECES): Likewise. * doc/tm.texi: Regenerated. * doc/tm.texi.in: Add TARGET_STORE_BY_PIECES_ICODE. gcc/testsuite/ * gcc.target/i386/memset-strategy-10.c: New test. * gcc.target/i386/memset-strategy-11.c: Likewise. * gcc.target/i386/memset-strategy-12.c: Likewise. * gcc.target/i386/memset-strategy-13.c: Likewise. * gcc.target/i386/memset-strategy-14.c: Likewise. * gcc.target/i386/memset-strategy-15.c: Likewise. * gcc.target/i386/memset-strategy-16.c: Likewise. * gcc.target/i386/memset-strategy-17.c: Likewise. * gcc.target/i386/memset-strategy-18.c: Likewise. * gcc.target/i386/memset-strategy-19.c: Likewise. * gcc.target/i386/memset-strategy-20.c: Likewise. * gcc.target/i386/memset-strategy-21.c: Likewise. * gcc.target/i386/pr72839.c: Scan for register store. OK for master? Thanks. -- H.J.
From 85b3f9e34389cba742ca11cacc88b98877be2151 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Mon, 21 Apr 2025 21:12:35 +0800 Subject: [PATCH] Add TARGET_STORE_BY_PIECES_ICODE On x86, both stores with 32-bit immediate and register are supported: 0: 48 c7 40 10 00 00 00 00 movq $0x0,0x10(%rax) 8: 48 89 50 10 movq %rdx,0x10(%rax) But store with 32-bit immediate is 4 bytes longer. Add UNSPEC_STORE_BY_PIECES to x86 backend for register store to avoid store with 32-bit immediate for shorter encoding and add a target hook to select the store instruction used by the store by_pieces infrastructure so that a target can choose a specific instruction for shorter encoding. When optimizing on x86, we choose register store: 1. If length-changing prefix (LCP) stall is avoided with 16-bit register store. Or 2. If more than 2 stores with 32-bit immediate will be used. gcc/ * expr.cc (store_by_pieces_d::prepare_mode): Call targetm.store_by_pieces_icode to get store by_pieces insn code. * target.def (store_by_pieces_icode): New hook. * targhooks.cc (default_store_by_pieces_icode): New. * targhooks.h (default_store_by_pieces_icode): Likewise. * config/i386/i386.cc (ix86_store_by_pieces_icode): New. (TARGET_STORE_BY_PIECES_ICODE): Likewise. * config/i386/i386.md (UNSPEC_STORE_BY_PIECES): New. (store_by_pieces_mov<mode>): Likewise. (store_by_pieces_mov<mode>_1): Likewise. * config/i386/x86-tune.def (X86_TUNE_USE_REGISTER_STORE_BY_PIECES): Likewise. * doc/tm.texi: Regenerated. * doc/tm.texi.in: Add TARGET_STORE_BY_PIECES_ICODE. gcc/testsuite/ * gcc.target/i386/memset-strategy-10.c: New test. * gcc.target/i386/memset-strategy-11.c: Likewise. * gcc.target/i386/memset-strategy-12.c: Likewise. * gcc.target/i386/memset-strategy-13.c: Likewise. * gcc.target/i386/memset-strategy-14.c: Likewise. * gcc.target/i386/memset-strategy-15.c: Likewise. * gcc.target/i386/memset-strategy-16.c: Likewise. * gcc.target/i386/memset-strategy-17.c: Likewise. 
* gcc.target/i386/memset-strategy-18.c: Likewise. * gcc.target/i386/memset-strategy-19.c: Likewise. * gcc.target/i386/memset-strategy-20.c: Likewise. * gcc.target/i386/memset-strategy-21.c: Likewise. * gcc.target/i386/pr72839.c: Scan for register store. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.cc | 65 +++++++++++++++++++ gcc/config/i386/i386.md | 23 +++++++ gcc/config/i386/x86-tune.def | 6 ++ gcc/doc/tm.texi | 5 ++ gcc/doc/tm.texi.in | 2 + gcc/expr.cc | 2 +- gcc/target.def | 7 ++ gcc/targhooks.cc | 9 +++ gcc/targhooks.h | 2 + .../gcc.target/i386/memset-strategy-10.c | 9 +++ .../gcc.target/i386/memset-strategy-11.c | 10 +++ .../gcc.target/i386/memset-strategy-12.c | 9 +++ .../gcc.target/i386/memset-strategy-13.c | 10 +++ .../gcc.target/i386/memset-strategy-14.c | 11 ++++ .../gcc.target/i386/memset-strategy-15.c | 14 ++++ .../gcc.target/i386/memset-strategy-16.c | 10 +++ .../gcc.target/i386/memset-strategy-17.c | 13 ++++ .../gcc.target/i386/memset-strategy-18.c | 11 ++++ .../gcc.target/i386/memset-strategy-19.c | 9 +++ .../gcc.target/i386/memset-strategy-20.c | 12 ++++ .../gcc.target/i386/memset-strategy-21.c | 11 ++++ gcc/testsuite/gcc.target/i386/pr72839.c | 2 +- 22 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-10.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-11.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-12.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-13.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-14.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-15.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-16.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-17.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-18.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-19.c create mode 100644 
gcc/testsuite/gcc.target/i386/memset-strategy-20.c create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-21.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 3171d6e0ad4..a3c6349d14d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -26666,6 +26666,69 @@ ix86_redzone_clobber () return NULL_RTX; } +/* Implement TARGET_STORE_BY_PIECES_ICODE. */ + +static insn_code +ix86_store_by_pieces_icode (machine_mode mode, + unsigned HOST_WIDE_INT length) +{ + if (optimize) + { + /* Avoid word moves with 32-bit immediate if it will be used more + than twice for shorter encoding. */ + if (ix86_tune_features [X86_TUNE_USE_REGISTER_STORE_BY_PIECES]) + { + if (STORE_MAX_PIECES == UNITS_PER_WORD) + { + /* If the remaining bytes are greater than 2 bytes, the + previous register can be reused. */ + if ((length / UNITS_PER_WORD) > 2 + || (length > (UNITS_PER_WORD * 2) + && (length % UNITS_PER_WORD) > 2)) + switch (mode) + { + case HImode: + return CODE_FOR_store_by_pieces_movhi; + case SImode: + return CODE_FOR_store_by_pieces_movsi; + case DImode: + return CODE_FOR_store_by_pieces_movdi; + default: + break; + } + } + else if (!TARGET_64BIT) + { + /* 1 DImode store will be split into 2 SImode stores. */ + if (mode == DImode && (length % 8) >= 2) + return CODE_FOR_nothing; + else if (mode == SImode + && ((length / 4) > 2 + || (length > 8 && (length % 4) > 2))) + return CODE_FOR_store_by_pieces_movsi; + } + } + + if (TARGET_LCP_STALL) + { + /* If HImode register store will be used, use SImode/DImode + register store so that the previous SImode/DImode register + can be reused. */ + if (mode == SImode || mode == DImode) + { + if ((length % GET_MODE_SIZE (mode)) == 2) + return (mode == DImode + ? CODE_FOR_store_by_pieces_movdi + : CODE_FOR_store_by_pieces_movsi); + } + else if (mode == HImode) + return CODE_FOR_store_by_pieces_movhi; + } + } + + return default_store_by_pieces_icode (mode, length); +} + /* Target-specific selftests. 
*/ #if CHECKING_P @@ -27111,6 +27174,8 @@ static const scoped_attribute_specs *const ix86_attribute_table[] = #undef TARGET_OVERLAP_OP_BY_PIECES_P #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true +#undef TARGET_STORE_BY_PIECES_ICODE +#define TARGET_STORE_BY_PIECES_ICODE ix86_store_by_pieces_icode #undef TARGET_FLAGS_REGNUM #define TARGET_FLAGS_REGNUM FLAGS_REG diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e170da3b0e6..aeba9f8b2eb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -118,6 +118,7 @@ (define_c_enum "unspec" [ UNSPEC_POPFL UNSPEC_OPTCOMX UNSPEC_SETCC_SI_SLP + UNSPEC_STORE_BY_PIECES ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -2417,6 +2418,28 @@ (define_expand "mov<mode>" "" "ix86_expand_move (<MODE>mode, operands); DONE;") +;; SI/DI mode register stores used by store by_pieces for shorter +;; encoding. +(define_expand "store_by_pieces_mov<mode>" + [(set (match_operand:SWI248x 0 "memory_operand") + (match_operand:SWI248x 1 "general_operand"))] + "" +{ + operands[1] = force_reg (<MODE>mode, operands[1]); + emit_insn (gen_store_by_pieces_mov<mode>_1 (operands[0], + operands[1])); + DONE; +}) + +(define_insn "store_by_pieces_mov<mode>_1" + [(set (match_operand:SWI248x 0 "memory_operand" "=m") + (unspec:SWI248x [(match_operand:SWI248x 1 "register_operand" "r")] + UNSPEC_STORE_BY_PIECES))] + "" + "mov<SWI248x:imodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>")]) + (define_insn "*mov<mode>_xor" [(set (match_operand:SWI48 0 "register_operand" "=r") (match_operand:SWI48 1 "const0_operand")) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c857e769b60..c0e466e991a 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -636,6 +636,12 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues", m_ZNVER4 | m_ZNVER5) +/* 
X86_TUNE_USE_REGISTER_STORE_BY_PIECES: Generate store_by_pieces with + register store for shorter encoding. */ +DEF_TUNE (X86_TUNE_USE_REGISTER_STORE_BY_PIECES, + "use_register_store_by_pieces", + HOST_WIDE_INT_M1U) + /*****************************************************************************/ /*****************************************************************************/ /* Historical relics: tuning flags that helps a specific old CPU designs */ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index a96700c0d38..0061b2e2dd0 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -7186,6 +7186,11 @@ particular mode from being used for block comparisons by returning a negative number from this hook. @end deftypefn +@deftypefn {Target Hook} insn_code TARGET_STORE_BY_PIECES_ICODE (machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{length}) +This target hook returns insn_code to move up to @var{length} bytes in +@var{mode} used by the store @code{by_pieces} infrastructure. +@end deftypefn + @defmac MOVE_MAX_PIECES A C expression used by @code{move_by_pieces} to determine the largest unit a load or store used to copy memory is. Defaults to @code{MOVE_MAX}. diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index eccc4d88493..e8cd831ad32 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4639,6 +4639,8 @@ If you don't define this, a reasonable default is used. @hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO +@hook TARGET_STORE_BY_PIECES_ICODE + @defmac MOVE_MAX_PIECES A C expression used by @code{move_by_pieces} to determine the largest unit a load or store used to copy memory is. Defaults to @code{MOVE_MAX}. 
diff --git a/gcc/expr.cc b/gcc/expr.cc index 3815c565e2d..2b56b1bf983 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -1714,7 +1714,7 @@ class store_by_pieces_d : public op_by_pieces_d bool store_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align) { - insn_code icode = optab_handler (mov_optab, mode); + insn_code icode = targetm.store_by_pieces_icode (mode, m_len); m_gen_fun = GEN_FCN (icode); return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode); } diff --git a/gcc/target.def b/gcc/target.def index 6c7cdc8126b..bfe33c4a614 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3909,6 +3909,13 @@ negative number from this hook.", int, (machine_mode mode), default_compare_by_pieces_branch_ratio) +DEFHOOK +(store_by_pieces_icode, + "This target hook returns insn_code to move up to @var{length} bytes in\n\ +@var{mode} used by the store @code{by_pieces} infrastructure.", + insn_code, (machine_mode mode, unsigned HOST_WIDE_INT length), + default_store_by_pieces_icode) + DEFHOOK (slow_unaligned_access, "This hook returns true if memory accesses described by the\n\ diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc index c79458e374e..918e7b564a0 100644 --- a/gcc/targhooks.cc +++ b/gcc/targhooks.cc @@ -2196,6 +2196,15 @@ default_compare_by_pieces_branch_ratio (machine_mode) return 1; } +/* This target hook returns insn_code to move the MODE memory used by the + store by_pieces infrastructure. */ + +insn_code +default_store_by_pieces_icode (machine_mode mode, unsigned HOST_WIDE_INT) +{ + return optab_handler (mov_optab, mode); +} + /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function entry. 
If RECORD_P is true and the target supports named sections, the location of the NOPs will be recorded in a special object section diff --git a/gcc/targhooks.h b/gcc/targhooks.h index f16b58798c2..c04a47c5897 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -249,6 +249,8 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, enum by_pieces_operation, bool); extern int default_compare_by_pieces_branch_ratio (machine_mode); +extern insn_code default_store_by_pieces_icode (machine_mode, + unsigned HOST_WIDE_INT); extern void default_print_patchable_function_entry (FILE *, unsigned HOST_WIDE_INT, diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-10.c b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c new file mode 100644 index 00000000000..1ac39df097b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-not "mov\[lq\]?\[\\t \]*\\$\[0\]," } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 23); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-11.c b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c new file mode 100644 index 00000000000..67080e22fde --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-not "mov\[lq\]?\[\\t \]*\\$\[0\]," { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*\\$\[0\]," 2 { target { ! 
ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 17); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-12.c b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c new file mode 100644 index 00000000000..bdd06aa685b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*\\$\[0\]," 2 } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 7); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-13.c b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c new file mode 100644 index 00000000000..3ba6218e2c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%e" 5 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*%r" 3 { target { ! ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 21); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-14.c b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c new file mode 100644 index 00000000000..398401c9b5b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%e" 5 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*%rax," 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%eax," 1 { target { ! 
ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 20); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-15.c b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c new file mode 100644 index 00000000000..f96646a4637 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake" } */ +/* { dg-final { scan-assembler-times "xorl\[\\t \]*%edx,\[\\t \]*%edx" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%edx," 2 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movw\[\\t \]*%dx," 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "xorl\[\\t \]*%eax,\[\\t \]*%eax" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*%rax," 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movw\[\\t \]*%ax," 1 { target { ! ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 10); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-16.c b/gcc/testsuite/gcc.target/i386/memset-strategy-16.c new file mode 100644 index 00000000000..afedfac2b25 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-16.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*\\%e" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*\\$\[0\]," 1 { target { ! 
ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-17.c b/gcc/testsuite/gcc.target/i386/memset-strategy-17.c new file mode 100644 index 00000000000..df6dbf95cab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-17.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -mtune=tigerlake" } */ +/* { dg-final { scan-assembler-not "xorl" } } */ +/* { dg-final { scan-assembler-times "movups" 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "movl\[\\t \]*\\$\[0\]," { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movups" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*\\$\[0\]," 1 { target { ! ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 40); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-18.c b/gcc/testsuite/gcc.target/i386/memset-strategy-18.c new file mode 100644 index 00000000000..ba601cc9709 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-18.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake" } */ +/* { dg-final { scan-assembler-times "xorl\[\\t \]*%e\[ad\]x,\[\\t \]*%e\[ad\]x" 1 } } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%e\[ad\]x," 1 } } */ +/* { dg-final { scan-assembler-times "movw\[\\t \]*%\[ad\]x," 1 } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 6); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-19.c b/gcc/testsuite/gcc.target/i386/memset-strategy-19.c new file mode 100644 index 00000000000..d7964f7fc0e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-19.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*\\$\[0\]," 1 } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 4); +} diff --git 
a/gcc/testsuite/gcc.target/i386/memset-strategy-20.c b/gcc/testsuite/gcc.target/i386/memset-strategy-20.c new file mode 100644 index 00000000000..e8ae32aec0c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-20.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake" } */ +/* { dg-final { scan-assembler-times "xorl\[\\t \]*%edx,\[\\t \]*%edx" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%edx," 3 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*\\$\[0\]," 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*\\$\[0\]," 1 { target { ! ia32 } } } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 11); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-21.c b/gcc/testsuite/gcc.target/i386/memset-strategy-21.c new file mode 100644 index 00000000000..c2f371862c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-21.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=tigerlake -mno-sse" } */ +/* { dg-final { scan-assembler-times "movl\[\\t \]*%e" 4 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "movq\[\\t \]*%rax," 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "movw\[\\t \]*%\[ad\]x," 1 } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 18); +} diff --git a/gcc/testsuite/gcc.target/i386/pr72839.c b/gcc/testsuite/gcc.target/i386/pr72839.c index 6888d9d0a55..ecdf8609c66 100644 --- a/gcc/testsuite/gcc.target/i386/pr72839.c +++ b/gcc/testsuite/gcc.target/i386/pr72839.c @@ -12,6 +12,6 @@ foo (char *s) "1234567"); } -/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\$\[0-9\]+, \[0-9\]*\\(%\[^,\]+\\)" 16 } } */ +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\%e\[a-es\]\[ix\], \[0-9\]*\\(%\[^,\]+\\)" 16 } } */ /* { dg-final { scan-assembler-not "rep movsl" } } */ /* { dg-final { scan-assembler-not "rep movsb" } } */ -- 2.49.0