On Fri, Aug 29, 2025 at 2:55 AM H.J. Lu <[email protected]> wrote: > > Since > > commit 401199377c50045ede560daf3f6e8b51749c2a87 > Author: H.J. Lu <[email protected]> > Date: Tue Jun 17 10:17:17 2025 +0800 > > x86: Improve vector_loop/unrolled_loop for memset/memcpy > > uses move_by_pieces and store_by_pieces to expand memcpy/memset epilogue > with vector_loop even when targetm.use_by_pieces_infrastructure_p returns > false, which triggers > > gcc_assert (targetm.use_by_pieces_infrastructure_p > (len, align, > memsetp ? SET_BY_PIECES : STORE_BY_PIECES, > optimize_insn_for_speed_p ())); > > in store_by_pieces. Fix it by: > > 1. Add by_pieces_in_use to machine_function to indicate that by_pieces op > is currently in use. > 2. Set and clear by_pieces_in_use when expanding memcpy/memset epilogue > with move_by_pieces and store_by_pieces. > 3. Define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P to return true if > by_pieces_in_use is true. > > gcc/ > > PR target/121096 > * config/i386/i386-expand.cc (expand_cpymem_epilogue): Set and > clear by_pieces_in_use when using by_pieces op. > (expand_setmem_epilogue): Likewise. > * config/i386/i386.cc (ix86_use_by_pieces_infrastructure_p): New. > (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise. > * config/i386/i386.h (machine_function): Add by_pieces_in_use. > > gcc/testsuite/ > > PR target/121096 > * gcc.target/i386/memcpy-strategy-14.c: New test. > * gcc.target/i386/memcpy-strategy-15.c: Likewise. > * gcc.target/i386/memset-strategy-10.c: Likewise. > * gcc.target/i386/memset-strategy-11.c: Likewise. > * gcc.target/i386/memset-strategy-12.c: Likewise. > * gcc.target/i386/memset-strategy-13.c: Likewise. > * gcc.target/i386/memset-strategy-14.c: Likewise. > * gcc.target/i386/memset-strategy-15.c: Likewise.
OK, although it looks a bit hackish. Thanks, Uros. > > Signed-off-by: H.J. Lu <[email protected]> > --- > gcc/config/i386/i386-expand.cc | 4 +++ > gcc/config/i386/i386.cc | 21 +++++++++++++++ > gcc/config/i386/i386.h | 3 +++ > .../gcc.target/i386/memcpy-strategy-14.c | 10 +++++++ > .../gcc.target/i386/memcpy-strategy-15.c | 10 +++++++ > .../gcc.target/i386/memset-strategy-10.c | 24 +++++++++++++++++ > .../gcc.target/i386/memset-strategy-11.c | 9 +++++++ > .../gcc.target/i386/memset-strategy-12.c | 8 ++++++ > .../gcc.target/i386/memset-strategy-13.c | 26 +++++++++++++++++++ > .../gcc.target/i386/memset-strategy-14.c | 8 ++++++ > .../gcc.target/i386/memset-strategy-15.c | 9 +++++++ > 11 files changed, 132 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c > create mode 100644 gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-10.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-11.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-12.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-13.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-14.c > create mode 100644 gcc/testsuite/gcc.target/i386/memset-strategy-15.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index ef6c12cd569..ec211d11a94 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -8241,8 +8241,10 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem, > unsigned HOST_WIDE_INT countval = UINTVAL (count); > unsigned HOST_WIDE_INT epilogue_size = countval % max_size; > unsigned int destalign = MEM_ALIGN (destmem); > + cfun->machine->by_pieces_in_use = true; > move_by_pieces (destmem, srcmem, epilogue_size, destalign, > RETURN_BEGIN); > + cfun->machine->by_pieces_in_use = false; > return; > } > if (max_size > 8) > @@ -8490,9 +8492,11 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx > value, rtx vec_value, > unsigned HOST_WIDE_INT countval = UINTVAL (count); > unsigned HOST_WIDE_INT epilogue_size = countval % max_size; > unsigned int destalign = MEM_ALIGN (destmem); > + cfun->machine->by_pieces_in_use = true; > store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val, > vec_value ? vec_value : value, destalign, true, > RETURN_BEGIN); > + cfun->machine->by_pieces_in_use = false; > return; > } > if (max_size > 32) > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 1ca6c612137..471be3e8615 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -11382,6 +11382,23 @@ ix86_address_cost (rtx x, machine_mode, > addr_space_t, bool) > > return cost; > } > + > +/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ > + > +bool > +ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, > + unsigned int align, > + enum by_pieces_operation op, > + bool speed_p) > +{ > + /* Return true when we are currently expanding memcpy/memset epilogue > + with move_by_pieces or store_by_pieces. */ > + if (cfun->machine->by_pieces_in_use) > + return true; > + > + return default_use_by_pieces_infrastructure_p (size, align, op, > + speed_p); > +} > > /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as > this is used for to form addresses to local data when -fPIC is in > @@ -27934,6 +27951,10 @@ static const scoped_attribute_specs *const > ix86_attribute_table[] = > #undef TARGET_ADDRESS_COST > #define TARGET_ADDRESS_COST ix86_address_cost > > +#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P > +#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ > + ix86_use_by_pieces_infrastructure_p > + > #undef TARGET_OVERLAP_OP_BY_PIECES_P > #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 2eb141bab1a..ac0ce687f36 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -2954,6 +2954,9 @@ struct GTY(()) machine_function { > /* True if this is a recursive function. */ > BOOL_BITFIELD recursive_function : 1; > > + /* True if by_pieces op is currently in use. */ > + BOOL_BITFIELD by_pieces_in_use : 1; > + > /* The largest alignment, in bytes, of stack slot actually used. */ > unsigned int max_used_stack_alignment; > > diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c > b/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c > new file mode 100644 > index 00000000000..44cd6523029 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-14.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops > -mstringop-strategy=vector_loop" } */ > +/* { dg-final { scan-assembler-times "movaps" 8 } } */ > + > +char a[2048]; > +char b[2048]; > +void t (void) > +{ > + __builtin_memcpy (a, b, 2048); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c > b/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c > new file mode 100644 > index 00000000000..ea8e4be4ac4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-15.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops > -mstringop-strategy=vector_loop" } */ > +/* { dg-final { scan-assembler-times "movups" 8 } } */ > + > +char *a; > +char *b; > +void t (void) > +{ > + __builtin_memcpy (a, b, 2048); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-10.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c > new file mode 100644 > index 00000000000..d6f2f4ed7ff > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-10.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -march=x86-64 -mstringop-strategy=vector_loop" } */ > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > {^\t?\.} } } */ > + > +/* > +**foo: > +**.LFB[0-9]+: > +** .cfi_startproc > +** xorps %xmm0, %xmm0 > +** xorl %eax, %eax > +** movq %rax, 48\(%(e|r)di\) > +** movups %xmm0, \(%(e|r)di\) > +** movups %xmm0, 16\(%(e|r)di\) > +** movups %xmm0, 32\(%(e|r)di\) > +** ret > +**... > +*/ > + > +void > +foo (char *a) > +{ > + __builtin_memset (a, 0, 56); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-11.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c > new file mode 100644 > index 00000000000..851c6faaa09 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-11.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-avx -msse2 -mtune=generic -minline-all-stringops > -mstringop-strategy=vector_loop" } */ > +/* { dg-final { scan-assembler-times "movaps" 4 } } */ > + > +char a[2048]; > +void t (void) > +{ > + __builtin_memset (a, 0, 2048); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-12.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c > new file mode 100644 > index 00000000000..06cac03426a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-12.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-sse -mstringop-strategy=vector_loop" } */ > + > +void > +foo (char *a) > +{ > + __builtin_memset (a, 0, 56); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-13.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c > new file mode 100644 > index 00000000000..cc2129f60eb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-13.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-sse -mstringop-strategy=unrolled_loop" } */ > +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ > +/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } > {^\t?\.} } } */ > + > +/* > +**foo: > +**.LFB[0-9]+: > +** .cfi_startproc > +** xorl %eax, %eax > +** movq %rax, \(%(e|r)di\) > +** movq %rax, 8\(%(e|r)di\) > +** movq %rax, 16\(%(e|r)di\) > +** movq %rax, 24\(%(e|r)di\) > +** movq %rax, 32\(%(e|r)di\) > +** movq %rax, 40\(%(e|r)di\) > +** movq %rax, 48\(%(e|r)di\) > +** ret > +**... > +*/ > + > +void > +foo (char *a) > +{ > + __builtin_memset (a, 0, 56); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-14.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c > new file mode 100644 > index 00000000000..144235ee082 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-14.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -march=x86-64 -mstringop-strategy=vector_loop" } */ > + > +void > +foo (char *a, int c) > +{ > + __builtin_memset (a, c, 56); > +} > diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-15.c > b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c > new file mode 100644 > index 00000000000..66f9fa60049 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-15.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Os -mno-avx -msse2 -mtune=generic > -mstringop-strategy=vector_loop" } */ > +/* { dg-final { scan-assembler-times "movups" 4} } */ > + > +char *a; > +void t (void) > +{ > + __builtin_memset (a, 0, 2048); > +} > -- > 2.51.0 >
