https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120683
H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|                            |2025-06-16
     Ever confirmed|0                           |1
             Status|UNCONFIRMED                 |NEW
            Summary|vector_loop generates       |vector_loop generates
                   |horrible prologue and       |horrible prologue and
                   |epilogue on memset          |epilogue on memset/memcpy

--- Comment #1 from H.J. Lu <hjl.tools at gmail dot com> ---
memcpy isn't much better:

---
#include <sys/types.h>

void
foo (void *dest, void *src, size_t len)
{
  __builtin_memcpy (dest, src, len);
}
---

-O2 -mmemcpy-strategy=vector_loop:256:noalign,libcall:-1:noalign
-minline-all-stringops generates:

foo:
.LFB6:
        .cfi_startproc
        movq    %rdi, %r8
        movq    %rsi, %rax
        cmpq    $64, %rdx
        jnb     .L13
.L2:
        andl    $63, %edx
        je      .L1
        xorl    %ecx, %ecx
.L5:
        movzbl  (%rax,%rcx), %esi
        movb    %sil, (%r8,%rcx)
        addq    $1, %rcx
        cmpq    %rdx, %rcx
        jb      .L5
.L1:
        ret
        .p2align 4,,10
        .p2align 3
.L13:
        movq    %rdx, %rcx
        xorl    %eax, %eax
        andq    $-64, %rcx
.L3:
        movdqu  (%rsi,%rax), %xmm3
        movdqu  16(%rsi,%rax), %xmm2
        movdqu  32(%rsi,%rax), %xmm1
        movdqu  48(%rsi,%rax), %xmm0
        movups  %xmm3, (%rdi,%rax)
        movups  %xmm2, 16(%rdi,%rax)
        movups  %xmm1, 32(%rdi,%rax)
        movups  %xmm0, 48(%rdi,%rax)
        addq    $64, %rax
        cmpq    %rcx, %rax
        jb      .L3
        leaq    (%rdi,%rax), %r8
        addq    %rsi, %rax
        jmp     .L2
        .cfi_endproc
.LFE6:
        .size   foo, .-foo