https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120683

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|vector_loop generates poor  |vector_loop/unrolled_loop
                   |codes on memset/memcpy      |generates poor codes on
                   |                            |memset/memcpy

--- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> ---
With -mmemset-strategy=unrolled_loop, GCC generates the following code for a
61-byte memset:

[hjl@gnu-tgl-3 pr120683]$ cat x.c
void
foo (char *dest)
{
  __builtin_memset (dest, 0, 61);
}
[hjl@gnu-tgl-3 pr120683]$ make x.s
/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/ -O2
-minline-all-stringops
-mmemset-strategy=unrolled_loop:256:noalign,libcall:-1:noalign -mno-sse -S x.c
[hjl@gnu-tgl-3 pr120683]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        xorl    %eax, %eax
.L2:
        movl    %eax, %edx
        movq    $0, (%rdi,%rdx)
        movq    $0, 8(%rdi,%rdx)
        movq    $0, 16(%rdi,%rdx)
        movq    $0, 24(%rdi,%rdx)
        addl    $32, %eax
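        jc      .L2 <<<<<<<< Why is this needed? %eax goes from 0 to 32 here, so the add never sets CF: the branch is never taken and the loop body runs only once.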
        movb    $0, 28(%rdi,%rax)
        andq    $0, (%rdi,%rax)
        andq    $0, 8(%rdi,%rax)
        andq    $0, 16(%rdi,%rax)
        andl    $0, 24(%rdi,%rax)
        ret
        .cfi_endproc
.LFE0:
        .size   foo, .-foo
        .ident  "GCC: (GNU) 16.0.0 20250617 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-tgl-3 pr120683]$

Reply via email to