https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121011

            Bug ID: 121011
           Summary: Bad optimizations by GCC 15.0.1 from Fedora
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vda.linux at googlemail dot com
  Target Milestone: ---

/*
 * XOR `count` bytes of src1 with the corresponding bytes of src2 and store
 * the result in dst.  The bulk of the data is processed one `long` at a
 * time; whatever is left over is processed one byte at a time.
 *
 * NOTE(review): the long-sized loads/stores go through casts of byte
 * pointers, so this presumably assumes the buffers may be accessed as
 * `long` (alignment/aliasing) -- confirm against the callers.
 */
void xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count) {
        unsigned char *d = dst;
        const unsigned char *s1 = src1;
        const unsigned char *s2 = src2;
        /* Mask of every bit of `count` at or above sizeof(long); assuming
         * sizeof(long) is a power of two, `count & longcount` is nonzero
         * exactly while at least sizeof(long) bytes remain. */
#define longcount ~(unsigned)(sizeof(long)-1)
        /* Word loop: XOR one long per iteration and advance all pointers. */
        while (count & longcount) {
                *(long*)d = *(long*)s1 ^ *(long*)s2;
                count -= sizeof(long);
                d += sizeof(long);
                s1 += sizeof(long);
                s2 += sizeof(long);
        }
        /* Tail loop: XOR the remaining (fewer than sizeof(long)) bytes. */
        while (count--)
                *d++ = *s1++ ^ *s2++;
}

$ gcc -Os -S bitops.c

The generated code is rather bad (see the '#' notes after .L7 below).

        .file   "bitops.c"
        .text
        .globl  xorbuf_3
        .type   xorbuf_3, @function
# ----------------------------------------------------------------------
# void xorbuf_3(void *dst, const void *src1, const void *src2,
#               unsigned count)
# Syntax: AT&T (GAS), x86-64; compiler output from "gcc -Os -S".
# In (SysV AMD64 argument order -- matches the C prototype above):
#       %rdi = dst, %rsi = src1, %rdx = src2, %ecx = count
# Scratch used: %rax (byte offset), %r8 (temporaries / tail length),
#       %cl (tail-loop byte, after count is no longer needed)
# ----------------------------------------------------------------------
xorbuf_3:
.LFB0:
        .cfi_startproc
        xorl    %eax, %eax              # rax = 0: byte offset for the word loop
.L2:
        movl    %ecx, %r8d
        subl    %eax, %r8d              # r8d = count - offset (bytes still to do)
        cmpl    $7, %r8d
        jbe     .L7                     # fewer than 8 bytes left: leave word loop
        movq    (%rsi,%rax), %r8        # r8 = 8 bytes of src1 at offset
        xorq    (%rdx,%rax), %r8        # r8 ^= 8 bytes of src2 at offset
        movq    %r8, (%rdi,%rax)        # store 8 result bytes to dst at offset
        addq    $8, %rax                # offset += 8
        jmp     .L2
.L7:
        movl    %ecx, %eax
        shrl    $3, %eax                # eax = count / 8 (number of 8-byte words)
        leal    0(,%rax,8), %r8d        # r8d = 8 * (count / 8) = bytes already done
# 3 insns above: bold effort to calculate the number of bytes we processed. too
bad it was already present in %rax...

        imull   $-8, %eax, %eax         # eax = -8 * (count / 8)
        addq    %r8, %rdx               # src2 += bytes already done
        addq    %r8, %rsi               # src1 += bytes already done
        addq    %r8, %rdi               # dst  += bytes already done
        leal    (%rax,%rcx), %r8d       # r8d = count - 8 * (count / 8) = tail bytes
# subtracting (%rax * 8) from %rcx by... %rcx += %rax * -8! With IMUL!! Even
though we happen to have a ready (%rax * 8) result in %r8 and could just "subq
%r8, %rcx"!!!

        xorl    %eax, %eax              # rax = 0: byte offset for the tail loop
.L4:
        cmpq    %r8, %rax
        je      .L8                     # offset == tail length: done
        movb    (%rsi,%rax), %cl        # cl = src1 byte (count in %ecx is dead here)
        xorb    (%rdx,%rax), %cl        # cl ^= src2 byte
        movb    %cl, (%rdi,%rax)        # store result byte to dst
        incq    %rax                    # offset += 1
        jmp     .L4
.L8:
        ret
        .cfi_endproc
.LFE0:
        .size   xorbuf_3, .-xorbuf_3
        .ident  "GCC: (GNU) 15.0.1 20250418 (Red Hat 15.0.1-0)"
        .section        .note.GNU-stack,"",@progbits

$  gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/15/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-redhat-linux
Configured with: ../configure --enable-bootstrap
--enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,cobol,lto
--prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info
--with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared
--enable-threads=posix --enable-checking=release --enable-multilib
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions
--enable-gnu-unique-object --enable-linker-build-id
--with-gcc-major-version-only --enable-libstdcxx-backtrace
--with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu
--enable-plugin --enable-initfini-array
--with-isl=/builddir/build/BUILD/gcc-15.0.1-build/gcc-15.0.1-20250418/obj-x86_64-redhat-linux/isl-install
--enable-offload-targets=nvptx-none,amdgcn-amdhsa --enable-offload-defaulted
--without-cuda-driver --enable-gnu-indirect-function --enable-cet
--with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux
--with-build-config=bootstrap-lto --enable-link-serialization=1
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 15.0.1 20250418 (Red Hat 15.0.1-0) (GCC)

Reply via email to