https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121011
Bug ID: 121011
Summary: Bad optimizations by GCC 15.0.1 from Fedora
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: vda.linux at googlemail dot com
Target Milestone: ---

void xorbuf_3(void *dst, const void *src1, const void *src2, unsigned count)
{
    unsigned char *d = dst;
    const unsigned char *s1 = src1;
    const unsigned char *s2 = src2;
#define longcount ~(unsigned)(sizeof(long)-1)
    while (count & longcount) {
        *(long*)d = *(long*)s1 ^ *(long*)s2;
        count -= sizeof(long);
        d += sizeof(long);
        s1 += sizeof(long);
        s2 += sizeof(long);
    }
    while (count--)
        *d++ = *s1++ ^ *s2++;
}

$ gcc -Os -S bitops.c

The result is rather bad.

    .file "bitops.c"
    .text
    .globl xorbuf_3
    .type xorbuf_3, @function
xorbuf_3:
.LFB0:
    .cfi_startproc
    xorl %eax, %eax
.L2:
    movl %ecx, %r8d
    subl %eax, %r8d
    cmpl $7, %r8d
    jbe .L7
    movq (%rsi,%rax), %r8
    xorq (%rdx,%rax), %r8
    movq %r8, (%rdi,%rax)
    addq $8, %rax
    jmp .L2
.L7:
    movl %ecx, %eax
    shrl $3, %eax
    leal 0(,%rax,8), %r8d
# 3 insns above: bold effort to calculate the number of bytes we processed. too bad it was already present in %rax...
    imull $-8, %eax, %eax
    addq %r8, %rdx
    addq %r8, %rsi
    addq %r8, %rdi
    leal (%rax,%rcx), %r8d
# subtracting (%rax * 8) from %rcx by... %rcx += %rax * -8! With IMUL!! Even though we happen to have a ready (%rax * 8) result in %r8 and could just "subq %r8, %rcx"!!!
    xorl %eax, %eax
.L4:
    cmpq %r8, %rax
    je .L8
    movb (%rsi,%rax), %cl
    xorb (%rdx,%rax), %cl
    movb %cl, (%rdi,%rax)
    incq %rax
    jmp .L4
.L8:
    ret
    .cfi_endproc
.LFE0:
    .size xorbuf_3, .-xorbuf_3
    .ident "GCC: (GNU) 15.0.1 20250418 (Red Hat 15.0.1-0)"
    .section .note.GNU-stack,"",@progbits

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/15/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-redhat-linux
Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,fortran,objc,obj-c++,ada,go,d,m2,cobol,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --enable-libstdcxx-backtrace --with-libstdcxx-zoneinfo=/usr/share/zoneinfo --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl=/builddir/build/BUILD/gcc-15.0.1-build/gcc-15.0.1-20250418/obj-x86_64-redhat-linux/isl-install --enable-offload-targets=nvptx-none,amdgcn-amdhsa --enable-offload-defaulted --without-cuda-driver --enable-gnu-indirect-function --enable-cet --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux --with-build-config=bootstrap-lto --enable-link-serialization=1
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 15.0.1 20250418 (Red Hat 15.0.1-0) (GCC)