https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112835
Florian Weimer <fw at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |fw at gcc dot gnu.org
--- Comment #3 from Florian Weimer <fw at gcc dot gnu.org> ---
There may be a related issue on x86-64:
#include <string.h>
/* Reproducer: return 1 if the first 64 bytes at a and b are identical,
   0 otherwise.  The fixed length of 64 is what the assembly listings
   below expand inline as two 32-byte vector comparisons.  */
int
f (const char *a, const char *b)
{
return memcmp (a, b, 64) == 0;
}
With -O2 -march=x86-64-v3, this produces:
# Actual compiler output for f (AT&T syntax), kept verbatim.
# Assuming the SysV AMD64 calling convention: %rdi = a, %rsi = b,
# result returned in %eax (1 = the 64 bytes are equal, 0 = they differ).
f:
# First 32 bytes: XOR a[0..31] with b[0..31]; the result is all-zero
# exactly when the two halves are equal.
vmovdqu (%rdi), %ymm0
vpxor (%rsi), %ymm0, %ymm0
# vptest of a register with itself sets ZF iff the register is all-zero.
vptest %ymm0, %ymm0
# ZF clear => the first halves differ; skip the second comparison.
jne .L2
# Second 32 bytes: same XOR-and-test on a[32..63] and b[32..63].
vmovdqu 32(%rdi), %ymm0
vpxor 32(%rsi), %ymm0, %ymm0
vptest %ymm0, %ymm0
# ZF set => both halves are equal; go return 1.
je .L5
.L2:
# Mismatch path: 1 ^ 1 = 0.  This two-instruction sequence computes a
# constant 0 and is the redundancy the report is about.
movl $1, %eax
xorl $1, %eax
# Clear the upper halves of the ymm registers before returning.
vzeroupper
ret
# Alignment padding before the next label: align to 16 bytes if that
# needs at most 10 bytes of padding, otherwise align to 8 bytes.
.p2align 4,,10
.p2align 3
.L5:
# Match path: 0 ^ 1 = 1.  Again a constant built from two instructions.
xorl %eax, %eax
xorl $1, %eax
vzeroupper
ret
The 32-byte comparison produces a branchless sequence, so I would expect the
second half of the 64-byte comparison to be branchless as well, something
like this:
# Sequence the commenter expects instead (AT&T syntax), kept verbatim.
# Assuming the SysV AMD64 calling convention: %rdi = a, %rsi = b,
# result returned in %eax (1 = the 64 bytes are equal, 0 = they differ).
f:
# Preload the result with 0.  This must happen before the vptest below
# because xorl overwrites the flags; it also means the later write to
# %al leaves the upper bits of %eax zero.
xorl %eax, %eax
# First 32 bytes: the XOR is all-zero exactly when the halves are equal.
vmovdqu (%rdi), %ymm0
vpxor (%rsi), %ymm0, %ymm0
# vptest of a register with itself sets ZF iff the register is all-zero.
vptest %ymm0, %ymm0
# First halves differ: exit early with %eax still 0.
jne 1f
# Second 32 bytes: same XOR-and-test on a[32..63] and b[32..63].
vmovdqu 32(%rdi), %ymm0
vpxor 32(%rsi), %ymm0, %ymm0
vptest %ymm0, %ymm0
# Branchless result: %al = 1 iff the second halves are equal as well.
sete %al
1:
# Shared exit: clear the upper halves of the ymm registers and return.
vzeroupper
ret