https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104610
--- Comment #22 from Hongtao.liu <crazylht at gmail dot com> ---
For a 64-byte memory comparison, e.g.
/* Return 1 when the first 64 bytes at s1 and s2 are identical, 0 otherwise.  */
int compare (const char* s1, const char* s2)
{
  const int diff = __builtin_memcmp (s1, s2, 64);
  return diff == 0;
}
We're generating:
# Quoted compiler output for compare(): the 64 bytes are checked as two
# 32-byte halves, each with a load + XOR + vptest sequence.
# NOTE(review): presumably %rdi = s1 and %rsi = s2 (SysV AMD64 argument
# order) -- the excerpt itself does not state the ABI.
# First half: unaligned 32-byte load from (%rsi).
vmovdqu (%rsi), %ymm0
# XOR with the 32 bytes at (%rdi); the result is all-zero iff the halves match.
vpxorq (%rdi), %ymm0, %ymm0
# ZF = 1 iff %ymm0 is entirely zero.
vptest %ymm0, %ymm0
# Some bit differs: take the "not equal" path.
jne .L2
# Second half: the same sequence at byte offset 32.
vmovdqu 32(%rsi), %ymm0
vpxorq 32(%rdi), %ymm0, %ymm0
vptest %ymm0, %ymm0
# Both halves matched: branch to .L5 (that label is not shown in this excerpt).
je .L5
.L2:
# "Not equal" result: %eax = 1, then 1 ^ 1 = 0, so %eax ends up as 0.
movl $1, %eax
xorl $1, %eax
# Zero the upper bits of the vector registers before returning.
vzeroupper
ret
An alternative is to use vpcmpeq + kortest and check the carry flag (kortest sets CF only when the mask is all ones, i.e. every element compared equal):
# Proposed single-pass alternative: compare all 64 bytes with one 512-bit
# compare-into-mask and read the result from the carry flag.
# Unaligned 64-byte load from (%rsi).
vmovdqu64 (%rsi), %zmm0
# Zero %eax up front: setc below writes only %al, and xorl itself changes the
# flags, so it has to come before kortestw.
xorl %eax, %eax
# Compare 16 dword lanes against the 64 bytes at (%rdi); each bit of %k0 is
# set when the corresponding lane is equal.
vpcmpeqd (%rdi), %zmm0, %k0
# CF = 1 iff the 16-bit OR of %k0 with itself is all ones (all lanes equal).
kortestw %k0, %k0
# %al = CF, giving 1 for "equal" and 0 otherwise.
setc %al
# Zero the upper bits of the vector registers; no ret is shown in this excerpt.
vzeroupper
I'm not sure whether it's better.