https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97759
--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> --- For the testcase: --- #include<stdbool.h> bool is_power2_popcnt (int a) { return __builtin_popcount (a) == 1; } bool is_power2_arithmetic (int a) { return !(a & (a - 1)) && a; } --- compiling with gcc -O2 -mavx2 -S gives: --- .file "test.c" .text .p2align 4 .globl is_power2_popcnt .type is_power2_popcnt, @function is_power2_popcnt: .LFB0: .cfi_startproc popcntl %edi, %edi cmpl $1, %edi sete %al ret .cfi_endproc .LFE0: .size is_power2_popcnt, .-is_power2_popcnt .p2align 4 .globl is_power2_arithmetic .type is_power2_arithmetic, @function is_power2_arithmetic: .LFB1: .cfi_startproc leal -1(%rdi), %eax testl %edi, %eax sete %al testl %edi, %edi setne %dl andl %edx, %eax ret .cfi_endproc --- The latency of popcnt is 3 cycles; the other instructions each have a latency of 1. It is not possible to tell which version is better from static inspection alone.