https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89670
--- Comment #8 from Jörn Engel <joern at purestorage dot com> --- Updated testcase below fails to remove the branch with my gcc-8.
/*
 * Testcase: measures how many leading bytes of a buffer equal the bytes one
 * position later, using 32-byte vector compares, and prints the result for
 * two variants (ml30, ml32) that differ only in the threshold at which the
 * second 32-byte compare is performed.
 *
 * usage:
 * gcc -std=gnu11 -Wall -Wextra -g -march=core-avx2 -mbmi -fPIC -O3 % && ./a.out < /dev/zero
 *
 * NOTE(review): '%' in the command line is presumably an editor placeholder
 * for this source file's name -- substitute the real file name.
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <x86intrin.h>

/* 32-byte vector of unsigned bytes; may_alias lets it overlay any memory. */
typedef uint8_t u8_256 __attribute__((vector_size(32), may_alias));
/* 32-byte vector of char; the element type movemask8_256() casts to. */
typedef char c256 __attribute__((vector_size(32), may_alias));
/* Same as u8_256 but with alignment 1, used for loads from arbitrary addresses. */
typedef uint8_t u256u __attribute__((vector_size(32), may_alias, aligned(1)));

/*
 * Load 32 bytes starting at buf.
 * The access goes through the aligned(1) type, so buf needs no particular
 * alignment. The caller must guarantee 32 readable bytes at buf.
 */
static inline u8_256 read256(const void *buf)
{
	return *(const u256u *)buf;
}

/*
 * Collapse a 32-byte vector into a 32-bit integer via the pmovmskb256
 * builtin (one result bit per byte lane, taken from that byte's top bit).
 */
static inline int movemask8_256(u8_256 mask)
{
	return __builtin_ia32_pmovmskb256((c256)mask);
}

/*
 * Return the number of leading bytes (0..32) that are equal between the
 * 32 bytes at a and the 32 bytes at b.
 */
static inline int matchlen32(const void *a, const void *b)
{
	/*
	 * The vector == yields all-ones in equal lanes and zero elsewhere, so
	 * the movemask has a 1 bit per equal byte; inverting it leaves 1 bits
	 * at the mismatching byte positions.
	 */
	int mask = ~movemask8_256(read256(a) == read256(b));
	/*
	 * Lowest set bit == index of the first mismatch. mask == 0 means all
	 * 32 bytes matched; __builtin_ctz(0) is undefined, hence the explicit
	 * 32 for that case.
	 * NOTE(review): this conditional is presumably the branch the report
	 * above says is not removed -- confirm against the bug thread.
	 */
	return mask ? __builtin_ctz(mask) : 32;
}

/*
 * Match length of src against src + 1, over at most 64 bytes.
 * The second 32-byte compare is added whenever the first one returned 30,
 * 31 or 32, i.e. also when a mismatch was found at byte 30 or 31.
 * Reads bytes src[0] .. src[64].
 * Arithmetic on the void pointer relies on the GNU C extension that treats
 * sizeof(void) as 1 (the usage line builds with -std=gnu11).
 */
static int ml30(const void *src)
{
	int ml = matchlen32(src, src + 1);
	if (ml >= 30)
		ml += matchlen32(src + 32, src + 1 + 32);
	return ml;
}

/*
 * Same as ml30() except for the threshold: the second 32-byte compare is
 * added only when the first one returned 32. matchlen32() never returns
 * more than 32, so ">= 32" is true exactly when all 32 bytes matched.
 */
static int ml32(const void *src)
{
	int ml = matchlen32(src, src + 1);
	if (ml >= 32)
		ml += matchlen32(src + 32, src + 1 + 32);
	return ml;
}

/*
 * Read exactly 256 bytes from standard input (fd 0) and print the ml30()
 * and ml32() results. Returns -1 if read() did not deliver the full 256
 * bytes (error or short read), 0 otherwise.
 * With all-zero input every byte matches its successor, so each function
 * returns 32 + 32 = 64, which is what the printed labels announce.
 */
int main(void)
{
	uint8_t src[256];
	ssize_t n;

	n = read(0, src, sizeof(src));
	if (n != sizeof(src))
		return -1;
	printf("should be 64: %d\n", ml30(src));
	printf("should be 64: %d\n", ml32(src));
	return 0;
}