https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89670

--- Comment #8 from Jörn Engel <joern at purestorage dot com> ---
With the updated testcase below, my gcc-8 fails to remove the branch.

/*
 * usage:
 * gcc -std=gnu11 -Wall -Wextra -g -march=core-avx2 -mbmi -fPIC -O3 % && ./a.out < /dev/zero
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <x86intrin.h>

/*
 * 32-byte GCC vector types. Each carries may_alias, so accesses through
 * them are exempt from type-based alias analysis.
 */
/* 32 lanes of uint8_t. */
typedef uint8_t u8_256 __attribute__((vector_size(32), may_alias));
/* 32 lanes of char; movemask8_256() casts to this before calling the builtin. */
typedef char      c256 __attribute__((vector_size(32), may_alias));
/* Same lanes as u8_256 but declared aligned(1); read256() loads through it. */
typedef uint8_t  u256u __attribute__((vector_size(32), may_alias, aligned(1)));

/*
 * Load 32 bytes starting at buf and return them as a u8_256 vector.
 * The load goes through the aligned(1) vector type, so buf is not
 * required to be 32-byte aligned.
 */
static inline u8_256 read256(const void *buf)
{
        const u256u *unaligned = buf;

        return *unaligned;
}

/*
 * Reduce a 32-lane byte vector to an int via the AVX2 pmovmskb256
 * builtin (one result bit per lane, taken from the lane's top bit).
 */
static inline int movemask8_256(u8_256 mask)
{
        c256 lanes = (c256)mask;

        return __builtin_ia32_pmovmskb256(lanes);
}

/*
 * Count how many bytes, starting from offset 0, are equal between the
 * 32 bytes at a and the 32 bytes at b. Returns a value in 0..32.
 */
static inline int matchlen32(const void *a, const void *b)
{
        /*
         * The vector comparison sets every lane whose bytes are equal; after
         * the inversion, bit i of mask is set when byte i differs.
         */
        int mask = ~movemask8_256(read256(a) == read256(b));
        /* __builtin_ctz(0) is undefined, so the all-equal case is explicit. */
        return mask ? __builtin_ctz(mask) : 32;
}

/*
 * Compare the bytes at src against the bytes at src + 1: first over one
 * 32-byte block and, if that result is at least 30, over the next
 * 32-byte block too. Returns the sum of the results.
 *
 * Touches bytes src[0] .. src[64]. Arithmetic on the void pointer relies
 * on the GNU C extension.
 *
 * NOTE(review): with a first result of 30 or 31 the second count is still
 * added although the first block already contained a mismatch; presumably
 * the looser threshold is deliberate for this testcase -- confirm.
 */
static int ml30(const void *src)
{
        int ml = matchlen32(src, src + 1);
        if (ml >= 30)
                ml += matchlen32(src + 32, src + 1 + 32);
        return ml;
}

static int ml32(const void *src)
{
        int ml = matchlen32(src, src + 1);
        if (ml >= 32)
                ml += matchlen32(src + 32, src + 1 + 32);
        return ml;
}

/*
 * Test driver: fill a 256-byte buffer from stdin, then print the values
 * returned by ml30() and ml32() for that buffer.
 *
 * Returns 0 on success, -1 if stdin cannot supply sizeof(src) bytes.
 */
int main(void)
{
        uint8_t src[256];
        size_t filled = 0;

        /*
         * read() may return fewer bytes than requested (pipes, terminals),
         * so keep reading until the buffer is full. Tracking the fill level
         * in a size_t also avoids a signed/unsigned comparison against
         * sizeof(src).
         */
        while (filled < sizeof(src)) {
                ssize_t n = read(0, src + filled, sizeof(src) - filled);

                if (n <= 0)
                        return -1;      /* read error or premature EOF */
                filled += (size_t)n;
        }
        printf("should be 64: %d\n", ml30(src));
        printf("should be 64: %d\n", ml32(src));
        return 0;
}

Reply via email to