https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119186

            Bug ID: 119186
           Summary: Using __builtin_ctz results in a error result.
           Product: gcc
           Version: 12.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: drfeng08 at gmail dot com
  Target Milestone: ---

> g++ --version
g++ (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0
Copyright (C) 2022 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> cat reproduce.cpp 
#include <immintrin.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>

inline int sse_memcmp2(const char* p1, const char* p2, int size) {
    __m128i left = _mm_lddqu_si128((__m128i*)(p1));
    __m128i right = _mm_lddqu_si128((__m128i*)(p2));
    __m128i nz = ~_mm_cmpeq_epi8(left, right);
    unsigned short mask = _mm_movemask_epi8(nz);
    int index = __builtin_ctz(mask);
    asm volatile("" : : : "memory");
    std::cout << index << ":" << size << std::endl;
    if (index >= size) return 0;
    int l = (uint8_t)p1[index];
    int r = (uint8_t)p2[index];
    return l - r;
}

int main() {
    const char c1[32] = "0123456789abcdef";
    const char c2[32] = "0123456789abcdef";

    size_t length = 16;

    int res = memcmp(c1, c2, length);
    int res2 = sse_memcmp2(c1, c2, length);
    if (res != res2) {
        std::abort();
    }
    std::cout << "finished" << std::endl;
    return 0;
}
> g++ reproduce.cpp -msse4.2 -O2 -g -fno-strict-aliasing
32:16
Aborted (core dumped)


It works in gcc 11.4.0 or -O0 in 12.3.0v

Reply via email to