https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121120

            Bug ID: 121120
           Summary: Missed vectorization of default struct equality
                    operator
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tdebock at DRWUK dot com
  Target Milestone: ---

#include <array>
#include <cstdint>

// Reproducer 1: two 64-bit integer members. The assembly listings in this
// report access them at byte offsets 0 and 8.
struct A
{
    int64_t a;
    int64_t b;

    // Defaulted equality (C++20): member-wise comparison in declaration order.
    bool operator==(const A&) const = default;
};

// Standalone wrapper around A's defaulted operator==; this is the "f" symbol
// shown in the assembly listings of this report.
bool f(const A& x, const A& y)
{
    return x == y;
}

// Reproducer 2: mixed member widths - one 64-bit member followed by two
// 32-bit members. The assembly listings in this report access them at byte
// offsets 0, 8 and 12.
struct A2
{
    int64_t a;
    int32_t b;
    int32_t c;

    // Defaulted equality (C++20): member-wise comparison in declaration order.
    bool operator==(const A2&) const = default;
};

// Standalone wrapper around A2's defaulted operator==; this is the "f2"
// symbol shown in the assembly listings of this report.
bool f2(const A2& x, const A2& y)
{
    return x == y;
}

// Reproducer 3: a 64-bit member followed by a std::array of two 32-bit
// integers, i.e. an aggregate member rather than only scalar members.
struct A3
{
    int64_t x;
    std::array<int32_t, 2> a;

    // Defaulted equality (C++20): member-wise comparison in declaration order;
    // the array member is compared with std::array's own operator==.
    bool operator==(const A3&) const = default;
};

// Standalone wrapper around A3's defaulted operator==; this is the "f3"
// symbol shown in the assembly listings of this report.
bool f3(const A3& x, const A3& y)
{
    return x == y;
}

gcc (with -O3) produces:
"f(A const&, A const&)":
        mov     rdx, QWORD PTR [rsi]
        xor     eax, eax
        cmp     QWORD PTR [rdi], rdx
        jne     .L1
        mov     rax, QWORD PTR [rsi+8]
        cmp     QWORD PTR [rdi+8], rax
        sete    al
.L1:
        ret
"f2(A2 const&, A2 const&)":
        mov     rcx, QWORD PTR [rsi]
        xor     eax, eax
        cmp     QWORD PTR [rdi], rcx
        jne     .L5
        mov     eax, DWORD PTR [rsi+12]
        mov     edx, DWORD PTR [rsi+8]
        cmp     DWORD PTR [rdi+12], eax
        sete    al
        cmp     DWORD PTR [rdi+8], edx
        sete    dl
        and     eax, edx
.L5:
        ret
"f3(A3 const&, A3 const&)":
        mov     rdx, QWORD PTR [rsi]
        xor     eax, eax
        cmp     QWORD PTR [rdi], rdx
        jne     .L8
        mov     rax, QWORD PTR [rsi+8]
        cmp     QWORD PTR [rdi+8], rax
        sete    al
.L8:
        ret

while clang gives:
f(A const&, A const&):
        vmovdqu xmm0, xmmword ptr [rsi]
        vpxor   xmm0, xmm0, xmmword ptr [rdi]
        vptest  xmm0, xmm0
        sete    al
        ret

f2(A2 const&, A2 const&):
        vmovdqu xmm0, xmmword ptr [rdi]
        vpxor   xmm0, xmm0, xmmword ptr [rsi]
        vptest  xmm0, xmm0
        sete    al
        ret

f3(A3 const&, A3 const&):
        vmovdqu xmm0, xmmword ptr [rsi]
        vpxor   xmm0, xmm0, xmmword ptr [rdi]
        vptest  xmm0, xmm0
        sete    al
        ret

No matter how many fields are added to the struct, gcc refuses to vectorize the
equality check. __builtin_memcmp could be used instead, as it does vectorize
properly, but it also compares any padding bytes in the structs (which I
believe is undefined behaviour). Clang handles padding when comparing structs
by splitting the struct into consecutive padding-free sections and vectorizing
the reads within each section separately. I believe gcc should offer this
optimization as well, and I am interested in implementing it. Adding a new
pattern to the slp_patterns seems appropriate to me, but the slp_trees are not
very well documented, so any ideas are welcome.

Reply via email to