https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117568

            Bug ID: 117568
           Summary: z13: Use vector instructions for fixed length memcmp
           Product: gcc
           Version: 13.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jens.seifert at de dot ibm.com
  Target Milestone: ---

#include <memory.h>
#include <vecintrin.h>

For fixed lengths of up to 16 bytes, consider using vector instructions for memcmp.
This is not required for lengths of 1, 2, 4, or 8 bytes, but it is for the remaining lengths.

For general memcmp.
memcmp == 0:
Use lochi to get result of clc
otherwise
maybe use 2x lochi instead of ipm + 2 shifts after clc, but I am not 100% sure whether
lochi is faster.

/* Scalar baseline: true when the first 15 bytes at a and b are identical. */
bool eq15(const unsigned char *a, const unsigned char *b)
{
    const int diff = memcmp(a, b, 15);

    return !diff;
}

bool eq15_vec(const unsigned char *a, const unsigned char *b)
{
    const int len = 15;
    vector unsigned char va = vec_load_len(a, len-1);
    vector unsigned char vb = vec_load_len(b, len-1);
    return vec_all_eq(va, vb);
}

eq15(unsigned char const*, unsigned char const*):
        clc     0(15,%r3),0(%r2)
        ipm     %r2
        sll     %r2,2
        sra     %r2,30
        lpr     %r0,%r2
        ahi     %r0,-1
        risbgn  %r2,%r0,64-1,128+63,32+1
        br      %r14
eq15_vec(unsigned char const*, unsigned char const*):
        lhi     %r1,14
        vll     %v0,%r1,0(%r2)
        vll     %v2,%r1,0(%r3)
        lghi    %r2,0
        vceqbs  %v0,%v0,%v2
        locghie %r2,1
        br      %r14

/* Scalar baseline: memcmp-style three-way compare of the first 15 bytes. */
int compare15(const unsigned char *a, const unsigned char *b)
{
    const int order = memcmp(a, b, 15);

    return order;
}

int compare15_vec(const unsigned char *a, const unsigned char *b)
{
    const int len = 15;
    vector unsigned char va = vec_load_len(a, len-1);
    vector unsigned char vb = vec_load_len(b, len-1);
    vector int le = (vector int)vec_subc_u128(va, vb);
    vector int ge = (vector int)vec_subc_u128(vb, va);
    return vec_extract(le - ge, 3);
}

compare15(unsigned char const*, unsigned char const*):
        clc     0(15,%r3),0(%r2)
        ipm     %r2
        sllg    %r0,%r2,34
        srag    %r2,%r0,62
        br      %r14
compare15_vec(unsigned char const*, unsigned char const*):
        lhi     %r1,14
        vll     %v4,%r1,0(%r2)
        vll     %v0,%r1,0(%r3)
        vscbiq  %v2,%v4,%v0
        vscbiq  %v6,%v0,%v4
        vsf     %v1,%v2,%v6
        vlgvf   %r2,%v1,3
        lgfr    %r2,%r2
        br      %r14

=> For general memcmp vectorization does not pay off.

Reply via email to