https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119494

            Bug ID: 119494
           Summary: z196: Inefficient implementation for
                    __builtin_parityll for z196 < z15
           Product: gcc
           Version: 14.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jens.seifert at de dot ibm.com
  Target Milestone: ---

// Reproducer: reports whether x contains an odd number of 1-bits,
// going through the compiler's 64-bit parity builtin.
bool parityll(unsigned long long x)
{
    const int odd_bit_count = __builtin_parityll(x);
    return odd_bit_count != 0;
}

Code generation for z15 and above is optimal:
// Parity expressed through the population count: the result is the
// least-significant bit of the number of 1-bits in x.
bool parityll(unsigned long long x)
{
    const int set_bits = __builtin_popcountll(x);
    const int low_bit = set_bits & 1;
    return low_bit != 0;
}

parityll(unsigned long long):
        popcnt  %r2,%r2,8
        risbg   %r2,%r2,63,128+63,0
        br      %r14

But for targets older than z15 (z196 and later), the generated code is:
parityll(unsigned long long):
        popcnt  %r2,%r2
        sllg    %r1,%r2,32
        agr     %r2,%r1
        sllg    %r0,%r2,16
        agr     %r2,%r0
        sllg    %r3,%r2,8
        agr     %r2,%r3
        risbg   %r2,%r2,64-1,128+63,7+1
        br      %r14

It should use rxsbg instead of the sllg/agr sequence:
// Hand-written illustration of the proposed pre-z15 sequence: run popcnt
// via inline asm, then XOR-fold the result so that bit 0 of x ends up as
// the XOR of bit 0 of each of the eight bytes, and return that bit.
// NOTE(review): this relies on the two-operand popcnt producing a separate
// bit count in every byte of the register -- confirm against the
// z/Architecture Principles of Operation.
bool parity_z196(unsigned long long x)
{
   // x is both the input and the output register; "cc" declares the
   // condition code as clobbered by the instruction.
   __asm__("popcnt %0,%1":"=r"(x):"r"(x):"cc");
   // Fold the upper 32 bits onto the lower 32 bits.
   x ^= (x >> 32);
   // Fold again at 16-bit distance.
   x ^= (x >> 16);
   // Final fold at 8-bit distance; bit 0 now combines bits 0, 8, ..., 56
   // of the popcnt result.
   x ^= (x >> 8);
   // Only the least-significant bit carries the answer.
   return x & 1;
}

parity_z196(unsigned long long):
        popcnt %r2,%r2
        rxsbg   %r2,%r2,32,63,32
        rxsbg   %r2,%r2,16,63,48
        rxsbg   %r2,%r2,8,63,56
        risbg   %r2,%r2,63,128+63,0
        br      %r14

Reply via email to