https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109499

            Bug ID: 109499
           Summary: Unnecessary zeroing in SVE loops
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rsandifo at gcc dot gnu.org
                CC: rguenth at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64*-*-*

The following two loops contain unnecessary zeroing operations:

// -march=armv8.2-a+sve -O2
// Reproducer 1: masked copy.  For each i in [0, n), x[i] is replaced by
// y[i] when the old x[i] is nonzero, and by 0 otherwise.  x is both the
// condition source and the destination; y is only read.
void
f (int *__restrict x, int *__restrict y, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = x[i] ? y[i] : 0;   // y[i] is only used when x[i] != 0
}

// Reproducer 2: masked copy with an extra operation.  Same shape as f,
// except that the selected value is y[i] & 15 rather than y[i] itself.
void
g (int *__restrict x, int *__restrict y, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = x[i] ? y[i] & 15 : 0;   // AND is applied only on the x[i] != 0 path
}

Output:

// Generated code for f.  Arguments: x0 = x, x1 = y, w2 = n.
// Loop state: x3 = current element index, x4 = elements per vector,
// p0 = lanes still inside [0, n), p1 = lanes of p0 where x[i] != 0,
// z1 = all-zero vector used as the "else" value.
f(int*, int*, int):
        cmp     w2, 0                         // n <= 0: nothing to do
        ble     .L1
        mov     x3, 0                         // index = 0
        cntw    x4                            // x4 = number of 32-bit lanes per vector
        whilelo p0.s, wzr, w2                 // p0 = lanes with index < n, starting at 0
        mov     z1.b, #0                      // z1 = zero vector
.L3:
        ld1w    z0.s, p0/z, [x0, x3, lsl 2]   // z0 = x[index...], inactive lanes zeroed
        cmpne   p1.s, p0/z, z0.s, #0          // p1 = active lanes where x[i] != 0
        ld1w    z0.s, p1/z, [x1, x3, lsl 2]   // Sets inactive lanes to zero
        sel     z0.s, p1, z0.s, z1.s          // Not needed
        st1w    z0.s, p0, [x0, x3, lsl 2]     // store the result back to x under p0
        add     x3, x3, x4                    // advance index by one vector of elements
        whilelo p0.s, w3, w2                  // recompute the in-range predicate
        b.any   .L3                           // loop while any lane is still active
.L1:
        ret
// Generated code for g.  Arguments: x0 = x, x1 = y, w2 = n.
// Loop state: x3 = current element index, x4 = elements per vector,
// p0 = lanes still inside [0, n), p1 = lanes of p0 where x[i] != 0,
// z1 = the constant 15 in every 32-bit lane.
g(int*, int*, int):
        cmp     w2, 0                         // n <= 0: nothing to do
        ble     .L6
        mov     x3, 0                         // index = 0
        cntw    x4                            // x4 = number of 32-bit lanes per vector
        whilelo p0.s, wzr, w2                 // p0 = lanes with index < n, starting at 0
        mov     z1.s, #15                     // z1 = 15 in each lane (the AND mask)
.L8:
        ld1w    z0.s, p0/z, [x0, x3, lsl 2]   // z0 = x[index...], inactive lanes zeroed
        cmpne   p1.s, p0/z, z0.s, #0          // p1 = active lanes where x[i] != 0
        ld1w    z0.s, p1/z, [x1, x3, lsl 2]   // Sets inactive lanes to zero
        movprfx z0.s, p1/z, z0.s              // Not needed
        and     z0.s, p1/m, z0.s, z1.s        // Could be AND (immediate)
        st1w    z0.s, p0, [x0, x3, lsl 2]     // store the result back to x under p0
        add     x3, x3, x4                    // advance index by one vector of elements
        whilelo p0.s, w3, w2                  // recompute the in-range predicate
        b.any   .L8                           // loop while any lane is still active
.L6:
        ret

It would be good to model somehow that IFN_MASK_LOAD has a zeroing effect on
AArch64, so that this is exposed at the gimple level.  At the same time, we
probably don't want the behaviour of the ifn to depend on target hooks.  Not
sure what the best design is here.

Reply via email to