https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91166

            Bug ID: 91166
           Summary: [SVE] Unfolded ZIPs of constants
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rsandifo at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64-linux-gnu

Compiling this testcase with -O3 for -march=armv8.2-a+sve:

void
foo (double x[][4])
{
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      x[i][j] = 0;
}

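creates redundant ZIPs of constants (the instructions marked "// Zero"
below merely recompute the all-zero vector that is already held in z0):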

foo:
.LFB0:
        .cfi_startproc
        addvl   x3, x0, #1
        mov     x1, 0
        mov     x2, 16
        mov     z0.b, #0
        mov     x4, x2
        zip1    z0.d, z0.d, z0.d     // Zero
        uqdecd  x4
        zip1    z1.d, z0.d, z0.d     // Zero
        whilelo p1.d, xzr, x2
        zip2    z0.d, z0.d, z0.d     // Zero
        whilelo p0.d, xzr, x4
.L2:
        st1d    z1.d, p1, [x0, x1, lsl 3]
        st1d    z0.d, p0, [x3, x1, lsl 3]
        incw    x1
        whilelo p0.d, x1, x4
        whilelo p1.d, x1, x2
        b.any   .L2
        ret
        .cfi_endproc

A similar problem occurs when the stored value is a variable rather than
a constant:

void
foo (double x[][4], double y)
{
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      x[i][j] = y;
}

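This can be fixed by folding a VEC_PERM_EXPR whose two input vectors are
both duplicates of the same value: whatever the selector is, every lane of
the result holds that value, so the permute can be replaced by one of its
inputs.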

Reply via email to