https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117607

            Bug ID: 117607
           Summary: unnecessary scev optimization for popcnt
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: fxue at os dot amperecomputing.com
  Target Milestone: ---

Consider the following test case:

  int *foo(long v, int *p)
  {
    while (v) {
      long t = v;

      v &= (v - 1);
      *p++ = __builtin_ctzl(t);
    }

    return p;
  }

On an architecture with popcount support, such as aarch64, compiling with
"-O2 -ftree-scev-cprop" generates:

foo:
.LFB0:
        .cfi_startproc
        cbz     x0, .L4
        mov     x4, x1
        mov     x2, x0
        .p2align 5,,15
.L3:
        rbit    x3, x2
        clz     x3, x3
        str     w3, [x4], 4
        sub     x3, x2, #1
        ands    x2, x2, x3
        bne     .L3
        fmov    d31, x0
        add     x1, x1, 4
        cnt     v31.8b, v31.8b
        addv    b31, v31.8b
        fmov    w0, s31
        sub     w2, w0, #1
        add     x0, x1, w2, uxtw 2
        ret
        .p2align 2,,3
.L4:
        mov     x0, x1
        ret

With "-O2 -fno-tree-scev-cprop", the generated code is simpler:

.LFB0:
        .cfi_startproc
        mov     x2, x0
        mov     x0, x1
        cbz     x2, .L2
        .p2align 5,,15
.L3:
        rbit    x1, x2
        sub     x3, x2, #1
        clz     x1, x1
        str     w1, [x0], 4
        ands    x2, x2, x3
        bne     .L3
.L2:
        ret

The cause is that scev computes the exit value of "p" in one shot using
POPCOUNT. However, "p" is used inside the loop and has to be updated on every
iteration anyway, so its final value is already available when the loop exits
and the extra computation at the exit is unnecessary.

final value replacement:
  p_15 = PHI <p_11(3)>
 with expr: (int *) (((sizetype) (unsigned int) (.POPCOUNT ((unsigned long)
v_7(D)) + -1) * 4 + (sizetype) p_8(D)) + 4)
 final stmt:
  p_15 = (int *) _25
  • [Bug tree-optimization/117... fxue at os dot amperecomputing.com via Gcc-bugs

Reply via email to