[Bug middle-end/106688] New: leaving SSA emits assignment into the inner loop

amonakov at gcc dot gnu.org via Gcc-bugs Fri, 19 Aug 2022 12:41:23 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106688


            Bug ID: 106688
           Summary: leaving SSA emits assignment into the inner loop
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: amonakov at gcc dot gnu.org
  Target Milestone: ---

For the following testcase, gcc -O2

/* Defined elsewhere; bar() hands it the advanced pointer and the remaining size. */
unsigned foo(const unsigned char *buf, long size);
/* Reduced testcase: walks buf in chunks of 15*16 = 240 bytes while at least
   one full chunk remains, summing widened bytes into a per-chunk vector
   accumulator, then passes the final buf/size on to foo(). */
unsigned bar(const unsigned char *buf, long size)
{
        /* GCC vector types: 8 x char (8 bytes) and 8 x short (16 bytes). */
        typedef char  i8v8  __attribute__((vector_size(8)));
        typedef short i16v8 __attribute__((vector_size(16)));
        long chunk_sz = 15*16;
        /* Outer loop: only size is updated here; buf is advanced by the inner loop. */
        for (; size >= chunk_sz; size -= chunk_sz) {
                /* Accumulator is reset for every chunk. */
                i16v8 vs1 = { 0 };
                const unsigned char *end = buf + chunk_sz;
                /* Inner loop: 16 input bytes per iteration, as two 8-byte halves. */
                for (; buf != end; buf += 16) {
                        i16v8 b;
                        /* pmovzxbw widens the 8 bytes at buf into eight 16-bit lanes of b.
                           Written as inline asm rather than plain C because of PR 31667
                           (see the note after the testcase). */
                        asm("pmovzxbw %1, %0" : "=x"(b) : "m"(*(i8v8*)buf));
                        vs1 += b;
                        /* Same widening load for the second 8 bytes (buf+8). */
                        asm("pmovzxbw %1, %0" : "=x"(b) :
"m"(*(i8v8*)(buf+8)));
                        vs1 += b;
                }
                /* Empty asm that takes vs1 as an input operand; presumably here so the
                   accumulation is treated as used and not removed as dead code. */
                asm("" :: "x"(vs1));
        }
        /* buf here is the value left by the loops above (or the original argument
           if no chunk was processed). NOTE(review): the report attributes the
           stray in-loop copy to the phi node feeding this use - confirm against
           the GIMPLE dump. */
        return foo(buf, size);
}

(asms needed due to PR 31667)

generates

; Compiler output for bar() (Intel syntax). Going by the comparisons below,
; rdi holds buf and rsi holds size on entry.
bar:
        cmp     rsi, 239
        jle     .L2                     ; size < 240: no full chunk, skip both loops
        lea     rdx, [rdi+240]          ; rdx = buf + 240 = end of the first chunk
.L4:                                    ; outer loop head (one 240-byte chunk)
        lea     rax, [rdx-240]          ; rax = start of the current chunk
        pxor    xmm0, xmm0              ; vs1 = { 0 }
.L3:                                    ; inner loop head (16 bytes per iteration)
        ; Operand order below is the asm template text "%1, %0" emitted as
        ; written, which is why it looks reversed for Intel syntax.
        pmovzxbw QWORD PTR [rax], xmm1
        add     rax, 16
        paddw   xmm0, xmm1

        ; The instruction the report is about: rdx does not change inside .L3,
        ; yet this copy into rdi is executed on every inner-loop iteration.
        mov     rdi, rdx ; <<< ehhh

        pmovzxbw QWORD PTR [rax-8], xmm1 ; second 8 bytes (rax was already advanced by 16)
        paddw   xmm0, xmm1
        cmp     rax, rdx
        jne     .L3                     ; inner loop runs until rax reaches the chunk end
        sub     rsi, 240                ; size -= chunk_sz
        add     rdx, 240                ; next chunk end
        cmp     rsi, 239
        jg      .L4                     ; another full chunk remains
.L2:
        jmp     foo                     ; tail call foo(buf, size) with rdi/rsi as they are now

It looks as if going out of SSA places a register copy inside the inner loop,
even though that copy corresponds to a phi node which is outside of the loop.
Strangely, RTL optimizations do not clean it up either.

[Bug middle-end/106688] New: leaving SSA emits assignment into the inner loop

Reply via email to