https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117014

            Bug ID: 117014
           Summary: missed vectorization opportunity
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: 652023330028 at smail dot nju.edu.cn
  Target Milestone: ---

Hello, we noticed that there seems to be a missing vectorization for the code
below.

reduced code: https://godbolt.org/z/YaEPdWM3n

int data[100];
void f(int arr1[100], int arr2[100])
{
    for(int i = 0; i < 100; i++){
        arr2[i] = -arr1[i];
        int k = arr1[i] + arr2[i];
        data[i] = data[i + k] + 1;
    }
}

GCC -O3 -fno-vect-cost-model -fwrapv:
f(int*, int*):
        xor     eax, eax
.L2:
        mov     ecx, DWORD PTR [rdi+rax*4]
        mov     edx, ecx
        neg     edx
        mov     DWORD PTR [rsi+rax*4], edx
        mov     edx, eax
        sub     edx, ecx
        add     edx, DWORD PTR [rdi+rax*4]
        movsx   rdx, edx
        mov     edx, DWORD PTR data[0+rdx*4]
        add     edx, 1
        mov     DWORD PTR data[0+rax*4], edx
        add     rax, 1
        cmp     rax, 100
        jne     .L2
        ret

Possibly due to omission: _8 = _7 + _26; ==> _8 = i_22
  _4 = MEM[(int *)arr1_15(D) + ivtmp.5_21 * 4];
  _6 = -_4;
  MEM[(int *)arr2_16(D) + ivtmp.5_21 * 4] = _6;
  # DEBUG BEGIN_STMT
  _7 = MEM[(int *)arr1_15(D) + ivtmp.5_21 * 4];
  # DEBUG D#1 => -_4
  # DEBUG k => _7 + D#1
  # DEBUG BEGIN_STMT
  _26 = i_22 - _4;
  _8 = _7 + _26;
  _9 = data[_8];



Expected code: vectorize the loop body.
For example, when assign k=0 directly ('int k = 0;'), gcc:
f(int*, int*):
        lea     rdx, [rdi+4]
        mov     eax, OFFSET FLAT:data
        sub     rax, rdx
        cmp     rax, 8
        mov     rax, rsi
        seta    cl
        sub     rax, rdx
        cmp     rax, 8
        seta    al
        test    cl, al
        je      .L5
        mov     rax, rsi
        sub     rax, OFFSET FLAT:data-15
        cmp     rax, 30
        jbe     .L5
        pcmpeqd xmm1, xmm1
        xor     eax, eax
        psrld   xmm1, 31
.L3:
        movdqu  xmm3, XMMWORD PTR [rdi+rax]
        pxor    xmm0, xmm0
        psubd   xmm0, xmm3
        movups  XMMWORD PTR [rsi+rax], xmm0
        movdqa  xmm0, XMMWORD PTR data[rax]
        add     rax, 16
        paddd   xmm0, xmm1
        movaps  XMMWORD PTR data[rax-16], xmm0
        cmp     rax, 400
        jne     .L3
        ret
.L5:
        xor     eax, eax
.L2:
        mov     edx, DWORD PTR [rdi+rax]
        neg     edx
        mov     DWORD PTR [rsi+rax], edx
        add     DWORD PTR data[rax], 1
        add     rax, 4
        cmp     rax, 400
        jne     .L2
        ret

Thank you very much for your time and effort! We look forward to hearing from
you.
  • [Bug tree-optimization/1... 652023330028 at smail dot nju.edu.cn via Gcc-bugs

Reply via email to