https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122901

--- Comment #1 from Daniel Fruzynski <[email protected]> ---
I forgot to add the x86_64 outputs for the second function:

clang:
[code]
# clang output for f2(short*, unsigned int, unsigned int), Intel syntax.
# Register roles as used below:
#   rdi = base address of the 16-bit elements (read and written in place)
#   esi = loop bound, compared against the element index
#   edx = 32-bit multiplier, broadcast to every vector lane
# Each iteration handles 8 elements: sign-extend to 32 bits, multiply,
# add 1023 to negative products, shift right by 10, keep the low 16 bits
# of every lane and store the 8 results back over the inputs.
# NOTE(review): bias-then-shift looks like a signed divide by 1024 that
# rounds toward zero; the C source is not part of this listing -- confirm.
# NOTE(review): there is no scalar tail loop here, so the bound is
# presumably known to be a multiple of 8 -- confirm against the source.
f2(short*, unsigned int, unsigned int):
        # Bound of zero: skip straight to the return.
        test    esi, esi
        je      .LBB0_3
        # xmm0 = multiplier replicated into all four dword lanes.
        movd    xmm0, edx
        pshufd  xmm0, xmm0, 0
        # rax = bound zero-extended to 64 bits, rcx = element index (0).
        mov     eax, esi
        xor     ecx, ecx
        # xmm1 = all zeros; used as the blend source that clears high words.
        pxor    xmm1, xmm1
.LBB0_2:
        # xmm2 = elements rcx+4..rcx+7, xmm3 = elements rcx..rcx+3,
        # each sign-extended from 16 to 32 bits directly from memory.
        pmovsxwd        xmm2, qword ptr [rdi + 2*rcx + 8]
        pmovsxwd        xmm3, qword ptr [rdi + 2*rcx]
        # Low 32 bits of element * multiplier in every lane.
        pmulld  xmm3, xmm0
        pmulld  xmm2, xmm0
        # Low half: sign mask (psrad 31) turned into a bias of 1023 for
        # negative lanes and 0 otherwise (psrld 22), added to the product.
        movdqa  xmm4, xmm3
        psrad   xmm4, 31
        psrld   xmm4, 22
        paddd   xmm4, xmm3
        # Logical shift is used here; the bits it fills differ from an
        # arithmetic shift only in the upper 16 bits, which are cleared below.
        psrld   xmm4, 10
        # High half: same bias-and-shift sequence, result left in xmm3.
        movdqa  xmm3, xmm2
        psrad   xmm3, 31
        psrld   xmm3, 22
        paddd   xmm3, xmm2
        psrld   xmm3, 10
        # 170 = 0b10101010: odd words come from the zero register, i.e. the
        # upper 16 bits of every dword lane are cleared.
        pblendw xmm3, xmm1, 170
        pblendw xmm4, xmm1, 170
        # Narrow 8 dwords to 8 words (xmm4 -> low four, xmm3 -> high four).
        # Every lane is already <= 0xFFFF, so the unsigned saturation of
        # packusdw leaves the values unchanged.
        packusdw        xmm4, xmm3
        # Aligned 16-byte store of the 8 results over the original elements.
        movdqa  xmmword ptr [rdi + 2*rcx], xmm4
        # Advance by 8 elements; unsigned compare, loop while index < bound.
        add     rcx, 8
        cmp     rcx, rax
        jb      .LBB0_2
.LBB0_3:
        ret
[/code]

gcc:
[code]
# gcc output for f2(short*, unsigned int, unsigned int), Intel syntax.
# Register roles as used below:
#   rdi = base address of the 16-bit elements (read and written in place)
#   esi = loop bound, compared against the element index
#   edx = 32-bit multiplier on entry; reused as the index register in the loop
# Each iteration handles 8 elements: sign-extend to 32 bits, multiply,
# add 1023 to negative products, shift right by 10, keep the low 16 bits
# of every lane and store the 8 results back over the inputs.
# Points worth noticing in this listing:
#   - a frame is set up and rsp is aligned down to 32 bytes;
#   - two 16-byte values are stored at [rsp-32] and [rsp-16] and are never
#     loaded again anywhere in the code shown.
# NOTE(review): those stores look dead; whether they are the subject of the
# bug report cannot be told from this listing alone -- confirm.
f2(short*, unsigned int, unsigned int):
        # Bound of zero: return via .L8 before any frame is created.
        test    esi, esi
        je      .L8
        push    rbp
        # xmm3 = multiplier replicated into all four dword lanes
        # (xmm5 is only the staging register for the movd).
        movd    xmm5, edx
        # eax = element index, starts at 0.
        xor     eax, eax
        pshufd  xmm3, xmm5, 0
        # Frame pointer setup, then rsp rounded down to a 32-byte boundary.
        mov     rbp, rsp
        and     rsp, -32
.L3:
        # rdx = current index (zero-extended), eax = index for the next pass.
        mov     edx, eax
        add     eax, 8
        # Aligned 16-byte load of 8 elements.
        movdqa  xmm0, XMMWORD PTR [rdi+rdx*2]
        # xmm1 = low four elements sign-extended to 32 bits.
        pmovsxwd        xmm1, xmm0
        # Byte-shift right by 8 moves the high four elements into the low half.
        psrldq  xmm0, 8
        pmulld  xmm1, xmm3
        # xmm0 = high four elements sign-extended, then multiplied.
        pmovsxwd        xmm0, xmm0
        pmulld  xmm0, xmm3
        # Low half: sign mask (psrad 31) turned into a bias of 1023 for
        # negative lanes and 0 otherwise (psrld 22), added to the product.
        movdqa  xmm2, xmm1
        psrad   xmm2, 31
        psrld   xmm2, 22
        paddd   xmm2, xmm1
        # High half: the same sequence, interleaved with the low half.
        movdqa  xmm1, xmm0
        psrad   xmm1, 31
        # Arithmetic shift right by 10 of the biased low-half products.
        psrad   xmm2, 10
        psrld   xmm1, 22
        # Store of the shifted low-half dwords below rsp; nothing in this
        # listing reads the slot back.
        movaps  XMMWORD PTR [rsp-32], xmm2
        paddd   xmm0, xmm1
        # xmm1 is zeroed to serve as the blend destination.
        pxor    xmm1, xmm1
        psrad   xmm0, 10
        # 85 = 0b01010101: even words are taken from xmm2, so xmm1 holds the
        # low 16 bits of each low-half lane with the upper 16 bits zero.
        pblendw xmm1, xmm2, 85
        # Same zero-and-blend for the high half, into xmm2.
        pxor    xmm2, xmm2
        pblendw xmm2, xmm0, 85
        # Store of the shifted high-half dwords below rsp; nothing in this
        # listing reads the slot back.
        movaps  XMMWORD PTR [rsp-16], xmm0
        # Narrow 8 dwords to 8 words (xmm1 -> low four, xmm2 -> high four).
        # Every lane is already <= 0xFFFF, so the unsigned saturation of
        # packusdw leaves the values unchanged.
        packusdw        xmm1, xmm2
        # Aligned 16-byte store of the 8 results over the original elements.
        movaps  XMMWORD PTR [rdi+rdx*2], xmm1
        # 32-bit unsigned compare: loop while next index < bound.
        cmp     eax, esi
        jb      .L3
        # leave restores rsp from rbp and pops rbp, undoing the frame setup.
        leave
        ret
.L8:
        # Early-exit path: no frame was created, so a bare ret is enough.
        ret
[/code]

Reply via email to