https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101956
Bug ID: 101956
Summary: Miss vectorization from v4hi to v4df
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
Host: x86_64-pc-linux-gnu
Target: x86_64-*-* i?86-*-*

void foo (double* p, short* q)
{
  p[0] = q[0];
  p[1] = q[1];
  p[2] = q[2];
  p[3] = q[3];
}

clang generates

        vpmovsxwd xmm0, qword ptr [rsi]
        vcvtdq2pd ymm0, xmm0
        vmovups ymmword ptr [rdi], ymm0
        vzeroupper

gcc generates

foo(double*, short*):
        movswl (%rsi), %eax
        vxorps %xmm0, %xmm0, %xmm0
        vcvtsi2sdl %eax, %xmm0, %xmm1
        movswl 2(%rsi), %eax
        vcvtsi2sdl %eax, %xmm0, %xmm2
        movswl 4(%rsi), %eax
        vmovsd %xmm2, %xmm2, %xmm3
        vcvtsi2sdl %eax, %xmm0, %xmm2
        movswl 6(%rsi), %eax
        vcvtsi2sdl %eax, %xmm0, %xmm0
        vunpcklpd %xmm0, %xmm2, %xmm2
        vunpcklpd %xmm3, %xmm1, %xmm0
        vinsertf128 $0x1, %xmm2, %ymm0, %ymm0
        vmovupd %ymm0, (%rdi)
        vzeroupper
        ret