https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88828
Bug ID: 88828
Summary: Inefficient update of the first element of vector registers
Product: gcc
Version: 9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
CC: ubizjak at gmail dot com
Target Milestone: ---
[hjl@gnu-cfl-1 pr88778]$ cat u4.i
/* 16-byte vector of float, i.e. four elements. */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Same 16-byte float vector layout, additionally marked __may_alias__. */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
/* Reproducer: return a vector whose element 0 is f and whose elements
   1..3 are copied unchanged from the corresponding elements of x.  */
__m128
foo (__m128 x, float f)
{
/* x is cast to __v4sf so that its individual elements can be subscripted;
   the braced initializer builds the result element by element.  */
__m128 y = __extension__ (__m128)(__v4sf)
{ f, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x)[3] };
return y;
}
[hjl@gnu-cfl-1 pr88778]$ gcc -S -O2 u4.i
[hjl@gnu-cfl-1 pr88778]$ cat u4.s
# Compiler-generated listing for foo (GAS, AT&T syntax: "op src, dst").
# Lane notation below is [elem0, elem1, elem2, elem3]; x = [x0, x1, x2, x3].
# Assumes the SysV AMD64 convention: x arrives in %xmm0, f in element 0 of
# %xmm1 (upper elements of %xmm1 unspecified, shown as "?"); result in %xmm0.
.file "u4.i"
.text
.p2align 4,,15
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
movaps %xmm0, %xmm4          # xmm4 = [x0, x1, x2, x3]
movaps %xmm0, %xmm3          # xmm3 = [x0, x1, x2, x3]
shufps $85, %xmm0, %xmm4     # imm 0x55 selects lane 1 everywhere: xmm4 = [x1, x1, x1, x1]
unpckhps %xmm0, %xmm3        # interleave high halves: xmm3 = [x2, x2, x3, x3]
unpcklps %xmm4, %xmm1        # interleave low halves:  xmm1 = [f, x1, ?, x1]
shufps $255, %xmm0, %xmm0    # imm 0xFF selects lane 3 everywhere: xmm0 = [x3, x3, x3, x3]
unpcklps %xmm0, %xmm3        # xmm3 = [x2, x3, x2, x3]
movaps %xmm1, %xmm0          # xmm0 = [f, x1, ?, x1]
movlhps %xmm3, %xmm0         # low half of xmm3 -> high half of xmm0: xmm0 = [f, x1, x2, x3]
ret                          # net effect: only element 0 of x was replaced by f
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (GNU) 8.2.1 20190109 (Red Hat 8.2.1-7)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 pr88778]$
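A simple movss will do: a single register-to-register `movss %xmm1, %xmm0` replaces only element 0 of %xmm0 with f and leaves elements 1-3 of x unchanged, which is exactly the result the nine-instruction sequence above computes.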