https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113978
--- Comment #4 from 严 逍宇 <xjkp2283572185 at gmail dot com> ---
I found an example that reproduces this without involving the ABI problem:
===
Source Code
===
// v: GNU vector type of char with vector_size(128), i.e. a 128-byte vector.
using v [[using gnu: vector_size(128)]] = char;
// Swap the two vectors pointed to by pa and pb.
void f(v *pa, v *pb) noexcept
{
// Read both vectors into locals first, then store each into the other's location.
v a{*pa}, b{*pb};
*pa = b;
*pb = a;
}
===
Command
===
g++ test.cpp -Ofast -march=znver4 -S
===
Result
===
# GCC output for f(v *pa, v *pb); pa is used via %rcx and pb via %rdx.
# The swap itself is done through zmm0-zmm3, but copies of both vectors are
# also written to a 128-byte-aligned stack area that is never read back in
# this listing.
_Z1fPDv128_cS0_:
.LFB0:
# Reserve 376 bytes of stack and record the allocation in the SEH unwind info.
subq $376, %rsp
.seh_stackalloc 376
.seh_endprologue
# zmm1 = bytes 0..63 of *pa, zmm0 = bytes 64..127 of *pa.
vmovdqa64 (%rcx), %zmm1
vmovdqa64 64(%rcx), %zmm0
# rax = %rsp rounded up to the next multiple of 128 (base of the stack copies).
leaq 127(%rsp), %rax
andq $-128, %rax
# zmm3 = bytes 0..63 of *pb, zmm2 = bytes 64..127 of *pb.
vmovdqa64 (%rdx), %zmm3
vmovdqa64 64(%rdx), %zmm2
# Stack copy of the old *pa at 128(%rax)..255(%rax); not read afterwards.
vmovdqa64 %zmm1, 128(%rax)
vmovdqa64 %zmm0, 192(%rax)
# *pa = old *pb.
vmovdqa64 %zmm3, (%rcx)
vmovdqa64 %zmm2, 64(%rcx)
# Stack copy of the old *pb at 0(%rax)..127(%rax); not read afterwards.
vmovdqa64 %zmm3, (%rax)
vmovdqa64 %zmm2, 64(%rax)
# *pb = old *pa.
vmovdqa64 %zmm1, (%rdx)
vmovdqa64 %zmm0, 64(%rdx)
# Clear the upper vector state, release the stack area, and return.
vzeroupper
addq $376, %rsp
ret
But clang generates the expected code, without the stack allocation and the redundant stores:
# clang output for the same function: no stack adjustment and no stack stores;
# the two 128-byte vectors are swapped purely through zmm0-zmm3.
_Z1fPDv128_cS0_: # @_Z1fPDv128_cS0_
# %bb.0:
# zmm0 = bytes 0..63 of *pa, zmm1 = bytes 64..127 of *pa (via %rcx).
vmovaps (%rcx), %zmm0
vmovaps 64(%rcx), %zmm1
# zmm2 = bytes 0..63 of *pb, zmm3 = bytes 64..127 of *pb (via %rdx).
vmovaps (%rdx), %zmm2
vmovaps 64(%rdx), %zmm3
# *pa = old *pb.
vmovaps %zmm2, (%rcx)
vmovaps %zmm3, 64(%rcx)
# *pb = old *pa.
vmovaps %zmm0, (%rdx)
vmovaps %zmm1, 64(%rdx)
# Clear the upper vector state before returning.
vzeroupper
retq