https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105923
--- Comment #5 from Hongtao.liu <crazylht at gmail dot com> ---
After a rough hack to allow complex types in both omp-simd-clone and the
vectorizer (get_related_vectype_for_scalar_type), the testcase in the PR can be
compiled to:
# Excerpt of compiler output (x86-64, GAS/AT&T syntax: "op src, dst").
# Only part of the function is shown: the prologue, the return and the branch
# targets .L17 and .L20 are outside this listing.
# Three stages are visible: a 256-bit loop calling _ZGVdN4v_foo, a single
# 128-bit step calling _ZGVbN2v_foo, and a scalar call to foo.
# The _ZGV names follow the x86 vector-function mangling ('d'/'b' = ISA class,
# 'N' = unmasked, digit = vector length, 'v' = vector argument).
#
# Main loop: %rbx is a byte offset into a/b, %r13 is the offset at which the
# loop stops (set up before this excerpt).
# NOTE(review): each trip moves 32 bytes, while the code after the loop rounds
# the count to a multiple of 4 and scales indices by 16 bytes; whether these
# agree depends on how %r13 was initialised - confirm.
.L4:
# Load 32 bytes from a + %rbx into %ymm0 (aligned load).
vmovapd a(%rbx), %ymm0
# Advance the offset before the call; %rbx is callee-saved under the SysV ABI.
addq $32, %rbx
call _ZGVdN4v_foo
# %rbx was already advanced, so b-32(%rbx) is the slot matching the load above.
vmovapd %ymm0, b-32(%rbx)
# Repeat until the offset reaches the bound in %r13.
cmpq %rbx, %r13
jne .L4
# After the loop: %ebx = %r12d rounded down to a multiple of 4.
movl %r12d, %ebx
andl $-4, %ebx
# Copy to %r13d (a 32-bit write zero-extends into %r13).
movl %ebx, %r13d
# If %r12d was already a multiple of 4, nothing is left: go to .L20 (not shown).
cmpl %ebx, %r12d
je .L20
# Fall-through only: clear the upper halves of the ymm registers before the
# 128-bit / scalar code below.
vzeroupper
# .L3 has no visible jump to it; presumably it is also entered from code
# before this excerpt.
.L3:
# %r12d = %r12d - %r13d, i.e. the count still to be handled.
subl %r13d, %r12d
# Exactly one left: skip the 128-bit step and use the scalar call at .L6.
cmpl $1, %r12d
je .L6
# Turn the index in %r13 into a byte offset (index * 16).
salq $4, %r13
# One 128-bit step: load 16 bytes from a, call the 128-bit variant, store to b.
vmovapd a(%r13), %xmm0
call _ZGVbN2v_foo
vmovapd %xmm0, b(%r13)
# If the remaining count is even, branch to .L17 (not shown).
testb $1, %r12b
je .L17
# Otherwise add the remaining count rounded down to even to the index in %ebx.
andl $-2, %r12d
addl %r12d, %ebx
# Scalar tail: one call to foo for the element at index %ebx.
.L6:
# Sign-extend the index and scale it by 16 to get a byte offset.
movslq %ebx, %rbx
salq $4, %rbx
# Load two consecutive doubles into %xmm0 and %xmm1 - presumably the real and
# imaginary parts of one complex value, given the surrounding text.
vmovsd a(%rbx), %xmm0
vmovsd a+8(%rbx), %xmm1
call foo
# Store the two doubles returned in %xmm0/%xmm1 to the same offset in b.
vmovsd %xmm0, b(%rbx)
vmovsd %xmm1, b+8(%rbx)