https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100267

--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
After adding support for v{,p}expand* patterns without mask operands, the codegen seems to be optimal:

# dummyf1_avx512x8 -- compiler-generated listing (GAS / AT&T syntax, x86-64).
# Loads two consecutive 256-bit vectors from base + index*8, runs vpexpandq
# on each, and leaves their packed 64-bit sum in %ymm0.
# NOTE(review): %rdi presumably points to an object holding a 32-bit index at
# offset 0 and a pointer at offset 8 -- inferred from the two loads below;
# confirm against the C test case in the bug report.
dummyf1_avx512x8:
.LFB5668:
        .cfi_startproc
        movl    (%rdi), %edx                    # edx = 32-bit value at [rdi]; the 32-bit write zero-extends into rdx
        movq    8(%rdi), %rax                   # rax = 64-bit value at [rdi + 8], used below as the base address
        vmovdqu (%rax,%rdx,8), %ymm0            # ymm0 = unaligned 32-byte load from rax + rdx*8
        vmovdqu 32(%rax,%rdx,8), %ymm1          # ymm1 = the next 32 bytes (rax + rdx*8 + 32)
        vpexpandq       %ymm0, %ymm0            # expand with no writemask operand
        vpexpandq       %ymm1, %ymm1            # NOTE(review): presumably acts as an all-ones mask -- confirm in the Intel SDM
        vpaddq  %ymm1, %ymm0, %ymm0             # ymm0 = ymm0 + ymm1, per 64-bit element
        ret                                     # sum is left in ymm0 (presumably the return value -- confirm ABI)
        .cfi_endproc

Reply via email to