https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100267
# --- Comment #8 from Hongtao.liu <crazylht at gmail dot com> ---
# "less optimizations" part should be fixed in GCC12.
#
# The listing below is compiler output in GAS / AT&T syntax (op src, dst);
# see the .ident string at the end for the producing compiler.

        .file   "test.c"
        .text
        .p2align 4
        .globl  dummyf1_avx512x8
        .type   dummyf1_avx512x8, @function

#-----------------------------------------------------------------------
# dummyf1_avx512x8
#
# In:    rdi = address of a record that is read at two offsets:
#                +0  32-bit value, used as an element index
#                +8  64-bit value, used as a base address
#              (presumably the first integer argument under the
#              System V AMD64 ABI -- the C prototype is not shown here)
# Out:   ymm0 = lane-wise 64-bit sum of the two consecutive 32-byte
#              chunks located at base + index*8
#              (presumably returned as a 256-bit vector -- TODO confirm
#              against test.c)
# Clobb: rax, rdx, ymm0, ymm1
# Stack: none used; no calls are made.
#-----------------------------------------------------------------------
dummyf1_avx512x8:
.LFB5668:
        .cfi_startproc
        movl    (%rdi), %edx                    # edx = 32-bit index; the 32-bit write zero-extends into rdx
        movq    8(%rdi), %rax                   # rax = base address
        vmovdqu (%rax,%rdx,8), %ymm0            # ymm0 = 32 bytes at base + index*8 (unaligned load)
        vmovdqu 32(%rax,%rdx,8), %ymm1          # ymm1 = the following 32 bytes
        vpaddq  %ymm1, %ymm0, %ymm0             # ymm0 += ymm1, four packed 64-bit lanes
        ret                                     # result stays in ymm0
        .cfi_endproc
.LFE5668:
        .size   dummyf1_avx512x8, .-dummyf1_avx512x8

        .ident  "GCC: (GNU) 12.0.0 20210621 (experimental)"
        .section        .note.GNU-stack,"",@progbits