https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90492
--- Comment #4 from g.peterh...@t-online.de ---

#include <array>
#include <iostream>

int main(const int argc, const char** argv)
{
    using value_type = int64_t;
    using array_type = std::array<value_type, 16>;

    array_type a, b;
    for (size_t i=0; i<a.size(); ++i)
        a[i] = i;
    b = a;
    for (size_t i=0; i<a.size(); ++i)
        std::cout<<(b[i])<<' ';

    return EXIT_SUCCESS;
}

Compiling with gcc-9 and -O3 -march=skylake-avx512 -mtune=intel -mno-vzeroupper generates this code:

0000000000000000 <main>:
   0:  55                     push %rbp
   1:  48 89 e5               mov %rsp,%rbp
   4:  41 54                  push %r12
   6:  53                     push %rbx
   7:  48 83 e4 c0            and $0xffffffffffffffc0,%rsp
   b:  48 8d a4 24 c0 fe ff   lea -0x140(%rsp),%rsp
  12:  ff
  13:  62 f1 fd 48 6f 05 00   vmovdqa64 0x0(%rip),%zmm0 # 1d <main+0x1d>
  1a:  00 00 00
            19: R_X86_64_PC32 .rodata-0x4
  1d:  48 8d 9c 24 c0 00 00   lea 0xc0(%rsp),%rbx
  24:  00
  25:  62 f1 fd 48 7f 44 24   vmovdqa64 %zmm0,0x40(%rsp)
  2c:  01
  2d:  c5 f9 6f d0            vmovdqa %xmm0,%xmm2
  31:  62 f1 fd 48 6f 05 00   vmovdqa64 0x0(%rip),%zmm0 # 3b <main+0x3b>
  38:  00 00 00
            37: R_X86_64_PC32 .rodata+0x3c
  3b:  4c 8d a4 24 40 01 00   lea 0x140(%rsp),%r12
  42:  00
  43:  62 f1 fd 48 7f 44 24   vmovdqa64 %zmm0,0x80(%rsp)
  4a:  02
  4b:  62 f1 fd 08 6f 5c 24   vmovdqa64 0x50(%rsp),%xmm3
  52:  05
  53:  62 f1 fd 08 6f 64 24   vmovdqa64 0x60(%rsp),%xmm4
  5a:  06
  5b:  62 f1 fd 08 6f 6c 24   vmovdqa64 0x70(%rsp),%xmm5
  62:  07
  63:  62 f1 fd 08 6f 74 24   vmovdqa64 0x90(%rsp),%xmm6
  6a:  09
  6b:  62 f1 fd 08 6f 7c 24   vmovdqa64 0xa0(%rsp),%xmm7
  72:  0a
  73:  62 f1 fd 08 6f 4c 24   vmovdqa64 0xb0(%rsp),%xmm1
  7a:  0b
  7b:  62 f1 fd 08 7f 54 24   vmovdqa64 %xmm2,0xc0(%rsp)
  82:  0c
  83:  62 f1 fd 08 7f 5c 24   vmovdqa64 %xmm3,0xd0(%rsp)
  8a:  0d
  8b:  62 f1 fd 08 7f 64 24   vmovdqa64 %xmm4,0xe0(%rsp)
  92:  0e
  93:  62 f1 fd 08 7f 44 24   vmovdqa64 %xmm0,0x100(%rsp)
  9a:  10
  9b:  62 f1 fd 08 7f 6c 24   vmovdqa64 %xmm5,0xf0(%rsp)
  a2:  0f
  a3:  62 f1 fd 08 7f 74 24   vmovdqa64 %xmm6,0x110(%rsp)
  aa:  11
  ab:  62 f1 fd 08 7f 7c 24   vmovdqa64 %xmm7,0x120(%rsp)
  b2:  12
  b3:  62 f1 fd 08 7f 4c 24   vmovdqa64 %xmm1,0x130(%rsp)
  ba:  13
  bb:  0f 1f 44 00 00         nopl 0x0(%rax,%rax,1)
  c0:  48 8b 33               mov (%rbx),%rsi
  c3:  bf 00 00 00 00         mov $0x0,%edi
            c4: R_X86_64_32 std::cout
  c8:  48 83 c3 08            add $0x8,%rbx
  cc:  e8 00 00 00 00         callq d1 <main+0xd1>
            cd: R_X86_64_PLT32 std::ostream& std::ostream::_M_insert<long>(long)-0x4
  d1:  48 89 c7               mov %rax,%rdi
  d4:  ba 01 00 00 00         mov $0x1,%edx
  d9:  c6 44 24 3f 20         movb $0x20,0x3f(%rsp)
  de:  48 8d 74 24 3f         lea 0x3f(%rsp),%rsi
  e3:  e8 00 00 00 00         callq e8 <main+0xe8>
            e4: R_X86_64_PLT32 std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)-0x4
  e8:  49 39 dc               cmp %rbx,%r12
  eb:  75 d3                  jne c0 <main+0xc0>
  ed:  48 8d 65 f0            lea -0x10(%rbp),%rsp
  f1:  31 c0                  xor %eax,%eax
  f3:  5b                     pop %rbx
  f4:  41 5c                  pop %r12
  f6:  5d                     pop %rbp
  f7:  c3                     retq
  f8:  0f 1f 84 00 00 00 00   nopl 0x0(%rax,%rax,1)
  ff:  00