https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90492

--- Comment #4 from g.peterh...@t-online.de ---
#include <array>
#include <iostream>

// Minimal test program: fills a 16-element std::array of int64_t with
// 0..15, copies it into a second array by assignment, and prints the copy.
//
// argc and argv are accepted but never read.
//
// NOTE(review): int64_t, size_t and EXIT_SUCCESS are used although only
// <array> and <iostream> are included; presumably they arrive through
// transitive includes -- confirm, or add <cstdint>/<cstddef>/<cstdlib>.
int main(const int argc, const char** argv)
{
        using value_type = int64_t;
        using array_type = std::array<value_type, 16>;

        // Both arrays are default-initialized here, so their elements are
        // indeterminate until written below.
        array_type      a, b;

        // a[i] = i for every index; the size_t index is implicitly
        // converted to the signed 64-bit element type.
        for (size_t i=0; i<a.size(); ++i)
                a[i] = i;

        // Whole-array copy assignment (16 * 8 bytes).
        // NOTE(review): this looks like the statement whose generated code
        // the report is about -- confirm against the bug description.
        b = a;

        // Print every element of the copy, each followed by one space;
        // no trailing newline is written.
        for (size_t i=0; i<a.size(); ++i)
                std::cout<<(b[i])<<' ';

        return EXIT_SUCCESS;
}

Compiling this with gcc-9 and -O3 -march=skylake-avx512 -mtune=intel -mno-vzeroupper
generates the following code:

0000000000000000 <main>:
    0:  55                      push   %rbp
    1:  48 89 e5                mov    %rsp,%rbp
    4:  41 54                   push   %r12
    6:  53                      push   %rbx
    7:  48 83 e4 c0             and    $0xffffffffffffffc0,%rsp
    b:  48 8d a4 24 c0 fe ff    lea    -0x140(%rsp),%rsp
   12:  ff
   13:  62 f1 fd 48 6f 05 00    vmovdqa64 0x0(%rip),%zmm0        # 1d <main+0x1d>
   1a:  00 00 00
                        19: R_X86_64_PC32       .rodata-0x4
   1d:  48 8d 9c 24 c0 00 00    lea    0xc0(%rsp),%rbx
   24:  00
   25:  62 f1 fd 48 7f 44 24    vmovdqa64 %zmm0,0x40(%rsp)
   2c:  01
   2d:  c5 f9 6f d0             vmovdqa %xmm0,%xmm2
   31:  62 f1 fd 48 6f 05 00    vmovdqa64 0x0(%rip),%zmm0        # 3b <main+0x3b>
   38:  00 00 00
                        37: R_X86_64_PC32       .rodata+0x3c
   3b:  4c 8d a4 24 40 01 00    lea    0x140(%rsp),%r12
   42:  00
   43:  62 f1 fd 48 7f 44 24    vmovdqa64 %zmm0,0x80(%rsp)
   4a:  02
   4b:  62 f1 fd 08 6f 5c 24    vmovdqa64 0x50(%rsp),%xmm3
   52:  05
   53:  62 f1 fd 08 6f 64 24    vmovdqa64 0x60(%rsp),%xmm4
   5a:  06
   5b:  62 f1 fd 08 6f 6c 24    vmovdqa64 0x70(%rsp),%xmm5
   62:  07
   63:  62 f1 fd 08 6f 74 24    vmovdqa64 0x90(%rsp),%xmm6
   6a:  09
   6b:  62 f1 fd 08 6f 7c 24    vmovdqa64 0xa0(%rsp),%xmm7
   72:  0a
   73:  62 f1 fd 08 6f 4c 24    vmovdqa64 0xb0(%rsp),%xmm1
   7a:  0b
   7b:  62 f1 fd 08 7f 54 24    vmovdqa64 %xmm2,0xc0(%rsp)
   82:  0c
   83:  62 f1 fd 08 7f 5c 24    vmovdqa64 %xmm3,0xd0(%rsp)
   8a:  0d
   8b:  62 f1 fd 08 7f 64 24    vmovdqa64 %xmm4,0xe0(%rsp)
   92:  0e
   93:  62 f1 fd 08 7f 44 24    vmovdqa64 %xmm0,0x100(%rsp)
   9a:  10
   9b:  62 f1 fd 08 7f 6c 24    vmovdqa64 %xmm5,0xf0(%rsp)
   a2:  0f
   a3:  62 f1 fd 08 7f 74 24    vmovdqa64 %xmm6,0x110(%rsp)
   aa:  11
   ab:  62 f1 fd 08 7f 7c 24    vmovdqa64 %xmm7,0x120(%rsp)
   b2:  12
   b3:  62 f1 fd 08 7f 4c 24    vmovdqa64 %xmm1,0x130(%rsp)
   ba:  13
   bb:  0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
   c0:  48 8b 33                mov    (%rbx),%rsi
   c3:  bf 00 00 00 00          mov    $0x0,%edi
                        c4: R_X86_64_32 std::cout
   c8:  48 83 c3 08             add    $0x8,%rbx
   cc:  e8 00 00 00 00          callq  d1 <main+0xd1>
                        cd: R_X86_64_PLT32      std::ostream& std::ostream::_M_insert<long>(long)-0x4
   d1:  48 89 c7                mov    %rax,%rdi
   d4:  ba 01 00 00 00          mov    $0x1,%edx
   d9:  c6 44 24 3f 20          movb   $0x20,0x3f(%rsp)
   de:  48 8d 74 24 3f          lea    0x3f(%rsp),%rsi
   e3:  e8 00 00 00 00          callq  e8 <main+0xe8>
                        e4: R_X86_64_PLT32      std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)-0x4
   e8:  49 39 dc                cmp    %rbx,%r12
   eb:  75 d3                   jne    c0 <main+0xc0>
   ed:  48 8d 65 f0             lea    -0x10(%rbp),%rsp
   f1:  31 c0                   xor    %eax,%eax
   f3:  5b                      pop    %rbx
   f4:  41 5c                   pop    %r12
   f6:  5d                      pop    %rbp
   f7:  c3                      retq
   f8:  0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
   ff:  00

Reply via email to