https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120428

--- Comment #2 from Shawn Xu <shawn at shawnxu dot org> ---
Shorter reproduction:

// https://godbolt.org/z/z8z4Ye4rq
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

// Minimized reproducer: rearranges the two 16-byte halves of `data` according
// to `order`, staging the result in a local buffer before copying it back.
// Because `order` is the identity permutation {0, 1}, every byte ends up where
// it started, so the function has no observable effect on `data`.
//
// NOTE(review): std::copy is declared in <algorithm>, which is not included
// directly above; presumably it is reached transitively through another
// standard header -- confirm before reusing this snippet elsewhere.
void permute(std::array<std::byte, 32>& data) {
    // Source-chunk index for each destination chunk; identity mapping here.
    static constexpr std::array<std::size_t, 2> order{0, 1};
    // Zero-initialized staging area, same size as `data`.
    std::array<std::byte, 32> buffer{};

    for (std::size_t i = 0; i < 2; i++)
    {
        // Destination: i-th 16-byte chunk of the staging buffer.
        auto* const buffer_chunk = &buffer[i * 16];
        // Source: the 16-byte chunk of `data` selected by order[i].
        auto* const value_chunk  = &data[order[i] * 16];

        std::copy(value_chunk, value_chunk + 16, buffer_chunk);
    }

    // Write the staged 32 bytes back over the input.
    std::copy(std::begin(buffer), std::end(buffer), std::begin(data));
}

GCC 15.1 produces:
permute(std::array<std::byte, 32ul>&):
        push    rbp
        mov     rbp, rsp
        and     rsp, -32
        vmovdqu xmm0, XMMWORD PTR [rdi]
        vmovdqa XMMWORD PTR [rsp-32], xmm0
        vmovdqu xmm0, XMMWORD PTR [rdi+16]
        vmovdqa XMMWORD PTR [rsp-16], xmm0
        vmovdqa ymm0, YMMWORD PTR [rsp-32]
        vmovdqu YMMWORD PTR [rdi], ymm0
        vzeroupper
        leave
        ret

GCC 14.2 produces:
permute(std::array<std::byte, 32ul>&):
        ret

Reply via email to