https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108902

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jakub at gcc dot gnu.org

--- Comment #1 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
#include <array>
#include <cstdint>
#include <stdfloat>
#include <algorithm>
#include <execution>
#include <x86intrin.h>


// Fixed-size buffer of 128 std::float16_t values used by every variant below.
// 128 is a multiple of 8, the step inc_intrinsic uses when walking the array.
using array_t = std::array<std::float16_t, 128>;


// Variant 1: increments every element of `arr` in place with a plain
// range-based for loop over references to the elements.
void inc_loop(array_t& arr)    noexcept
{
    for (auto& val : arr)
        ++val;
}


// Variant 2: same element-wise update expressed through std::transform with
// the std::execution::unseq policy. The output iterator is arr.begin(), so
// each result of `val+1` is written back over the element it was read from.
void inc_transform(array_t& arr)    noexcept
{
    std::transform(std::execution::unseq, arr.begin(), arr.end(), arr.begin(),
[](const auto val) constexpr noexcept{return val+1;});
}


// Variant 3: hand-written version that processes the array eight elements at
// a time: convert 8 halves to 8 floats, increment, convert back and store.
void inc_intrinsic(array_t& arr)    noexcept
{
    // Reads 16 bytes at `ptr` as one __m128i and widens them to a __m256 via
    // _mm256_cvtph_ps.
    // NOTE(review): dereferencing a __m128i* presumably requires 16-byte
    // alignment, and nothing visible here aligns `arr` — confirm, or consider
    // an unaligned-load intrinsic.
    auto load_cvt = [](const std::float16_t*const ptr) noexcept
    {
        return _mm256_cvtph_ps(*((const __m128i*const)ptr));
    };

    // Narrows `arg` with _mm256_cvtps_ph (rounding control
    // _MM_FROUND_CUR_DIRECTION) and writes the resulting __m128i to `ptr`.
    // NOTE(review): same alignment assumption as load_cvt — confirm.
    auto save_cvt = [](std::float16_t* ptr, const __m256 arg)    noexcept
    {
        *((__m128i*)ptr) = _mm256_cvtps_ph(arg, _MM_FROUND_CUR_DIRECTION);
    };

    // Step of 8 matches the eight elements handled per load_cvt/save_cvt pair;
    // there is no tail handling, so this relies on arr.size() being a multiple
    // of 8.
    for (std::size_t i=0; i<arr.size(); i+=8)
    {
        __m256
            tmp = load_cvt(&arr[i]);

        // Increment applied directly to the __m256 value.
        // NOTE(review): presumably relies on the compiler's vector-extension
        // operators for __m256 — confirm for the target compiler.
        ++tmp;
       save_cvt(&arr[i], tmp);
    }
}

Reply via email to