https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108902
Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jakub at gcc dot gnu.org

--- Comment #1 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
#include <array>
#include <cstdint>
#include <stdfloat>
#include <algorithm>
#include <execution>
#include <x86intrin.h>

using array_t = std::array<std::float16_t, 128>;

void inc_loop(array_t& arr) noexcept {
    for (auto& val : arr)
        ++val;
}

void inc_transform(array_t& arr) noexcept {
    std::transform(std::execution::unseq, arr.begin(), arr.end(), arr.begin(),
                   [](const auto val) constexpr noexcept{return val+1;});
}

void inc_intrinsic(array_t& arr) noexcept {
    auto load_cvt = [](const std::float16_t*const ptr) noexcept {
        return _mm256_cvtph_ps(*((const __m128i*const)ptr));
    };
    auto save_cvt = [](std::float16_t* ptr, const __m256 arg) noexcept {
        *((__m128i*)ptr) = _mm256_cvtps_ph(arg, _MM_FROUND_CUR_DIRECTION);
    };
    for (std::size_t i=0; i<arr.size(); i+=8) {
        __m256 tmp = load_cvt(&arr[i]);
        ++tmp;
        save_cvt(&arr[i], tmp);
    }
}