https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85048
--- Comment #6 from Matthias Kretz (Vir) <mkretz at gcc dot gnu.org> ---
Most of the conversions are optimized perfectly now. Only the following
conversions are still missing for AVX-512:

https://godbolt.org/z/9afWbYod6

#include <cstdint>

template <class T, int N, int Size = N * sizeof(T)>
using V [[gnu::vector_size(Size)]] = T;

template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
    return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
}

template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
    return V<To, 8>{
        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
        To(x[4]), To(x[5]), To(x[6]), To(x[7])
    };
}

template <class From, class To> V<To, 16> cvt16(V<From, 16> x) {
    return V<To, 16>{
        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
        To(x[4]), To(x[5]), To(x[6]), To(x[7]),
        To(x[8]), To(x[9]), To(x[10]), To(x[11]),
        To(x[12]), To(x[13]), To(x[14]), To(x[15])
    };
}

#define _(name, from, to, size) \
    auto name(V<from, size> x) { return cvt##size<from, to>(x); }

// integral -> double
_(vcvtudq2pd, uint32_t, double, 4)
_(vcvtudq2pd, uint32_t, double, 8)

// integral -> float
_(vcvtqq2ps , int64_t, float, 16)
_(vcvtuqq2ps, uint64_t, float, 16)

// float -> integral
_(vcvttps2qq, float, int64_t, 16)
_( cvttps2udq, float, uint32_t, 4)
_(vcvttps2udq, float, uint32_t, 8)
_(vcvttps2uqq, float, uint64_t, 16)

// double -> integral
_(vcvttpd2udq, double, uint32_t, 4)