================ @@ -24,567 +24,3243 @@ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ __min_vector_width__(256))) +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF mask[i] +/// dst.fp16[i] := __W[i] ---------------- phoebewang wrote:
Switch it with the ELSE branch. https://github.com/llvm/llvm-project/pull/120766 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits