================
@@ -24,567 +24,3243 @@
   __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    
\
                  __min_vector_width__(256)))
 
+/// Convert two 128-bit vectors, \a __A and \a __B, containing packed
+/// single-precision (32-bit) floating-point elements to a 128-bit vector
+/// containing FP16 elements.
+///
+/// \code{.operation}
+/// FOR i := 0 to 7
+///    IF i < 4
+///            dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i])
+///    ELSE
+///            dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4])
+///    FI
+/// ENDFOR
+/// \endcode
----------------
mikolaj-pirog wrote:

Recent intrinsics (amxfp8intrin.h) also follow this order, as the vast majority
of existing intrinsics do. It shouldn't be problematic for the tooling -- if it
is, I will fix it (the tooling).

https://github.com/llvm/llvm-project/pull/120766
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to