================ @@ -0,0 +1,245 @@ +/*===------------- avx512bmmvlintrin.h - BMM intrinsics ------------------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx512bmmvlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __BMMVLINTRIN_H +#define __BMMVLINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bmm,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bmm,avx512vl"), __min_vector_width__(256))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +/// Multiplies two 16x16 bit matrices using OR reduction and ORs the product +/// into a third 16x16 bit matrix (which is also the destination). +/// +/// For the 256-bit YMM form, the source registers/memory each contain a single +/// 16x16 (256-bit) matrix in bits [255:0]. The operation performs: +/// \code{.operation} +/// for i in 0 to 15 +/// for j in 0 to 15 +/// reduction_bit = __C[16*i+j] +/// for k in 0 to 15 +/// reduction_bit |= __A[16*i+k] & __B[16*k+j] +/// end for k +/// dest[16*i+j] = reduction_bit +/// end for j +/// end for i +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> VBMACOR16X16X16 </c> instruction. 
+/// +/// \param __A +/// A 256-bit vector containing a 16x16 bit matrix. +/// \param __B +/// A 256-bit vector containing a 16x16 bit matrix. +/// \param __C +/// A 256-bit accumulator vector containing the initial values to OR with. +/// \returns A 256-bit vector containing the accumulated result. +/// \note This instruction does not support masking. +static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_bmacor16x16x16(__m256i __A, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_bmacor16x16x16_v16hi( + (__v16hi)__A, (__v16hi)__B, (__v16hi)__C); +} + +/// Multiplies two 16x16 bit matrices using XOR reduction and XORs the product +/// into a third 16x16 bit matrix (which is also the destination). +/// +/// For the 256-bit YMM form, the source registers/memory each contain a single +/// 16x16 (256-bit) matrix in bits [255:0]. The operation performs: +/// \code{.operation} +/// for i in 0 to 15 +/// for j in 0 to 15 +/// reduction_bit = __C[16*i+j] +/// for k in 0 to 15 +/// reduction_bit ^= __A[16*i+k] & __B[16*k+j] +/// end for k +/// dest[16*i+j] = reduction_bit +/// end for j +/// end for i +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> VBMACXOR16X16X16 </c> instruction. +/// +/// \param __A +/// A 256-bit vector containing a 16x16 bit matrix. +/// \param __B +/// A 256-bit vector containing a 16x16 bit matrix. +/// \param __C +/// A 256-bit accumulator vector containing the initial values to XOR with. +/// \returns A 256-bit vector containing the accumulated result. +/// \note This instruction does not support masking. +static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR ---------------- RKSimon wrote:
not currently usable in a constant expression

```suggestion
static __inline __m256i __DEFAULT_FN_ATTRS256
```

https://github.com/llvm/llvm-project/pull/182556
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
