[clang] [compiler-rt] [llvm] [X86] Add AVX512BMM support for AMD Zen 6 (znver6) (PR #182556)

Simon Pilgrim via cfe-commits Mon, 16 Mar 2026 05:19:06 -0700

================
@@ -0,0 +1,245 @@
+/*===------------- avx512bmmvlintrin.h - BMM intrinsics ------------------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         
\
+    "Never use <avx512bmmvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __BMMVLINTRIN_H
+#define __BMMVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512bmm,avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512bmm,avx512vl"), __min_vector_width__(256)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+/// Multiplies two 16x16 bit matrices using OR reduction and ORs the product
+/// into a third 16x16 bit matrix (which is also the destination).
+///
+/// For the 256-bit YMM form, the source registers/memory each contain a single
+/// 16x16 (256-bit) matrix in bits [255:0]. The operation performs:
+/// \code{.operation}
+///   for i in 0 to 15
+///     for j in 0 to 15
+///       reduction_bit = __C[16*i+j]
+///       for k in 0 to 15
+///         reduction_bit |= __A[16*i+k] & __B[16*k+j]
+///       end for k
+///       dest[16*i+j] = reduction_bit
+///     end for j
+///   end for i
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> VBMACOR16X16X16 </c> instruction.
+///
+/// \param __A
+///    A 256-bit vector containing a 16x16 bit matrix.
+/// \param __B
+///    A 256-bit vector containing a 16x16 bit matrix.
+/// \param __C
+///    A 256-bit accumulator vector containing the initial values to OR with.
+/// \returns A 256-bit vector containing the accumulated result.
+/// \note This instruction does not support masking.
+static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_bmacor16x16x16(__m256i __A, __m256i __B, __m256i __C) {
+  return (__m256i)__builtin_ia32_bmacor16x16x16_v16hi(
+      (__v16hi)__A, (__v16hi)__B, (__v16hi)__C);
+}
+
+/// Multiplies two 16x16 bit matrices using XOR reduction and XORs the product
+/// into a third 16x16 bit matrix (which is also the destination).
+///
+/// For the 256-bit YMM form, the source registers/memory each contain a single
+/// 16x16 (256-bit) matrix in bits [255:0]. The operation performs:
+/// \code{.operation}
+///   for i in 0 to 15
+///     for j in 0 to 15
+///       reduction_bit = __C[16*i+j]
+///       for k in 0 to 15
+///         reduction_bit ^= __A[16*i+k] & __B[16*k+j]
+///       end for k
+///       dest[16*i+j] = reduction_bit
+///     end for j
+///   end for i
+/// \endcode
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> VBMACXOR16X16X16 </c> instruction.
+///
+/// \param __A
+///    A 256-bit vector containing a 16x16 bit matrix.
+/// \param __B
+///    A 256-bit vector containing a 16x16 bit matrix.
+/// \param __C
+///    A 256-bit accumulator vector containing the initial values to XOR with.
+/// \returns A 256-bit vector containing the accumulated result.
+/// \note This instruction does not support masking.
+static __inline __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
----------------
RKSimon wrote:


not currently usable in a constant expression
```suggestion
static __inline __m256i __DEFAULT_FN_ATTRS256
```

https://github.com/llvm/llvm-project/pull/182556
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [llvm] [X86] Add AVX512BMM support for AMD Zen 6 (znver6) (PR #182556)

Reply via email to