https://github.com/eulerkochy created https://github.com/llvm/llvm-project/pull/156369
None >From e3795502f1acf7ec5c3980ed9fdb502bbf9403ba Mon Sep 17 00:00:00 2001 From: Koustav Chowdhury <kc99....@gmail.com> Date: Tue, 2 Sep 2025 01:11:34 +0530 Subject: [PATCH] feat: constexpr SSE multiplication intrinsics --- clang/lib/Headers/avx2intrin.h | 5 ++--- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/avx512dqintrin.h | 8 ++++---- clang/lib/Headers/avx512fintrin.h | 15 ++++++--------- clang/lib/Headers/avx512vlbwintrin.h | 8 ++++---- clang/lib/Headers/avx512vldqintrin.h | 16 ++++++++-------- clang/lib/Headers/avx512vlintrin.h | 10 ++++------ clang/lib/Headers/smmintrin.h | 4 ++-- 8 files changed, 32 insertions(+), 38 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 384faa35d246f..4d03103ac1e08 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1766,9 +1766,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the products. 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mullo_epi32 (__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mullo_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a * (__v8su)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index eabe215947761..8e9cc395abb2a 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -451,14 +451,14 @@ _mm512_mullo_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A * (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 87d16b474d466..b0fe903d82431 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -156,19 +156,19 @@ _store_mask8(__mmask8 *__A, __mmask8 __B) { *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mullo_epi64 (__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mullo_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 
(__v8di)_mm512_mullo_epi64(__A, __B), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullo_epi64(__A, __B), diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 0006e334022b6..1968b2b3d91d2 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1450,23 +1450,20 @@ _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mullo_epi32 (__m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mullo_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A * (__v16su) __B); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_mullo_epi32(__A, __B), (__v16si)__W); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index ea6144008200a..888086dc214f1 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -426,28 +426,28 @@ _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { (__v8hi)_mm_setzero_si128()); } 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index ceebd09e1d3af..e93eb10c31ce2 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -32,38 +32,38 @@ #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mullo_epi64 (__m256i __A, __m256i __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mullo_epi64(__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
(__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mullo_epi64 (__m128i __A, __m128i __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mullo_epi64(__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 6e16d2d4a0620..41f11757abbea 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -429,17 +429,15 @@ _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) -{ 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_mullo_epi32(__A, __B), (__v8si)__W); diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index f68dd7ed2bcc9..e8f1f57c97c08 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -548,8 +548,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1, /// \param __V2 /// A 128-bit integer vector. /// \returns A 128-bit integer vector containing the products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, - __m128i __V2) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mullo_epi32(__m128i __V1, __m128i __V2) { return (__m128i)((__v4su)__V1 * (__v4su)__V2); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits