https://github.com/eulerkochy updated 
https://github.com/llvm/llvm-project/pull/156369

>From e3795502f1acf7ec5c3980ed9fdb502bbf9403ba Mon Sep 17 00:00:00 2001
From: Koustav Chowdhury <kc99....@gmail.com>
Date: Tue, 2 Sep 2025 01:11:34 +0530
Subject: [PATCH 1/2] feat: constexpr SSE multiplication intrinsics

---
 clang/lib/Headers/avx2intrin.h       |  5 ++---
 clang/lib/Headers/avx512bwintrin.h   |  4 ++--
 clang/lib/Headers/avx512dqintrin.h   |  8 ++++----
 clang/lib/Headers/avx512fintrin.h    | 15 ++++++---------
 clang/lib/Headers/avx512vlbwintrin.h |  8 ++++----
 clang/lib/Headers/avx512vldqintrin.h | 16 ++++++++--------
 clang/lib/Headers/avx512vlintrin.h   | 10 ++++------
 clang/lib/Headers/smmintrin.h        |  4 ++--
 8 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 384faa35d246f..4d03103ac1e08 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1766,9 +1766,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the products.
-static __inline__  __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi32 (__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mullo_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a * (__v8su)__b);
 }
 
diff --git a/clang/lib/Headers/avx512bwintrin.h 
b/clang/lib/Headers/avx512bwintrin.h
index eabe215947761..8e9cc395abb2a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -451,14 +451,14 @@ _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A * (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_mullo_epi16(__A, 
__B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_mullo_epi16(__A, 
__B),
diff --git a/clang/lib/Headers/avx512dqintrin.h 
b/clang/lib/Headers/avx512dqintrin.h
index 87d16b474d466..b0fe903d82431 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -156,19 +156,19 @@ _store_mask8(__mmask8 *__A, __mmask8 __B) {
   *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A * (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, 
__B),
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullo_epi64(__A, 
__B),
diff --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 0006e334022b6..1968b2b3d91d2 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1450,23 +1450,20 @@ _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, 
__m512i __Y)
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi32 (__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A * (__v16su) __B);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_mullo_epi32(__A, 
__B),
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_mullo_epi32(__A, 
__B),
                                              (__v16si)__W);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h 
b/clang/lib/Headers/avx512vlbwintrin.h
index ea6144008200a..888086dc214f1 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -426,28 +426,28 @@ _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i 
__B) {
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, 
__B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, 
__B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
diff --git a/clang/lib/Headers/avx512vldqintrin.h 
b/clang/lib/Headers/avx512vldqintrin.h
index ceebd09e1d3af..e93eb10c31ce2 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -32,38 +32,38 @@
 #define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
 #endif
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_mullo_epi64(__m256i __A, __m256i __B) {
   return (__m256i) ((__v4du) __A * (__v4du) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, 
__B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, 
__B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mullo_epi64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mullo_epi64(__m128i __A, __m128i __B) {
   return (__m128i) ((__v2du) __A * (__v2du) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
diff --git a/clang/lib/Headers/avx512vlintrin.h 
b/clang/lib/Headers/avx512vlintrin.h
index 6e16d2d4a0620..41f11757abbea 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -429,17 +429,15 @@ _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i 
__Y)
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_mullo_epi32(__A, 
__B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_mullo_epi32(__A, 
__B),
                                              (__v8si)__W);
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index f68dd7ed2bcc9..e8f1f57c97c08 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -548,8 +548,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS 
_mm_blendv_epi8(__m128i __V1,
 /// \param __V2
 ///    A 128-bit integer vector.
 /// \returns A 128-bit integer vector containing the products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
-                                                             __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mullo_epi32(__m128i __V1, __m128i __V2) {
   return (__m128i)((__v4su)__V1 * (__v4su)__V2);
 }
 

>From 2531540b99b23472a60e952e81ab015644500b1b Mon Sep 17 00:00:00 2001
From: Koustav Chowdhury <kc99....@gmail.com>
Date: Tue, 2 Sep 2025 14:04:03 +0530
Subject: [PATCH 2/2] test: sse constexpr multiply

---
 clang/test/CodeGen/X86/avx2-builtins.c       | 1 +
 clang/test/CodeGen/X86/avx512bw-builtins.c   | 2 ++
 clang/test/CodeGen/X86/avx512dq-builtins.c   | 3 +++
 clang/test/CodeGen/X86/avx512f-builtins.c    | 3 +++
 clang/test/CodeGen/X86/avx512vl-builtins.c   | 2 ++
 clang/test/CodeGen/X86/avx512vlbw-builtins.c | 4 ++++
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 6 ++++++
 clang/test/CodeGen/X86/sse41-builtins.c      | 1 +
 8 files changed, 22 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c 
b/clang/test/CodeGen/X86/avx2-builtins.c
index a39ce513837ea..c9b6f69684c9e 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -976,6 +976,7 @@ __m256i test_mm256_mullo_epi32(__m256i a, __m256i b) {
   // CHECK: mul <8 x i32>
   return _mm256_mullo_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_mullo_epi32((__m256i)(__v8si){+1, -2, +3, -4, 
+5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 
28, 36, -40, -40, -36, -28, -16));
 
 __m256i test_mm256_or_si256(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_or_si256
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c 
b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 264a4578c2cd2..5f8497cdef37f 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -839,6 +839,7 @@ __m512i test_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 
__U, __m512i __A, __
   //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_mullo_epi16(__W, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_mullo_epi16((__m512i)(__v32hi){-1, +2, 
-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, 
+20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}, 0x0000FFFF, 
(__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, 
-15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, 
-31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, 
-13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, 
-29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, 
-132, -156, -182, -210, -240, -272, -306, -17, +18, -19, +20, -21, +22, -23, 
+24, -25, +26, -27, +28, -29, +30, -31, +32));
 
 __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 
{
   //CHECK-LABEL: test_mm512_maskz_mullo_epi16
@@ -846,6 +847,7 @@ __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, 
__m512i __A, __m512i __B) {
   //CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_mullo_epi16(__U, __A, __B);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mullo_epi16(0x0000FFFF, 
(__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, 
-15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, 
-31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, 
-13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, 
-29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, 
-132, -156, -182, -210, -240, -272, -306, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0));
 
 __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
   // CHECK-LABEL: test_mm512_mask_blend_epi8
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c 
b/clang/test/CodeGen/X86/avx512dq-builtins.c
index d2bd7808ca91d..88f160a6deda5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -248,6 +248,7 @@ __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
   // CHECK: mul <8 x i64>
   return (__m512i) _mm512_mullo_epi64(__A, __B);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mullo_epi64((__m512i)(__v8di){+1, -2, +3, -4, 
+5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, 
+12, -20, -30, -42, +56, -72));
 
 __m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 
__m512i __B) {
   // CHECK-LABEL: test_mm512_mask_mullo_epi64
@@ -255,6 +256,7 @@ __m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 
__U, __m512i __A, __m
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return (__m512i) _mm512_mask_mullo_epi64(__W, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_mullo_epi64((__m512i)(__v8di){-100, 
+200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m512i)(__v8di){+1, -2, +3, 
-4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, -4, +5, -6, +7, -8, +9}), -2, 
-6, -12, -20, -500, +600, -700, +800));
 
 __m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_mullo_epi64
@@ -262,6 +264,7 @@ __m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i 
__A, __m512i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return (__m512i) _mm512_maskz_mullo_epi64(__U, __A, __B);
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_mullo_epi64(0x0F, (__m512i)(__v8di){+1, 
-2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, 
+9}), -2, -6, +12, -20, 0, 0, 0, 0));
 
 __m512d test_mm512_xor_pd (__m512d __A, __m512d __B) {
   // CHECK-LABEL: test_mm512_xor_pd
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index 123b8de8396a6..a60bb83113087 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3093,6 +3093,7 @@ __m512i test_mm512_maskz_mullo_epi32 (__mmask16 
__k,__m512i __A, __m512i __B) {
   //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_mullo_epi32(__k,__A,__B);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_mullo_epi32(0x00FF, 
(__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, 
+15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, 
+13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, 0, 0, 0, 0, 0, 
0, 0, 0));
 
 __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, 
__m512i __src) {
   //CHECK-LABEL: test_mm512_mask_mullo_epi32
@@ -3100,12 +3101,14 @@ __m512i test_mm512_mask_mullo_epi32 (__mmask16 
__k,__m512i __A, __m512i __B, __m
   //CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_mullo_epi32(__src,__k,__A,__B);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_mullo_epi32((__m512i)(__v16si){-100, 
+200, -300, +400, -500, +600, -700, +800, -900, +1000, -1100, +1200, -1300, 
+1400, -1500, +1600}, 0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, 
-8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, 
-6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, 
-30, -42, +56, -72, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600));
 
 __m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mullo_epi32
   //CHECK: mul <16 x i32>
   return _mm512_mullo_epi32(__A,__B);
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mullo_epi32((__m512i)(__v16si){+1, -2, +3, 
-4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, 
(__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, 
+15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, -90, -110, +132, -156, 
-182, -210, +240, -272));
 
 __m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mullox_epi64
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c 
b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 78e01b944ec5d..b8dcabbf7285e 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -834,6 +834,7 @@ __m256i test_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i 
__A, __m256i __B) {
   //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_mullo_epi32(__M, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_mullo_epi32(0x0F, (__m256i)(__v8si){+1, 
-2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, 
+9}), -2, -6, +12, -20, 0, 0, 0, 0));
 
 __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
        __m256i __B) {
@@ -842,6 +843,7 @@ __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 
__M, __m256i __A,
   //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_mullo_epi32(__W, __M, __A, __B);
 }
+TEST_CONSTEXPR(match_v8si(_mm256_mask_mullo_epi32((__m256i)(__v8si){-100, 
+200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m256i)(__v8si){+1, -2, +3, 
-4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, +9}), -2, 
-6, +12, -20, -500, +600, -700, +800));
 
 __m256i test_mm256_and_epi32 (__m256i __A, __m256i __B) {
   //CHECK-LABEL: test_mm256_and_epi32
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c 
b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index dd24ed86912be..4527c9175dcaa 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -861,6 +861,7 @@ __m256i test_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 
__U, __m256i __A, __
   //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_mullo_epi16(__W, __U , __A, __B);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_mullo_epi16((__m256i)(__v16hi){-1, +2, 
-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, 0x00FF, 
(__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, 
-15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, 
-13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, -9, +10, 
-11, +12, -13, +14, -15, +16));
 
 __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) 
{
   //CHECK-LABEL: test_mm256_maskz_mullo_epi16
@@ -868,6 +869,7 @@ __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, 
__m256i __A, __m256i __B) {
   //CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_mullo_epi16(__U , __A, __B);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mullo_epi16(0x00FF, 
(__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, 
-15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, 
-13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, 0, 0, 0, 
0, 0, 0, 0, 0));
 
 __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, 
__m128i __B) {
   //CHECK-LABEL: test_mm_mask_mullo_epi16
@@ -875,6 +877,7 @@ __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 
__U, __m128i __A, __m128
   //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_mullo_epi16(__W, __U , __A, __B);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_mask_mullo_epi16((__m128i)(__v8hi){-1, +2, -3, 
+4, -5, +6, -7, +8}, 0x0F, (__m128i)(__v8hi){+2, -3, +4, -5, +6, -7, +8, -9}, 
(__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, +10}), -6, -12, -20, -30, -5, +6, 
-7, +8));
 
 __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
   //CHECK-LABEL: test_mm_maskz_mullo_epi16
@@ -882,6 +885,7 @@ __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i 
__A, __m128i __B) {
   //CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_mullo_epi16(__U , __A, __B);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_mullo_epi16(0x0F, (__m128i)(__v8hi){+2, 
-3, +4, -5, +6, -7, +8, -9}, (__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, 
+10}), -6, -12, -20, -30, 0, 0, 0, 0));
 
 
 __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c 
b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 720999cea6ab4..e1e8916bf60b3 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -17,6 +17,7 @@ __m256i test_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
   // CHECK: mul <4 x i64>
   return _mm256_mullo_epi64(__A, __B);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_mullo_epi64((__m256i)(__v4di){+1, -2, +3, 
-4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, +12, +20));
 
 __m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 
__m256i __B) {
   // CHECK-LABEL: test_mm256_mask_mullo_epi64
@@ -24,6 +25,7 @@ __m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 
__U, __m256i __A, __m
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return (__m256i) _mm256_mask_mullo_epi64 ( __W, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_mask_mullo_epi64((__m256i)(__v4di){-100, 
+200, -300, +400}, 0x03, (__m256i)(__v4di){+1, -2, +3, -4}, 
(__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, -300, +400));
 
 __m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_maskz_mullo_epi64
@@ -31,12 +33,14 @@ __m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i 
__A, __m256i __B) {
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return (__m256i) _mm256_maskz_mullo_epi64 (__U, __A, __B);
 }
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_mullo_epi64(0x03, (__m256i)(__v4di){+1, 
-2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, 0, 0));
 
 __m128i test_mm_mullo_epi64 (__m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mullo_epi64
   // CHECK: mul <2 x i64>
   return (__m128i) _mm_mullo_epi64(__A, __B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_mullo_epi64((__m128i)(__v2di){+1, -2}, 
(__m128i)(__v2di){-3, +4}), -3, -8));
 
 __m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 
__m128i __B) {
   // CHECK-LABEL: test_mm_mask_mullo_epi64
@@ -44,6 +48,7 @@ __m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, 
__m128i __A, __m128
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return (__m128i) _mm_mask_mullo_epi64 ( __W, __U, __A, __B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_mask_mullo_epi64((__m128i)(__v2di){-100, +200}, 
0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, +200));
 
 __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_mullo_epi64
@@ -51,6 +56,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, 
__m128i __B) {
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return (__m128i) _mm_maskz_mullo_epi64 (__U, __A, __B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_maskz_mullo_epi64(0x01, (__m128i)(__v2di){+1, 
-2}, (__m128i)(__v2di){-3, +4}), -3, 0));
 
 __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, 
__m256d __B) {
   // CHECK-LABEL: test_mm256_mask_andnot_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c 
b/clang/test/CodeGen/X86/sse41-builtins.c
index d3e4edc2fd172..5f53614872604 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -361,6 +361,7 @@ __m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
   // CHECK: mul <4 x i32>
   return _mm_mullo_epi32(x, y);
 }
+TEST_CONSTEXPR(match_v4si(_mm_mullo_epi32((__m128i)(__v4si){+1, -2, +3, -4}, 
(__m128i)(__v4si){-16, +14, +12, -10}), -16, -28, +36, +40));
 
 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
   // CHECK-LABEL: test_mm_packus_epi32

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to