https://github.com/Temperz87 updated https://github.com/llvm/llvm-project/pull/160636
>From 5114deca2ad04ce6b28ee62407324c18d7215e41 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Wed, 24 Sep 2025 23:32:09 -0400 Subject: [PATCH 1/9] Add support for constexpr with PMULHRSW --- clang/include/clang/Basic/BuiltinsX86.td | 9 ++++++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 15 +++++++++++++++ clang/lib/AST/ExprConstant.cpp | 19 ++++++++++++++++++- clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avx512bwintrin.h | 6 +++--- clang/lib/Headers/avx512vlbwintrin.h | 6 +++--- clang/lib/Headers/tmmintrin.h | 11 +++++++---- clang/test/CodeGen/X86/avx2-builtins.c | 1 + clang/test/CodeGen/X86/avx512bw-builtins.c | 2 ++ clang/test/CodeGen/X86/mmx-builtins.c | 1 + clang/test/CodeGen/X86/ssse3-builtins.c | 1 + 11 files changed, 58 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0d44e78f879b9..d5cb2cadee075 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -125,12 +125,15 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">; - def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; } + + let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + } } // AVX @@ -584,7 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">; def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; - def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; @@ -629,6 +631,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">; def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">; + def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -1340,7 +1343,7 @@ let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVec def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } -let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ff6ef5a1f6864..3d6763a20098c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -8,6 +8,7 @@ #include "../ExprConstShared.h" #include "Boolean.h" #include "EvalEmitter.h" +#include "FixedPoint.h" #include "Interp.h" #include "InterpBuiltinBitCast.h" #include "PrimType.h" @@ -3311,6 +3312,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, BuiltinID,[](const APSInt &LHS, const APSInt &RHS) { + unsigned width = LHS.getBitWidth(); + + APInt mul = llvm::APIntOps::mulhs(LHS, RHS); + mul = mul.relativeLShr(14); + mul = mul.sadd_sat(APInt(width, 1, true)); + return APInt(mul.relativeLShr(1)); + }); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 6c6909e5b2370..2d04b2e016cc6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11679,7 +11679,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } case Builtin::BI__builtin_elementwise_add_sat: - case Builtin::BI__builtin_elementwise_sub_sat: + case Builtin::BI__builtin_elementwise_sub_sat: + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -11813,6 +11816,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS), DestUnsigned))); break; + + case clang::X86::BI__builtin_ia32_pmulhrsw128: + case clang::X86::BI__builtin_ia32_pmulhrsw256: + case clang::X86::BI__builtin_ia32_pmulhrsw512: { + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned width = Info.Ctx.getIntWidth(DestEltTy); + + APInt mul = llvm::APIntOps::mulhs(LHS, RHS); + mul = mul.relativeLShr(14); + mul = mul.sadd_sat(APInt(width, 1, true)); + ResultElements.push_back(APValue(APSInt(mul.relativeLShr(1)))); + break; + } case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: @@ -11825,6 +11841,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS), /*isUnsigned=*/false))); break; + case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 5a32312be200e..4fc5f8e469c53 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1678,7 +1678,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) { /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the rounded products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index a08735b937704..25bd6ceb8eac6 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1046,13 +1046,13 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, @@ -1060,7 +1060,7 @@ _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 888086dc214f1..90108a50e5f79 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1571,21 +1571,21 @@ _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index f01c61afa8ea2..6214eb66f0375 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -29,6 +29,9 @@ #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -560,7 +563,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); @@ -580,11 +583,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 18ce88c8fb4d0..13a28dcc373b4 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -987,6 +987,7 @@ __m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) { // CHECK: call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_mulhrs_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); __m256i test_mm256_mullo_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mullo_epi16 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index f9330a1d914bc..72d0f1390aaea 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1375,6 +1375,8 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 return _mm512_mulhrs_epi16(__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); + __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_mulhrs_epi16 // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index b19e82383cbfd..c65853ca76dfe 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -426,6 +426,7 @@ __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128( return _mm_mulhrs_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 2, 2, 0, 0)); __m64 test_mm_mullo_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mullo_pi16 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index 56ff73f08ab32..fb2221d62dba9 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -102,6 +102,7 @@ __m128i test_mm_mulhrs_epi16(__m128i a, __m128i b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_mulhrs_epi16(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_mulhrs_epi16((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), 2, 2, 0, 0)); __m128i test_mm_shuffle_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_shuffle_epi8 >From 8b3895110de80df82aeeace7c1f71a97fa93cb63 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Wed, 24 Sep 2025 23:35:19 -0400 Subject: [PATCH 2/9] Remove accidentally new include directive --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 3d6763a20098c..373a651bb9fbe 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -8,7 +8,6 @@ #include "../ExprConstShared.h" #include "Boolean.h" #include "EvalEmitter.h" -#include "FixedPoint.h" #include "Interp.h" #include "InterpBuiltinBitCast.h" #include "PrimType.h" >From 412712fab60f0e71d8554ac7dae2b66b3d9114e6 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Thu, 25 Sep 2025 01:43:02 -0400 Subject: [PATCH 3/9] Fix capitalization of local variables --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 10 +++++----- clang/lib/AST/ExprConstant.cpp | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e6fa8868a12b5..12b37eb289922 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3429,12 +3429,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pmulhrsw512: return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { - unsigned width = LHS.getBitWidth(); + unsigned Width = LHS.getBitWidth(); - APInt mul = llvm::APIntOps::mulhs(LHS, RHS); - mul = mul.relativeLShr(14); - mul = mul.sadd_sat(APInt(width, 1, true)); - return APInt(mul.relativeLShr(1)); + APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); + Mul = Mul.relativeLShr(14); + Mul = Mul.sadd_sat(APInt(Width, 1, true)); + return APInt(Mul.relativeLShr(1)); }); case clang::X86::BI__builtin_ia32_pavgb128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index c4178e119ded6..72d9fcc8cabf9 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11729,13 +11729,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pmulhrsw128: case clang::X86::BI__builtin_ia32_pmulhrsw256: case clang::X86::BI__builtin_ia32_pmulhrsw512: - return EvaluateBinOpExpr([](const APSInt &LHs, const APSInt& RHS) -> { - QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); - unsigned width = Info.Ctx.getIntWidth(DestEltTy); - APInt mul = llvm::APIntOps::mulhs(LHS, RHS); - mul = mul.relativeLShr(14); - mul = mul.sadd_sat(APInt(width, 1, true)); - return APInt(mul.relativeLShr(1)); + return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt& RHS) { + unsigned Width = LHS.getBitWidth(); + APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); + Mul = Mul.relativeLShr(14); + Mul = Mul.sadd_sat(APInt(Width, 1, true)); + return APInt(Mul.relativeLShr(1)); }); case clang::X86::BI__builtin_ia32_pmulhuw128: >From 125542be537b9053cdd7e6d5230c009630b95855 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Thu, 25 Sep 2025 13:55:55 -0400 Subject: [PATCH 4/9] Formatting changes, don't use saturated add --- clang/include/clang/Basic/BuiltinsX86.td | 5 +---- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 ++--- clang/lib/AST/ExprConstant.cpp | 5 ++--- clang/lib/Headers/tmmintrin.h | 1 + 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cfcb9d276cc37..be87343d09a26 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1354,13 +1354,10 @@ let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { - def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; -} - let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">; def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; + def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 12b37eb289922..b8c4c44ece1d6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3430,11 +3430,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { unsigned Width = LHS.getBitWidth(); - APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); Mul = Mul.relativeLShr(14); - Mul = Mul.sadd_sat(APInt(Width, 1, true)); - return APInt(Mul.relativeLShr(1)); + Mul = Mul + APInt(Width, 1, true); + return Mul.relativeLShr(1); }); case clang::X86::BI__builtin_ia32_pavgb128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 72d9fcc8cabf9..986af12bf54fb 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -67,7 +67,6 @@ #include <cstring> #include <functional> #include <limits> -#include <llvm/ADT/APSInt.h> #include <optional> #define DEBUG_TYPE "exprconstant" @@ -11733,8 +11732,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned Width = LHS.getBitWidth(); APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); Mul = Mul.relativeLShr(14); - Mul = Mul.sadd_sat(APInt(Width, 1, true)); - return APInt(Mul.relativeLShr(1)); + Mul = Mul + APInt(Width, 1, true); + return Mul.relativeLShr(1); }); case clang::X86::BI__builtin_ia32_pmulhuw128: diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 551959b5768f1..305eb56581ce8 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -799,6 +799,7 @@ _mm_sign_pi32(__m64 __a, __m64 __b) } #undef __anyext128 +#undef __zext128 #undef __trunc64 #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_CONSTEXPR >From b206462adba7a090e62496c9f50a08432c160525 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Thu, 25 Sep 2025 13:56:43 -0400 Subject: [PATCH 5/9] Format --- clang/include/clang/Basic/BuiltinsX86.td | 4 +++- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index be87343d09a26..570414c9d3ca2 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1357,7 +1357,9 @@ let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">; def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; - def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; + def pmulhrsw512 + : X86Builtin< + "_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b8c4c44ece1d6..63c4232986efe 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3428,13 +3428,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pmulhrsw256: case clang::X86::BI__builtin_ia32_pmulhrsw512: return interp__builtin_elementwise_int_binop( - S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { - unsigned Width = LHS.getBitWidth(); - APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); - Mul = Mul.relativeLShr(14); - Mul = Mul + APInt(Width, 1, true); - return Mul.relativeLShr(1); - }); + S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { + unsigned Width = LHS.getBitWidth(); + APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); + Mul = Mul.relativeLShr(14); + Mul = Mul + APInt(Width, 1, true); + return Mul.relativeLShr(1); + }); case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: >From d91a35203bc168e24b5c4696c8791f62e740c6c5 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Sat, 27 Sep 2025 15:09:11 -0400 Subject: [PATCH 6/9] mask/maskz tests --- clang/lib/Headers/avx512vlbwintrin.h | 2 +- clang/test/CodeGen/X86/avx512bw-builtins.c | 4 ++++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index d2dffcc6e5915..3370a0392f64b 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1514,7 +1514,7 @@ _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 135fb294cba77..8970eb233c551 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1595,12 +1595,16 @@ __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_mulhrs_epi16(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhrs_epi16(_mm512_set1_epi16(1), 0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); + __m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_mulhrs_epi16 // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_mulhrs_epi16(__U,__A,__B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhrs_epi16(0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mulhi_epi16 // CHECK: @llvm.x86.avx512.pmulh.w.512 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 6c9c80efcef9d..d22d92509d45b 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -2040,6 +2040,7 @@ __m128i test_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_mulhrs_epi16(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhrs_epi16(_mm_set1_epi16(1), 0x0F, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 1, 1, 1, 1)); __m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_mulhrs_epi16 @@ -2047,6 +2048,7 @@ __m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_mulhrs_epi16(__U, __X, __Y); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhrs_epi16(0x0F, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 0, 0, 0, 0)); __m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_mulhrs_epi16 @@ -2054,6 +2056,7 @@ __m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_mulhrs_epi16(__W, __U, __X, __Y); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhrs_epi16(_mm256_set1_epi16(1), 0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2)); __m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_mulhrs_epi16 @@ -2061,6 +2064,7 @@ __m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_mulhrs_epi16(__U, __X, __Y); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhrs_epi16(0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2)); __m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_mulhi_epu16 >From 84155ce98d3470d29db5f762907f0936c3076de8 Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Sat, 27 Sep 2025 18:22:46 -0400 Subject: [PATCH 7/9] Format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 1 - clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Headers/avx2intrin.h | 6 ++---- clang/lib/Headers/tmmintrin.h | 6 ++---- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 63c4232986efe..c011ae1636fab 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3423,7 +3423,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); - case clang::X86::BI__builtin_ia32_pmulhrsw128: case clang::X86::BI__builtin_ia32_pmulhrsw256: case clang::X86::BI__builtin_ia32_pmulhrsw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 986af12bf54fb..58f4b13b30fc6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11728,7 +11728,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pmulhrsw128: case clang::X86::BI__builtin_ia32_pmulhrsw256: case clang::X86::BI__builtin_ia32_pmulhrsw512: - return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt& RHS) { + return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) { unsigned Width = LHS.getBitWidth(); APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); Mul = Mul.relativeLShr(14); diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index cae57a1cb26f4..bd89751a22505 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1659,8 +1659,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) { /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the rounded products. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mulhrs_epi16(__m256i __a, __m256i __b) -{ +_mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } @@ -1678,8 +1677,7 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mulhi_epu16(__m256i __a, __m256i __b) -{ +_mm256_mulhi_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 305eb56581ce8..c569f833806be 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -559,8 +559,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ +_mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } @@ -579,8 +578,7 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_mulhrs_pi16(__m64 __a, __m64 __b) -{ +_mm_mulhrs_pi16(__m64 __a, __m64 __b) { return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b))); } >From 59d447d70a3e900646e361e005d40bcfc6be04db Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Sat, 27 Sep 2025 18:36:50 -0400 Subject: [PATCH 8/9] I think that's all the format issues? --- clang/lib/AST/ExprConstant.cpp | 1 - clang/lib/Headers/avx512bwintrin.h | 9 +++------ clang/lib/Headers/tmmintrin.h | 6 +++--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58f4b13b30fc6..d14a7cc9f426c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11723,7 +11723,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pavgb512: case clang::X86::BI__builtin_ia32_pavgw512: return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU); - case clang::X86::BI__builtin_ia32_pmulhrsw128: case clang::X86::BI__builtin_ia32_pmulhrsw256: diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 8e802d7946126..bd1f332d21d66 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1007,22 +1007,19 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mulhrs_epi16(__m512i __A, __m512i __B) -{ +_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index c569f833806be..c822ce645f1b2 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -560,7 +560,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -579,8 +579,8 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) { /// products of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), - (__v8hi)__zext128(__b))); + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the >From 7b7338f74c4eb89c6935435b263a738467d714bb Mon Sep 17 00:00:00 2001 From: Shulin Gonsalves <[email protected]> Date: Fri, 17 Oct 2025 23:59:30 -0400 Subject: [PATCH 9/9] Change folding procedure to match intel instrinics guide (and actually work) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 ++----- clang/lib/AST/ExprConstant.cpp | 7 ++----- clang/lib/Headers/tmmintrin.h | 6 +++--- clang/test/CodeGen/X86/avx2-builtins.c | 2 +- clang/test/CodeGen/X86/avx512bw-builtins.c | 6 +++--- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 8 ++++---- clang/test/CodeGen/X86/mmx-builtins.c | 2 +- clang/test/CodeGen/X86/ssse3-builtins.c | 2 +- 8 files changed, 17 insertions(+), 23 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index c011ae1636fab..06d924a20657a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3428,11 +3428,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pmulhrsw512: return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { - unsigned Width = LHS.getBitWidth(); - APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); - Mul = Mul.relativeLShr(14); - Mul = Mul + APInt(Width, 1, true); - return Mul.relativeLShr(1); + return (llvm::APIntOps::mulsExtended(LHS, RHS).ashr(14) + 1) + .extractBits(16, 1); }); case clang::X86::BI__builtin_ia32_pavgb128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58f4b13b30fc6..37ceee51ce64d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11729,11 +11729,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case clang::X86::BI__builtin_ia32_pmulhrsw256: case clang::X86::BI__builtin_ia32_pmulhrsw512: return EvaluateBinOpExpr([](const APSInt &LHS, const APSInt &RHS) { - unsigned Width = LHS.getBitWidth(); - APInt Mul = llvm::APIntOps::mulhs(LHS, RHS); - Mul = Mul.relativeLShr(14); - Mul = Mul + APInt(Width, 1, true); - return Mul.relativeLShr(1); + return (llvm::APIntOps::mulsExtended(LHS, RHS).ashr(14) + 1) + .extractBits(16, 1); }); case clang::X86::BI__builtin_ia32_pmulhuw128: diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index c569f833806be..c822ce645f1b2 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -560,7 +560,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) /// products of both operands. static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -579,8 +579,8 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) { /// products of both operands. static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mulhrs_pi16(__m64 __a, __m64 __b) { - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), - (__v8hi)__zext128(__b))); + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index d554c5f4035b7..3e417b3d9d25c 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1012,7 +1012,7 @@ __m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) { // CHECK: call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_mulhrs_epi16(a, b); } -TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); +TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600}, (__m256i)(__v16hi){+1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +5, -9, -13, +16, +18, -20, -21, -22, -22, +21, +20, -18, -16, +13, +9, -5)); __m256i test_mm256_mullo_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mullo_epi16 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 8970eb233c551..e6dbed6fe38f4 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1587,7 +1587,7 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.pmul.hr.sw.512 return _mm512_mulhrs_epi16(__A,__B); } -TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2)); +TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600, -1700, -1800, +1900, +2000, -2100, -2200, +2300, +2400, -2500, -2600, +2700, +2800, -2900, -3000, +3100, +3200}, (__m512i)(__v32hi){+3200, -3100, +3000, -2900, +2800, -2700, +2600, -2500, +2400, -2300, +2200, -2100, +2000, -1900, +1800, -1700, +1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +10, -19, -27, +35, +43, -49, -56, -61, -66, +70, +74, -77, -79, +81, +82, -83, -83, +82, +81, -79, -77, +74, +70, -66, -61, +56, +49, -43, -35, +27, +19, -10)); __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_mulhrs_epi16 @@ -1595,7 +1595,7 @@ __m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_mulhrs_epi16(__W,__U,__A,__B); } -TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhrs_epi16(_mm512_set1_epi16(1), 0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhrs_epi16(_mm512_set1_epi16(1), 0x0000FFFF, (__m512i)(__v32hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600, -1700, -1800, +1900, +2000, -2100, -2200, +2300, +2400, -2500, -2600, +2700, +2800, -2900, -3000, +3100, +3200}, (__m512i)(__v32hi){+3200, -3100, +3000, -2900, +2800, -2700, +2600, -2500, +2400, -2300, +2200, -2100, +2000, -1900, +1800, -1700, +1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +10, -19, -27, +35, +43, -49, -56, -61, -66, +70, +74, -77, -79, +81, +82, -83, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, +1)); __m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_mulhrs_epi16 @@ -1603,7 +1603,7 @@ __m512i test_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_mulhrs_epi16(__U,__A,__B); } -TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhrs_epi16(0x0000FFFF, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhrs_epi16(0x0000FFFF, (__m512i)(__v32hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600, -1700, -1800, +1900, +2000, -2100, -2200, +2300, +2400, -2500, -2600, +2700, +2800, -2900, -3000, +3100, +3200}, (__m512i)(__v32hi){+3200, -3100, +3000, -2900, +2800, -2700, +2600, -2500, +2400, -2300, +2200, -2100, +2000, -1900, +1800, -1700, +1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +10, -19, -27, +35, +43, -49, -56, -61, -66, +70, +74, -77, -79, +81, +82, -83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mulhi_epi16 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d22d92509d45b..f36f9e6cbc9a3 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -2040,7 +2040,7 @@ __m128i test_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_mulhrs_epi16(__W, __U, __X, __Y); } -TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhrs_epi16(_mm_set1_epi16(1), 0x0F, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 1, 1, 1, 1)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhrs_epi16(_mm_set1_epi16(1), 0x0F, (__m128i)(__v8hi){+100, +200, -300, -400, +500, +600, -700, +800}, (__m128i)(__v8hi){+8000, -7000, +6000, -5000, +4000, -3000, +2000, -1000}), +24, -43, -55, +61, +1, +1, +1, +1)); __m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK-LABEL: test_mm_maskz_mulhrs_epi16 @@ -2048,7 +2048,7 @@ __m128i test_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_mulhrs_epi16(__U, __X, __Y); } -TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhrs_epi16(0x0F, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 0, 0, 0, 0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhrs_epi16(0x0F, (__m128i)(__v8hi){+100, +200, -300, -400, +500, +600, -700, +800}, (__m128i)(__v8hi){+8000, -7000, +6000, -5000, +4000, -3000, +2000, -1000}), +24, -43, -55, +61, 0, 0, 0, 0)); __m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_mask_mulhrs_epi16 @@ -2056,7 +2056,7 @@ __m256i test_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_mulhrs_epi16(__W, __U, __X, __Y); } -TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhrs_epi16(_mm256_set1_epi16(1), 0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhrs_epi16(_mm256_set1_epi16(1), 0xF00F, (__m256i)(__v16hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600}, (__m256i)(__v16hi){+1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +5, -9, -13, +16, +1, +1, +1, +1, +1, +1, +1, +1, -16, +13, +9, -5)); __m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { // CHECK-LABEL: test_mm256_maskz_mulhrs_epi16 @@ -2064,7 +2064,7 @@ __m256i test_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_mulhrs_epi16(__U, __X, __Y); } -TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhrs_epi16(0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhrs_epi16(0xF00F, (__m256i)(__v16hi){+100, +200, -300, -400, +500, +600, -700, +800, -900, -1000, +1100, +1200, -1300, -1400, +1500, +1600}, (__m256i)(__v16hi){+1600, -1500, +1400, -1300, +1200, -1100, +1000, -900, +800, -700, +600, -500, +400, -300, +200, -100}), +5, -9, -13, +16, 0, 0, 0, 0, 0, 0, 0, 0, -16, +13, +9, -5)); __m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_mulhi_epu16 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 75ba9f3821d97..252386d3b9a76 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -428,7 +428,7 @@ __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128( return _mm_mulhrs_pi16(a, b); } -TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 2, 2, 0, 0)); +TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+100, +200, -300, -400}, (__m64)(__v4hi){+30000, -20000, +10000, -5000}), +92, -122, -92, +61)); __m64 test_mm_mullo_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mullo_pi16 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index fb2221d62dba9..21d9e43177aab 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -102,7 +102,7 @@ __m128i test_mm_mulhrs_epi16(__m128i a, __m128i b) { // CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_mulhrs_epi16(a, b); } -TEST_CONSTEXPR(match_v4si(_mm_mulhrs_epi16((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), 2, 2, 0, 0)); +TEST_CONSTEXPR(match_v8hi(_mm_mulhrs_epi16((__m128i)(__v8hi){+100, +200, -300, -400, +500, +600, -700, +800}, (__m128i)(__v8hi){+8000, -7000, +6000, -5000, +4000, -3000, +2000, -1000}), +24, -43, -55, +61, +61, -55, -43, -24)); __m128i test_mm_shuffle_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_shuffle_epi8 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
