https://github.com/AdityaC4 updated https://github.com/llvm/llvm-project/pull/161302
>From 0c07c0640c4818c9bcae057ca1c54d87bc2de104 Mon Sep 17 00:00:00 2001 From: AdityaC4 <[email protected]> Date: Mon, 29 Sep 2025 18:42:01 -0500 Subject: [PATCH] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow element extraction/insertion intrinsics to be used in constexpr #159753 rebase/sove merge conflict with latest main --- clang/include/clang/Basic/BuiltinsX86.td | 24 +++---- clang/include/clang/Basic/BuiltinsX86_64.td | 4 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 77 +++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 65 +++++++++++++++++ clang/test/CodeGen/X86/avx-builtins.c | 8 +++ clang/test/CodeGen/X86/mmx-builtins.c | 2 + clang/test/CodeGen/X86/sse2-builtins.c | 2 + clang/test/CodeGen/X86/sse41-builtins.c | 7 ++ 8 files changed, 175 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 77e599587edc3..11e0a6611c5d8 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -52,7 +52,7 @@ def emms : X86Builtin<"void()"> { let Features = "mmx"; } -let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in { +let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features = "sse" in { def vec_ext_v4hi : X86Builtin<"short(_Vector<4, short>, _Constant int)">; def vec_set_v4hi : X86Builtin<"_Vector<4, short>(_Vector<4, short>, short, _Constant int)">; } @@ -92,13 +92,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; } - let Features = "sse2" in { - def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">; - def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">; - def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">; - def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">; - def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">; - } let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">; @@ -108,6 +101,12 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; + + def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">; + def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">; + def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">; + def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">; + def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">; } let Features = "sse3" in { @@ -323,9 +322,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; - def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">; - def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">; - def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">; } let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -338,6 +334,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + + def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">; + def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">; + def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">; } let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -560,7 +560,7 @@ let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskstoreps : X86Builtin<"void(_Vector<4, float *>, _Vector<4, int>, _Vector<4, float>)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vec_ext_v32qi : X86Builtin<"char(_Vector<32, char>, _Constant int)">; def vec_ext_v16hi : X86Builtin<"short(_Vector<16, short>, _Constant int)">; def vec_ext_v8si : X86Builtin<"int(_Vector<8, int>, _Constant int)">; diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td index 214b175ace5eb..275278c5ac089 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.td +++ b/clang/include/clang/Basic/BuiltinsX86_64.td @@ -56,7 +56,7 @@ let Features = "sse2", Attributes = [NoThrow] in { def movnti64 : X86Builtin<"void(long long int *, long long int)">; } -let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vec_set_v2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int, _Constant int)">; } @@ -64,7 +64,7 @@ let Features = "crc32", Attributes = [NoThrow, Const] in { def crc32di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vec_ext_v4di : X86Builtin<"long long int(_Vector<4, long long int>, _Constant int)">; def vec_set_v4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a2e97fcafdfef..8b88ca44ca16e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2878,6 +2878,60 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID) { + assert(Call->getNumArgs() == 2); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + const Pointer &Vec = S.Stk.pop<Pointer>(); + if (!Vec.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned NumElts = Vec.getNumElems(); + unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1)); + + switch (ID) { + case X86::BI__builtin_ia32_vec_ext_v4sf: + S.Stk.push<Floating>(Vec.elem<Floating>(Index)); + return true; + default: { + PrimType ElemPT = Vec.getFieldDesc()->getPrimType(); + INT_TYPE_SWITCH_NO_BOOL(ElemPT, { + APSInt V = Vec.elem<T>(Index).toAPSInt(); + pushInteger(S, V, Call->getType()); + }); + return true; + } + } +} + +static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID) { + assert(Call->getNumArgs() == 3); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(2)); + APSInt ValAPS = popToAPSInt(S, Call->getArg(1)); + + const Pointer &Base = S.Stk.pop<Pointer>(); + if (!Base.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek<Pointer>(); + + unsigned NumElts = Base.getNumElems(); + unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1)); + + PrimType ElemPT = Base.getFieldDesc()->getPrimType(); + INT_TYPE_SWITCH_NO_BOOL(ElemPT, { + for (unsigned I = 0; I != NumElts; ++I) + Dst.elem<T>(I) = Base.elem<T>(I); + Dst.elem<T>(Index) = static_cast<T>(ValAPS); + }); + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3686,6 +3740,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_insert128i256: return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_vec_ext_v4hi: + case X86::BI__builtin_ia32_vec_ext_v16qi: + case X86::BI__builtin_ia32_vec_ext_v8hi: + case X86::BI__builtin_ia32_vec_ext_v4si: + case X86::BI__builtin_ia32_vec_ext_v2di: + case X86::BI__builtin_ia32_vec_ext_v32qi: + case X86::BI__builtin_ia32_vec_ext_v16hi: + case X86::BI__builtin_ia32_vec_ext_v8si: + case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_ext_v4sf: + return interp__builtin_vec_ext(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_vec_set_v4hi: + case X86::BI__builtin_ia32_vec_set_v16qi: + case X86::BI__builtin_ia32_vec_set_v8hi: + case X86::BI__builtin_ia32_vec_set_v4si: + case X86::BI__builtin_ia32_vec_set_v2di: + case X86::BI__builtin_ia32_vec_set_v32qi: + case X86::BI__builtin_ia32_vec_set_v16hi: + case X86::BI__builtin_ia32_vec_set_v8si: + case X86::BI__builtin_ia32_vec_set_v4di: + return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b706b14945b6d..98cdbc03d00aa 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12235,6 +12235,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case clang::X86::BI__builtin_ia32_vec_set_v4hi: + case clang::X86::BI__builtin_ia32_vec_set_v16qi: + case clang::X86::BI__builtin_ia32_vec_set_v8hi: + case clang::X86::BI__builtin_ia32_vec_set_v4si: + case clang::X86::BI__builtin_ia32_vec_set_v2di: + case clang::X86::BI__builtin_ia32_vec_set_v32qi: + case clang::X86::BI__builtin_ia32_vec_set_v16hi: + case clang::X86::BI__builtin_ia32_vec_set_v8si: + case clang::X86::BI__builtin_ia32_vec_set_v4di: { + APValue VecVal; + APSInt Scalar, IndexAPS; + if (!EvaluateVector(E->getArg(0), VecVal, Info) || + !EvaluateInteger(E->getArg(1), Scalar, Info) || + !EvaluateInteger(E->getArg(2), IndexAPS, Info)) + return false; + + QualType ElemTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned ElemWidth = Info.Ctx.getIntWidth(ElemTy); + bool ElemUnsigned = ElemTy->isUnsignedIntegerOrEnumerationType(); + Scalar.setIsUnsigned(ElemUnsigned); + APSInt ElemAPS = Scalar.extOrTrunc(ElemWidth); + APValue ElemAV(ElemAPS); + + unsigned NumElts = VecVal.getVectorLength(); + unsigned Index = + static_cast<unsigned>(IndexAPS.getZExtValue() & (NumElts - 1)); + + SmallVector<APValue, 4> Elts; + Elts.reserve(NumElts); + for (unsigned EltNum = 0; EltNum != NumElts; ++EltNum) + Elts.push_back(EltNum == Index ? ElemAV : VecVal.getVectorElt(EltNum)); + + return Success(APValue(Elts.data(), NumElts), E); + } } } @@ -14822,6 +14857,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); } + + case clang::X86::BI__builtin_ia32_vec_ext_v4hi: + case clang::X86::BI__builtin_ia32_vec_ext_v16qi: + case clang::X86::BI__builtin_ia32_vec_ext_v8hi: + case clang::X86::BI__builtin_ia32_vec_ext_v4si: + case clang::X86::BI__builtin_ia32_vec_ext_v2di: + case clang::X86::BI__builtin_ia32_vec_ext_v32qi: + case clang::X86::BI__builtin_ia32_vec_ext_v16hi: + case clang::X86::BI__builtin_ia32_vec_ext_v8si: + case clang::X86::BI__builtin_ia32_vec_ext_v4di: { + APValue Vec; + APSInt IdxAPS; + if (!EvaluateVector(E->getArg(0), Vec, Info) || + !EvaluateInteger(E->getArg(1), IdxAPS, Info)) + return false; + unsigned N = Vec.getVectorLength(); + unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1)); + return Success(Vec.getVectorElt(Idx).getInt(), E); + } } } @@ -16638,6 +16692,17 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { (void)Result.fusedMultiplyAdd(SourceY, SourceZ, RM); return true; } + + case clang::X86::BI__builtin_ia32_vec_ext_v4sf: { + APValue Vec; + APSInt IdxAPS; + if (!EvaluateVector(E->getArg(0), Vec, Info) || + !EvaluateInteger(E->getArg(1), IdxAPS, Info)) + return false; + unsigned N = Vec.getVectorLength(); + unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1)); + return Success(Vec.getVectorElt(Idx), E); + } } } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 3018bb9719b89..5f08b6be81ab7 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1039,6 +1039,7 @@ int test_mm256_extract_epi8(__m256i A) { // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 31); } +TEST_CONSTEXPR(_mm256_extract_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 45) == 13); int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 @@ -1046,12 +1047,14 @@ int test_mm256_extract_epi16(__m256i A) { // CHECK: zext i16 %{{.*}} to i32 return _mm256_extract_epi16(A, 15); } +TEST_CONSTEXPR(_mm256_extract_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 50) == 4); int test_mm256_extract_epi32(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi32 // CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7 return _mm256_extract_epi32(A, 7); } +TEST_CONSTEXPR(_mm256_extract_epi32(((__m256i)(__v8si){0, 5, 10, 15, 20, 25, 30, 35}), 18) == 10); #if __x86_64__ long long test_mm256_extract_epi64(__m256i A) { @@ -1059,6 +1062,7 @@ long long test_mm256_extract_epi64(__m256i A) { // X64: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3 return _mm256_extract_epi64(A, 3); } +TEST_CONSTEXPR(_mm256_extract_epi64(((__m256i)(__v4di){5, 15, 25, 35}), 14) == 25); #endif __m128d test_mm256_extractf128_pd(__m256d A) { @@ -1120,18 +1124,21 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14 return _mm256_insert_epi8(x, b, 14); } +TEST_CONSTEXPR(match_v32qi(_mm256_insert_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 77, 47), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 77, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); __m256i test_mm256_insert_epi16(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi16 // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4 return _mm256_insert_epi16(x, b, 4); } +TEST_CONSTEXPR(match_v16hi(_mm256_insert_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 909, 62), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 909, 30)); __m256i test_mm256_insert_epi32(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi32 // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5 return _mm256_insert_epi32(x, b, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_insert_epi32(((__m256i)(__v8si){ 0, 5, 10, 15, 20, 25, 30, 35}), 4321, 18), 0, 5, 4321, 15, 20, 25, 30, 35)); #if __x86_64__ __m256i test_mm256_insert_epi64(__m256i x, long long b) { @@ -1139,6 +1146,7 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) { // X64: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2 return _mm256_insert_epi64(x, b, 2); } +TEST_CONSTEXPR(match_v4di(_mm256_insert_epi64(((__m256i)(__v4di){5, 15, 25, 35}), -123456789LL, 10), 5, 15, -123456789LL, 35)); #endif __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) { diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 26c5f7315457e..f9ee32e440795 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -292,6 +292,7 @@ int test_mm_extract_pi16(__m64 a) { // CHECK: extractelement <4 x i16> {{%.*}}, i64 2 return _mm_extract_pi16(a, 2); } +TEST_CONSTEXPR(_mm_extract_pi16(((__m64)(__v4hi){10, 20, 30, 40}), 7) == 40); __m64 test_m_from_int(int a) { // CHECK-LABEL: test_m_from_int @@ -347,6 +348,7 @@ __m64 test_mm_insert_pi16(__m64 a, int d) { // CHECK: insertelement <4 x i16> return _mm_insert_pi16(a, d, 2); } +TEST_CONSTEXPR(match_v4hi(_mm_insert_pi16(((__m64)(__v4hi){0, 1, 2, 3}), 77, 10), 0, 1, 77, 3)); __m64 test_mm_madd_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_madd_pi16 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 84b90c09444c2..65bfec39c8f5a 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -723,12 +723,14 @@ int test_mm_extract_epi16(__m128i A) { // CHECK: zext i16 %{{.*}} to i32 return _mm_extract_epi16(A, 1); } +TEST_CONSTEXPR(_mm_extract_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 25) == 10); __m128i test_mm_insert_epi16(__m128i A, int B) { // CHECK-LABEL: test_mm_insert_epi16 // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0 return _mm_insert_epi16(A, B, 0); } +TEST_CONSTEXPR(match_v8hi(_mm_insert_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 555, 17), 0, 555, 20, 30, 40, 50, 60, 70)); void test_mm_lfence(void) { // CHECK-LABEL: test_mm_lfence diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 3c3724643870e..eee479a755ab4 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -231,24 +231,28 @@ int test_mm_extract_epi8(__m128i x) { // CHECK: zext i8 %{{.*}} to i32 return _mm_extract_epi8(x, 1); } +TEST_CONSTEXPR(_mm_extract_epi8(((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 20) == 4); int test_mm_extract_epi32(__m128i x) { // CHECK-LABEL: test_mm_extract_epi32 // CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi32(x, 1); } +TEST_CONSTEXPR(_mm_extract_epi32(((__m128i)(__v4si){1, 3, 5, 7}), 10) == 5); long long test_mm_extract_epi64(__m128i x) { // CHECK-LABEL: test_mm_extract_epi64 // CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi64(x, 1); } +TEST_CONSTEXPR(_mm_extract_epi64(((__m128i)(__v2di){11, 22}), 5) == 22); int test_mm_extract_ps(__m128 x) { // CHECK-LABEL: test_mm_extract_ps // CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1 return _mm_extract_ps(x, 1); } +TEST_CONSTEXPR(_mm_extract_ps(((__m128){1.25f, 2.5f, 3.75f, 5.0f}), 6) == __builtin_bit_cast(int, 3.75f)); __m128d test_mm_floor_pd(__m128d x) { // CHECK-LABEL: test_mm_floor_pd @@ -279,12 +283,14 @@ __m128i test_mm_insert_epi8(__m128i x, char b) { // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi8(x, b, 1); } +TEST_CONSTEXPR(match_v16qi(_mm_insert_epi8(((__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 101, 33), 0, 101, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); __m128i test_mm_insert_epi32(__m128i x, int b) { // CHECK-LABEL: test_mm_insert_epi32 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi32(x, b, 1); } +TEST_CONSTEXPR(match_v4si(_mm_insert_epi32(((__m128i)(__v4si){0, 1, 2, 3}), 5678, 18), 0, 1, 5678, 3)); #ifdef __x86_64__ __m128i test_mm_insert_epi64(__m128i x, long long b) { @@ -292,6 +298,7 @@ __m128i test_mm_insert_epi64(__m128i x, long long b) { // X64: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi64(x, b, 1); } +TEST_CONSTEXPR(match_v2di(_mm_insert_epi64(((__m128i)(__v2di){100, 200}), -999, 9), 100, -999)); #endif __m128 test_mm_insert_ps(__m128 x, __m128 y) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
