llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon) <details> <summary>Changes</summary> Extends interp__builtin_elementwise_triop to handle (vector, vector, scalar) trinary op intrinsics Fixes #<!-- -->153152 --- Patch is 45.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157681.diff 5 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.td (+14-59) - (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+53-1) - (modified) clang/lib/AST/ExprConstant.cpp (+63) - (modified) clang/test/CodeGen/X86/avx512vbmi2-builtins.c (+18) - (modified) clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c (+36) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b4ff550d27279..597d6fb334f31 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1772,75 +1772,30 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; + def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; } -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a0dcdace854b9..08cd100e79d35 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2851,11 +2851,32 @@ static bool interp__builtin_elementwise_triop( return true; } - // Vector type. const auto *VecT = Arg0Type->castAs<VectorType>(); const PrimType &ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + // Vector + Vector + Scalar case. + if (!Arg2Type->isVectorType()) { + APSInt Op2 = popToAPSInt( + S.Stk, *S.getContext().classify(Call->getArg(2)->getType())); + + const Pointer &Op1 = S.Stk.pop<Pointer>(); + const Pointer &Op0 = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + Dst.elem<T>(I) = static_cast<T>(APSInt( + Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2), + DestUnsigned)); + }); + } + Dst.initializeAllElements(); + + return true; + } + // Vector type. const Pointer &Op2 = S.Stk.pop<Pointer>(); const Pointer &Op1 = S.Stk.pop<Pointer>(); const Pointer &Op0 = S.Stk.pop<Pointer>(); @@ -3421,6 +3442,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) { + return llvm::APIntOps::fshl(Hi, Lo, Amt); + }); + + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + // NOTE: Reversed Hi/Lo operands. + return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) { + return llvm::APIntOps::fshr(Hi, Lo, Amt); + }); + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: case clang::X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ca930737474df..07eabbe79cbb8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11860,6 +11860,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: { + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(1), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: { + // NOTE: Reversed Hi/Lo operands. + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(1), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshr(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index aecd965551ce1..560035598a6e4 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -96,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999)); __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi64 @@ -103,12 +104,14 @@ __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0)); __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi64 // CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL)); __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi32 @@ -116,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999)); __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi32 @@ -123,12 +127,14 @@ __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0)); __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256)); __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi16 @@ -136,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 999, -8321, 999, 8704, -8832, 999, -8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999)); __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi16 @@ -143,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0)); __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 11903, -11777, -11905, 12160)); __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi64 @@ -156,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 16384, 32767, 999, 999, 999, -49153, 999)); __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi64 @@ -163,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 16384, 32767, 0, 0, 0, -49153, 0)); __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi64 // CHECK: call <8 x i64> @llvm... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/157681 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits