https://github.com/sskzakaria updated https://github.com/llvm/llvm-project/pull/165054
>From 00115fd82621a909b6ee8bf049159fd09da3cba1 Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 24 Oct 2025 18:01:57 -0400 Subject: [PATCH 1/5] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX512 mask predicate intrinsics --- clang/include/clang/Basic/BuiltinsX86.td | 33 +++++++++++++------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 33 ++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 27 ++++++++++++++++ clang/lib/Headers/avx512vlbwintrin.h | 10 +++--- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 5 +++ 5 files changed, 91 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 62c70fba946be..c962b28668562 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2502,24 +2502,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">; def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">; def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512dq", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">; def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">; def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">; def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">; } -let Features = "avx512bw,avx512vl", Attributes = 
[NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">; } @@ -2539,11 +2543,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">; } @@ -2563,11 +2569,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">; } @@ -3361,15 
+3369,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">; } -let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512bw,avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ee18be166845..f4c61b0ae8d06 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3128,6 +3128,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID) { + assert(Call->getNumArgs() == 1); + + const Pointer &Vec = S.Stk.pop<Pointer>(); + APInt RetMask(Vec.getNumElems(), 0); + unsigned VectorLen = Vec.getNumElems(); + PrimType ElemT = Vec.getFieldDesc()->getPrimType(); + + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + APSInt A; + INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); }); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + pushInteger(S, RetMask, Call->getType()); + return 
true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4141,6 +4160,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: + return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 16141b27f4ce8..75a633a5c6232 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15449,6 +15449,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1)); return Success(Vec.getVectorElt(Idx).getInt(), E); } + + case clang::X86::BI__builtin_ia32_cvtb2mask128: + case clang::X86::BI__builtin_ia32_cvtb2mask256: + case clang::X86::BI__builtin_ia32_cvtb2mask512: + case clang::X86::BI__builtin_ia32_cvtw2mask128: + case clang::X86::BI__builtin_ia32_cvtw2mask256: + case clang::X86::BI__builtin_ia32_cvtw2mask512: + case clang::X86::BI__builtin_ia32_cvtd2mask128: + case clang::X86::BI__builtin_ia32_cvtd2mask256: + case clang::X86::BI__builtin_ia32_cvtd2mask512: + case clang::X86::BI__builtin_ia32_cvtq2mask128: + case 
clang::X86::BI__builtin_ia32_cvtq2mask256: + case clang::X86::BI__builtin_ia32_cvtq2mask512: { + assert(E->getNumArgs() == 1); + APValue Vec; + if (!EvaluateVector(E->getArg(0), Vec, Info)) + return false; + + unsigned VectorLen = Vec.getVectorLength(); + APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true); + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + const APSInt &A = Vec.getVectorElt(ElemNum).getInt(); + unsigned MSB = A[A.getBitWidth() - 1]; + RetMask.setBitVal(ElemNum, MSB); + } + return Success(APValue(RetMask), E); + } } } diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 639fb60f476c6..d15e4fdc2ffc1 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2492,15 +2492,13 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 -_mm_movepi8_mask (__m128i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi8_mask(__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 -_mm256_movepi8_mask (__m256i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi8_mask(__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d569283928a0a..c38e9a26c3609 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3009,6 +3009,11 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { return _mm_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm_movepi8_mask( + ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15}) +) == (__mmask16)0x0000); + __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi8_mask // CHECK: 
[[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer >From 6df958d73c16e75129b2137dd1f5878950a6ce24 Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 24 Oct 2025 18:57:35 -0400 Subject: [PATCH 2/5] _mm256_movepi8_mask --- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index c38e9a26c3609..45ebc1d819ce7 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3010,9 +3010,9 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { } TEST_CONSTEXPR(_mm_movepi8_mask( - ((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, + ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) -) == (__mmask16)0x0000); +) == (__mmask16)0x0004); __mmask32 test_mm256_movepi8_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi8_mask @@ -3020,6 +3020,13 @@ __mmask32 test_mm256_movepi8_mask(__m256i __A) { return _mm256_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi8_mask( + ((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, char(128)}) +) == (__mmask32)0x80000004); + __m128i test_mm_movm_epi8(__mmask16 __A) { // CHECK-LABEL: test_mm_movm_epi8 // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> >From 01d277598e6a660bd3c4c63d7761233c19d6d277 Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 24 Oct 2025 19:14:54 -0400 Subject: [PATCH 3/5] fixing merge conflict --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 7c3c2fdd7c591..3837764fab96b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3336,6 +3336,9 @@ static bool 
interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC, RetMask.setBitVal(ElemNum, MSB); } pushInteger(S, RetMask, Call->getType()); + return true; +} + static bool interp__builtin_x86_byteshift( InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I, @@ -4514,6 +4517,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cvtq2mask256: case X86::BI__builtin_ia32_cvtq2mask512: return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: >From e5e642b0f0f0ea80ca344179cda78eb712e1259e Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Wed, 29 Oct 2025 17:26:19 -0400 Subject: [PATCH 4/5] minor --- clang/lib/AST/ExprConstant.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3a77567189244..46b3baa42e2fe 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15791,6 +15791,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned MSB = A[A.getBitWidth() - 1]; RetMask.setBitVal(ElemNum, MSB); } + return Success(APValue(RetMask), E); + } + case clang::X86::BI__builtin_ia32_cmpb128_mask: case clang::X86::BI__builtin_ia32_cmpw128_mask: case clang::X86::BI__builtin_ia32_cmpd128_mask: >From 7a4694021a6679c495310299d9e8aa6ac04de214 Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Mon, 17 Nov 2025 20:31:18 -0500 Subject: [PATCH 5/5] movepi test coverage --- clang/lib/Headers/avx512bwintrin.h | 10 +++---- clang/lib/Headers/avx512dqintrin.h | 10 +++---- clang/lib/Headers/avx512vlbwintrin.h | 10 +++---- clang/lib/Headers/avx512vldqintrin.h | 20 +++++-------- clang/test/CodeGen/X86/avx512bw-builtins.c | 31 ++++++++++++++++++++ 
clang/test/CodeGen/X86/avx512dq-builtins.c | 4 +++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 8 +++-- clang/test/CodeGen/X86/avx512vldq-builtins.c | 12 ++++++++ 8 files changed, 72 insertions(+), 33 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index ac75b6ccde735..87687fcb0bf7f 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1776,15 +1776,13 @@ _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 -_mm512_movepi8_mask (__m512i __A) -{ +static __inline__ __mmask64 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi8_mask(__m512i __A) { return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 -_mm512_movepi16_mask (__m512i __A) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi16_mask(__m512i __A) { return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fef1a2d64d538..bcdaea5a75ddd 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1052,9 +1052,8 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(C), (int)(R))) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 -_mm512_movepi32_mask (__m512i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi32_mask(__m512i __A) { return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); } @@ -1070,9 +1069,8 @@ _mm512_movm_epi64 (__mmask8 __A) return (__m512i) __builtin_ia32_cvtmask2q512 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 -_mm512_movepi64_mask (__m512i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_movepi64_mask(__m512i __A) { return (__mmask8) __builtin_ia32_cvtq2mask512 
((__v8di) __A); } diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 65392c070dcde..c46b1d49515b5 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2498,15 +2498,13 @@ _mm256_movepi8_mask(__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi16_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi16_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 -_mm256_movepi16_mask (__m256i __A) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi16_mask(__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 707d039cf4c07..8014ee4bf555a 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -914,15 +914,13 @@ _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A) { (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi32_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi32_mask(__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi32_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi32_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } @@ -950,15 +948,13 @@ _mm256_movm_epi64 (__mmask8 __A) return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 -_mm_movepi64_mask (__m128i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_movepi64_mask(__m128i __A) { return (__mmask8) 
__builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 -_mm256_movepi64_mask (__m256i __A) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_movepi64_mask(__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index be2cd480f7558..a6671451c4527 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2537,6 +2537,18 @@ __mmask64 test_mm512_movepi8_mask(__m512i __A) { return _mm512_movepi8_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi8_mask( + ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, char(255)}) +) == (__mmask64)0x8000000000000004); + + __m512i test_mm512_movm_epi8(__mmask64 __A) { // CHECK-LABEL: test_mm512_movm_epi8 // CHECK: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1> @@ -2544,6 +2556,18 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) { return _mm512_movm_epi8(__A); } +TEST_CONSTEXPR(_mm512_movepi8_mask( + ((__m512i)(__v64qi){0, 1, char(129), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, char(255)}) +) == (__mmask64)0x8000000000000004); + + __m512i test_mm512_movm_epi16(__mmask32 __A) { // CHECK-LABEL: test_mm512_movm_epi16 // CHECK: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1> @@ -2741,6 +2765,13 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) { return _mm512_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi16_mask( + ((__m512i)(__v32hi){0, 1, short(32768), 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 
13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, short(32768)}) +) == (__mmask32)0x80000004); + void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) { // CHECK-LABEL: test_mm512_mask_cvtepi16_storeu_epi8 diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 9c4ada3a2b7b8..ef34326a9ef91 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -1251,6 +1251,8 @@ __mmask16 test_mm512_movepi32_mask(__m512i __A) { return _mm512_movepi32_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi32_mask(((__m512i)(__v16si){0, 1, -1, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, -1})) == (__mmask16)0x8004); + __m512i test_mm512_movm_epi32(__mmask16 __A) { // CHECK-LABEL: test_mm512_movm_epi32 // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> @@ -1271,6 +1273,8 @@ __mmask8 test_mm512_movepi64_mask(__m512i __A) { return _mm512_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm512_movepi64_mask(((__m512i)(__v8di){0, 1, -1, 3, 4, 5, 6, -1})) == (__mmask8)0x84); + __m512 test_mm512_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: test_mm512_broadcast_f32x2 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index f3c65325c84ac..ab6288d72ee44 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3052,9 +3052,7 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) { } TEST_CONSTEXPR(_mm_movepi8_mask( - ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15}) -) == (__mmask16)0x0004); + ((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})) == (__mmask16)0x0004); __mmask32 test_mm256_movepi8_mask(__m256i 
__A) { // CHECK-LABEL: test_mm256_movepi8_mask @@ -3454,6 +3452,7 @@ __mmask8 test_mm_movepi16_mask(__m128i __A) { // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> %{{.*}}, zeroinitializer return _mm_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm_movepi16_mask(((__m128i)(__v8hi){0, 1, -1, 3, 4, 5, 6, 7})) == (__mmask8)0x04); __mmask16 test_mm256_movepi16_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi16_mask @@ -3461,6 +3460,9 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) { return _mm256_movepi16_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi16_mask( + ((__m256i)(__v16hi){0, 1, short(32769), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, short(65535)})) == (__mmask16)0x8004); + __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_shufflehi_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4> diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 1bfc0229eeb26..92d8e1aa0879a 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -921,12 +921,21 @@ __mmask8 test_mm_movepi32_mask(__m128i __A) { return _mm_movepi32_mask(__A); } +// Constexpr check for the 128-bit dword variant: lanes 2 and 3 have their +// sign bit set, so mask bits 2 and 3 (0x0C) are expected. +TEST_CONSTEXPR(_mm_movepi32_mask( + ((__m128i)(__v4si){0, 1, + -1, -3}) +) == (__mmask8)0x0C); + __mmask8 test_mm256_movepi32_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi32_mask // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer return _mm256_movepi32_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi32_mask(((__m256i)(__v8si){0, 1, -1, 3, 4, 5, 6, -2147483648})) == (__mmask8)0x84); + __m128i test_mm_movm_epi32(__mmask8 __A) { // CHECK-LABEL: test_mm_movm_epi32 // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> @@ -965,6 +974,8 @@ __mmask8
test_mm_movepi64_mask(__m128i __A) { return _mm_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm_movepi64_mask(((__m128i)(__v2di){0, -1})) == (__mmask8)0x02); + __mmask8 test_mm256_movepi64_mask(__m256i __A) { // CHECK-LABEL: test_mm256_movepi64_mask // CHECK: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer @@ -972,6 +983,7 @@ __mmask8 test_mm256_movepi64_mask(__m256i __A) { return _mm256_movepi64_mask(__A); } +TEST_CONSTEXPR(_mm256_movepi64_mask(((__m256i)(__v4di){0, 1, -1, 3})) == (__mmask8)0x04); __m256 test_mm256_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: test_mm256_broadcast_f32x2 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
