https://github.com/sskzakaria updated https://github.com/llvm/llvm-project/pull/164026
>From dbcb924813e9d657e7aba3b7a9cd79b7635f63ab Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 17 Oct 2025 18:27:24 -0400 Subject: [PATCH 1/3] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 integer comparison intrinsics to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 36 ++++----- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 84 ++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 81 +++++++++++++++++++ clang/lib/Headers/avx512vlbwintrin.h | 8 +- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 23 ++++++ 5 files changed, 210 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 62c70fba946be..e409042c5818e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1272,81 +1272,81 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ee18be166845..0ba8fc0cbc203 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3101,6 +3101,62 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, return true; } + static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID, + bool IsUnsigned) { + assert(Call->getNumArgs() == 4); + + APSInt Mask = popToAPSInt(S, Call->getArg(3)); + APSInt Opcode = popToAPSInt(S, Call->getArg(2)); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &RHS = S.Stk.pop<Pointer>(); + + assert(LHS.getNumElems() == RHS.getNumElems()); + + APInt RetMask = APInt::getZero(LHS.getNumElems()); + unsigned VectorLen = LHS.getNumElems(); + PrimType ElemT = LHS.getFieldDesc()->getPrimType(); + + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + APSInt A = LHS.elem<T>(ElemNum).toAPSInt(); + APSInt B = RHS.elem<T>(ElemNum).toAPSInt(); + bool result = false; + switch (Opcode.getExtValue() & 0x7) { + case 0x00: // _MM_CMPINT_EQ + result = (A == B); + break; + case 0x01: // _MM_CMPINT_LT + result = IsUnsigned ? A.ult(B) : A.slt(B); + break; + case 0x02: // _MM_CMPINT_LE + result = IsUnsigned ? A.ule(B) : A.sle(B); + break; + case 0x03: // _MM_CMPINT_FALSE + result = false; + break; + case 0x04: // _MM_CMPINT_NE + result = (A != B); + break; + case 0x05: // _MM_CMPINT_NLT (>=) + result = IsUnsigned ? A.uge(B) : A.sge(B); + break; + case 0x06: // _MM_CMPINT_NLE (>) + result = IsUnsigned ? A.ugt(B) : A.sgt(B); + break; + case 0x07: // _MM_CMPINT_TRUE + result = true; + break; + } + + RetMask.setBitVal(ElemNum, Mask[ElemNum] && result); + } + }); + + pushInteger(S, RetMask, Call->getType()); + return true; + } + static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, const CallExpr *Call) { assert(Call->getNumArgs() == 1); @@ -4141,6 +4197,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_cmpb128_mask: + case X86::BI__builtin_ia32_cmpw128_mask: + case X86::BI__builtin_ia32_cmpd128_mask: + case X86::BI__builtin_ia32_cmpq128_mask: + case X86::BI__builtin_ia32_cmpb256_mask: + case X86::BI__builtin_ia32_cmpw256_mask: + case X86::BI__builtin_ia32_cmpd256_mask: + case X86::BI__builtin_ia32_cmpq256_mask: + case X86::BI__builtin_ia32_cmpb512_mask: + case X86::BI__builtin_ia32_cmpw512_mask: + case X86::BI__builtin_ia32_cmpd512_mask: + case X86::BI__builtin_ia32_cmpq512_mask: + return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/false); + + case X86::BI__builtin_ia32_ucmpb128_mask: + case X86::BI__builtin_ia32_ucmpw128_mask: + case X86::BI__builtin_ia32_ucmpd128_mask: + case X86::BI__builtin_ia32_ucmpq128_mask: + case X86::BI__builtin_ia32_ucmpb256_mask: + case X86::BI__builtin_ia32_ucmpw256_mask: + case X86::BI__builtin_ia32_ucmpd256_mask: + case X86::BI__builtin_ia32_ucmpq256_mask: + case X86::BI__builtin_ia32_ucmpb512_mask: + case X86::BI__builtin_ia32_ucmpw512_mask: + case X86::BI__builtin_ia32_ucmpd512_mask: + case X86::BI__builtin_ia32_ucmpq512_mask: + return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/true); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 16141b27f4ce8..ef17e16388fd8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15449,6 +15449,87 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1)); return Success(Vec.getVectorElt(Idx).getInt(), E); } + + case clang::X86::BI__builtin_ia32_cmpb128_mask: + case clang::X86::BI__builtin_ia32_cmpw128_mask: + case clang::X86::BI__builtin_ia32_cmpd128_mask: + case clang::X86::BI__builtin_ia32_cmpq128_mask: + case clang::X86::BI__builtin_ia32_cmpb256_mask: + case clang::X86::BI__builtin_ia32_cmpw256_mask: + case clang::X86::BI__builtin_ia32_cmpd256_mask: + case clang::X86::BI__builtin_ia32_cmpq256_mask: + case clang::X86::BI__builtin_ia32_cmpb512_mask: + case clang::X86::BI__builtin_ia32_cmpw512_mask: + case clang::X86::BI__builtin_ia32_cmpd512_mask: + case clang::X86::BI__builtin_ia32_cmpq512_mask: + case clang::X86::BI__builtin_ia32_ucmpb128_mask: + case clang::X86::BI__builtin_ia32_ucmpw128_mask: + case clang::X86::BI__builtin_ia32_ucmpd128_mask: + case clang::X86::BI__builtin_ia32_ucmpq128_mask: + case clang::X86::BI__builtin_ia32_ucmpb256_mask: + case clang::X86::BI__builtin_ia32_ucmpw256_mask: + case clang::X86::BI__builtin_ia32_ucmpd256_mask: + case clang::X86::BI__builtin_ia32_ucmpq256_mask: + case clang::X86::BI__builtin_ia32_ucmpb512_mask: + case clang::X86::BI__builtin_ia32_ucmpw512_mask: + case clang::X86::BI__builtin_ia32_ucmpd512_mask: + case clang::X86::BI__builtin_ia32_ucmpq512_mask: { + assert(E->getNumArgs() == 4); + + bool IsUnsigned = (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask && + BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask); + + APValue LHS, RHS; + APSInt Mask, Opcode; + if (!EvaluateVector(E->getArg(0), LHS, Info) || + !EvaluateVector(E->getArg(1), RHS, Info) || + !EvaluateInteger(E->getArg(2), Opcode, Info) || + !EvaluateInteger(E->getArg(3), Mask, Info)) + return false; + + assert(LHS.getVectorLength() == RHS.getVectorLength()); + + APSInt RetMask = APSInt::getUnsigned(0); + unsigned VectorLen = LHS.getVectorLength(); + + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + APSInt A = LHS.getVectorElt(ElemNum).getInt(); + APSInt B = RHS.getVectorElt(ElemNum).getInt(); + bool result = false; + + switch (Opcode.getExtValue() & 0x7) { + case 0: // _MM_CMPINT_EQ + result = (A == B); + break; + case 1: // _MM_CMPINT_LT + result = IsUnsigned ? A.ult(B) : A.slt(B); + break; + case 2: // _MM_CMPINT_LE + result = IsUnsigned ? A.ule(B) : A.sle(B); + break; + case 3: // _MM_CMPINT_FALSE + result = false; + break; + case 4: // _MM_CMPINT_NE + result = (A != B); + break; + case 5: // _MM_CMPINT_NLT (>=) + result = IsUnsigned ? A.uge(B) : A.sge(B); + break; + case 6: // _MM_CMPINT_NLE (>) + result = IsUnsigned ? A.ugt(B) : A.sgt(B); + break; + case 7: // _MM_CMPINT_TRUE + result = true; + break; + } + + RetMask.setBitVal(ElemNum, Mask[ElemNum] && result); + } + + RetMask.setIsUnsigned(true); + return Success(APValue(RetMask), E); + } } } diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 639fb60f476c6..ff7ee777ea82a 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2385,20 +2385,20 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) (__mmask32) __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_test_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_test_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), @@ -2439,7 +2439,7 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testn_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d569283928a0a..1cc4518484c19 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -2890,6 +2890,12 @@ __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) { return _mm_test_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm_test_epi8_mask( + (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask16)0xfffb); + __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_test_epi8_mask // CHECK: and <2 x i64> %{{.*}}, %{{.*}} @@ -2897,6 +2903,12 @@ __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK: and <16 x i1> %{{.*}}, %{{.*}} return _mm_mask_test_epi8_mask(__U, __A, __B); } +TEST_CONSTEXPR(_mm_mask_test_epi8_mask( + 0xFFFF, + (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask16)0xfffb); __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_test_epi8_mask @@ -2904,6 +2916,11 @@ __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) { // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}} return _mm256_test_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm256_test_epi8_mask( + (__m256i)(__v32qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + (__m256i)(__v32qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} +) +== (__mmask32)0xfffbfffb); __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_test_epi8_mask @@ -2950,6 +2967,12 @@ __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) { return _mm_testn_epi8_mask(__A, __B); } +TEST_CONSTEXPR(_mm_testn_epi8_mask( + (__m128i)(__v16qi){1, 2, 77, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 16, 16}, + (__m128i)(__v16qi){2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15} +) +== (__mmask16)0xe001); + __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_testn_epi8_mask // CHECK: and <2 x i64> %{{.*}}, %{{.*}} >From fec68e4aecd482f216dcf520b94fb60f44075e22 Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 17 Oct 2025 21:32:39 -0400 Subject: [PATCH 2/3] build error --- clang/include/clang/Basic/BuiltinsX86.td | 54 ++++++++---- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 104 ++++++++++++----------- clang/lib/AST/ExprConstant.cpp | 60 +++++++------ clang/lib/Headers/avx512vlbwintrin.h | 12 +-- 4 files changed, 125 insertions(+), 105 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index e409042c5818e..a2a1746fd1cae 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1272,81 +1272,99 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def cmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def cmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def cmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def cmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def cmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def cmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, _Constant int, unsigned short)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpd128_mask : X86Builtin<"unsigned char(_Vector<4, int>, _Vector<4, int>, _Constant int, unsigned char)">; def ucmpq128_mask : X86Builtin<"unsigned char(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def ucmpw128_mask : X86Builtin<"unsigned char(_Vector<8, short>, _Vector<8, short>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, _Constant int, unsigned int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpd256_mask : X86Builtin<"unsigned char(_Vector<8, int>, _Vector<8, int>, _Constant int, unsigned char)">; def ucmpq256_mask : X86Builtin<"unsigned char(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def ucmpw256_mask : X86Builtin<"unsigned short(_Vector<16, short>, _Vector<16, short>, _Constant int, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, _Constant int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512f", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpd512_mask : X86Builtin<"unsigned short(_Vector<16, int>, _Vector<16, int>, _Constant int, unsigned short)">; def ucmpq512_mask : X86Builtin<"unsigned char(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int, unsigned char)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512bw", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0ba8fc0cbc203..2526c13b529a7 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3101,61 +3101,61 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, return true; } - static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC, - const CallExpr *Call, unsigned ID, - bool IsUnsigned) { - assert(Call->getNumArgs() == 4); +static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID, + bool IsUnsigned) { + assert(Call->getNumArgs() == 4); - APSInt Mask = popToAPSInt(S, Call->getArg(3)); - APSInt Opcode = popToAPSInt(S, Call->getArg(2)); - const Pointer &LHS = S.Stk.pop<Pointer>(); - const Pointer &RHS = S.Stk.pop<Pointer>(); - - assert(LHS.getNumElems() == RHS.getNumElems()); + APSInt Mask = popToAPSInt(S, Call->getArg(3)); + APSInt Opcode = popToAPSInt(S, Call->getArg(2)); + const Pointer &LHS = S.Stk.pop<Pointer>(); + const Pointer &RHS = S.Stk.pop<Pointer>(); - APInt RetMask = APInt::getZero(LHS.getNumElems()); - unsigned VectorLen = LHS.getNumElems(); - PrimType ElemT = LHS.getFieldDesc()->getPrimType(); + assert(LHS.getNumElems() == RHS.getNumElems()); - INT_TYPE_SWITCH_NO_BOOL(ElemT, { - for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { - APSInt A = LHS.elem<T>(ElemNum).toAPSInt(); - APSInt B = RHS.elem<T>(ElemNum).toAPSInt(); - bool result = false; - switch (Opcode.getExtValue() & 0x7) { - case 0x00: // _MM_CMPINT_EQ - result = (A == B); - break; - case 0x01: // _MM_CMPINT_LT - result = IsUnsigned ? A.ult(B) : A.slt(B); - break; - case 0x02: // _MM_CMPINT_LE - result = IsUnsigned ? A.ule(B) : A.sle(B); - break; - case 0x03: // _MM_CMPINT_FALSE - result = false; - break; - case 0x04: // _MM_CMPINT_NE - result = (A != B); - break; - case 0x05: // _MM_CMPINT_NLT (>=) - result = IsUnsigned ? A.uge(B) : A.sge(B); - break; - case 0x06: // _MM_CMPINT_NLE (>) - result = IsUnsigned ? A.ugt(B) : A.sgt(B); - break; - case 0x07: // _MM_CMPINT_TRUE - result = true; - break; - } + APInt RetMask = APInt::getZero(LHS.getNumElems()); + unsigned VectorLen = LHS.getNumElems(); + PrimType ElemT = LHS.getFieldDesc()->getPrimType(); - RetMask.setBitVal(ElemNum, Mask[ElemNum] && result); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { + APSInt A = LHS.elem<T>(ElemNum).toAPSInt(); + APSInt B = RHS.elem<T>(ElemNum).toAPSInt(); + bool result = false; + switch (Opcode.getExtValue() & 0x7) { + case 0x00: // _MM_CMPINT_EQ + result = (A == B); + break; + case 0x01: // _MM_CMPINT_LT + result = IsUnsigned ? A.ult(B) : A.slt(B); + break; + case 0x02: // _MM_CMPINT_LE + result = IsUnsigned ? A.ule(B) : A.sle(B); + break; + case 0x03: // _MM_CMPINT_FALSE + result = false; + break; + case 0x04: // _MM_CMPINT_NE + result = (A != B); + break; + case 0x05: // _MM_CMPINT_NLT (>=) + result = IsUnsigned ? A.uge(B) : A.sge(B); + break; + case 0x06: // _MM_CMPINT_NLE (>) + result = IsUnsigned ? A.ugt(B) : A.sgt(B); + break; + case 0x07: // _MM_CMPINT_TRUE + result = true; + break; } - }); - pushInteger(S, RetMask, Call->getType()); - return true; - } + RetMask.setBitVal(ElemNum, Mask[ElemNum] && result); + } + }); + + pushInteger(S, RetMask, Call->getType()); + return true; +} static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, const CallExpr *Call) { @@ -4209,7 +4209,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_cmpw512_mask: case X86::BI__builtin_ia32_cmpd512_mask: case X86::BI__builtin_ia32_cmpq512_mask: - return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/false); + return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, + /*IsUnsigned*/ false); case X86::BI__builtin_ia32_ucmpb128_mask: case X86::BI__builtin_ia32_ucmpw128_mask: @@ -4223,7 +4224,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_ucmpw512_mask: case X86::BI__builtin_ia32_ucmpd512_mask: case X86::BI__builtin_ia32_ucmpq512_mask: - return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned*/true); + return interp__builtin_cmp_mask(S, OpPC, Call, BuiltinID, + /*IsUnsigned*/ true); default: S.FFDiag(S.Current->getLocation(OpPC), diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ef17e16388fd8..ac3e3c51fb983 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15476,8 +15476,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case clang::X86::BI__builtin_ia32_ucmpq512_mask: { assert(E->getNumArgs() == 4); - bool IsUnsigned = (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask && - BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask); + bool IsUnsigned = + (BuiltinOp >= clang::X86::BI__builtin_ia32_ucmpb128_mask && + BuiltinOp <= clang::X86::BI__builtin_ia32_ucmpq512_mask); APValue LHS, RHS; APSInt Mask, Opcode; @@ -15487,41 +15488,44 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, !EvaluateInteger(E->getArg(3), Mask, Info)) return false; - assert(LHS.getVectorLength() == RHS.getVectorLength()); + assert(LHS.getVectorLength() == RHS.getVectorLength()); - APSInt RetMask = APSInt::getUnsigned(0); unsigned VectorLen = LHS.getVectorLength(); + unsigned RetWidth = VectorLen ? VectorLen : 1; + if (Mask.getBitWidth() > RetWidth) + RetWidth = Mask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) { APSInt A = LHS.getVectorElt(ElemNum).getInt(); APSInt B = RHS.getVectorElt(ElemNum).getInt(); bool result = false; switch (Opcode.getExtValue() & 0x7) { - case 0: // _MM_CMPINT_EQ - result = (A == B); - break; - case 1: // _MM_CMPINT_LT - result = IsUnsigned ? A.ult(B) : A.slt(B); - break; - case 2: // _MM_CMPINT_LE - result = IsUnsigned ? A.ule(B) : A.sle(B); - break; - case 3: // _MM_CMPINT_FALSE - result = false; - break; - case 4: // _MM_CMPINT_NE - result = (A != B); - break; - case 5: // _MM_CMPINT_NLT (>=) - result = IsUnsigned ? A.uge(B) : A.sge(B); - break; - case 6: // _MM_CMPINT_NLE (>) - result = IsUnsigned ? A.ugt(B) : A.sgt(B); - break; - case 7: // _MM_CMPINT_TRUE - result = true; - break; + case 0: // _MM_CMPINT_EQ + result = (A == B); + break; + case 1: // _MM_CMPINT_LT + result = IsUnsigned ? A.ult(B) : A.slt(B); + break; + case 2: // _MM_CMPINT_LE + result = IsUnsigned ? A.ule(B) : A.sle(B); + break; + case 3: // _MM_CMPINT_FALSE + result = false; + break; + case 4: // _MM_CMPINT_NE + result = (A != B); + break; + case 5: // _MM_CMPINT_NLT (>=) + result = IsUnsigned ? A.uge(B) : A.sge(B); + break; + case 6: // _MM_CMPINT_NLE (>) + result = IsUnsigned ? A.ugt(B) : A.sgt(B); + break; + case 7: // _MM_CMPINT_TRUE + result = true; + break; } RetMask.setBitVal(ElemNum, Mask[ElemNum] && result); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index ff7ee777ea82a..97e48357f3ccc 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -2386,21 +2386,18 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_test_epi8_mask (__m128i __A, __m128i __B) -{ +_mm_test_epi8_mask(__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) -{ +_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_test_epi8_mask (__m256i __A, __m256i __B) -{ +_mm256_test_epi8_mask(__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } @@ -2440,8 +2437,7 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) } static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_testn_epi8_mask (__m128i __A, __m128i __B) -{ +_mm_testn_epi8_mask(__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } >From ee703e034a3e044ba85af6ab1bf3b3144422015c Mon Sep 17 00:00:00 2001 From: sskzakaria <[email protected]> Date: Fri, 17 Oct 2025 23:59:25 -0400 Subject: [PATCH 3/3] fixed argument order --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/test/CodeGen/X86/avx512vlbw-builtins.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 2526c13b529a7..1f428360f73aa 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3108,8 +3108,8 @@ static bool interp__builtin_cmp_mask(InterpState &S, CodePtr OpPC, APSInt Mask = popToAPSInt(S, Call->getArg(3)); APSInt Opcode = popToAPSInt(S, Call->getArg(2)); - const Pointer &LHS = S.Stk.pop<Pointer>(); const Pointer &RHS = S.Stk.pop<Pointer>(); + const Pointer &LHS = S.Stk.pop<Pointer>(); assert(LHS.getNumElems() == RHS.getNumElems()); diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 1cc4518484c19..5c6a343559c52 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -645,6 +645,21 @@ __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) { return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0); } +TEST_CONSTEXPR(_mm_cmpeq_epi8_mask( + ((__m128i)(__v16qi){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}), + ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}) +) == (__mmask16)0x0000); + +TEST_CONSTEXPR(_mm_cmplt_epi8_mask( + ((__m128i)(__v16qi){5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}), + ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}) +) == (__mmask16)0x0u); + +TEST_CONSTEXPR(_mm_cmple_epi8_mask( + ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}), + ((__m128i)(__v16qi){3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}) +) == (__mmask16)0xffff); + __mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { // CHECK-LABEL: test_mm_mask_cmp_epi8_mask // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
