https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/159998
>From 7b91ea75b304a0b0fb3f14945b5c2f21f035bffe Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 02:40:44 +0800 Subject: [PATCH 01/10] Allow kand* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 22 ++++++++++++++++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++ 9 files changed, 65 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index aac502091b57e..e5555236c7666 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3083,15 +3083,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in { def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kandqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kandhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 
40b9e04aa335c..41d5d76c8c659 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3588,6 +3588,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpd_512: return interp__builtin_select(S, OpPC, Call); + case X86::BI__builtin_ia32_kandqi: + case X86::BI__builtin_ia32_kandhi: + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 12e4e25bd29c4..cd41c54087d08 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13542,6 +13542,20 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { + + auto HandleMaskBinOp = + [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn) + -> bool { + APValue LHS, RHS; + if (!Evaluate(LHS, Info, E->getArg(0)) || + !Evaluate(RHS, Info, E->getArg(1))) + return false; + + APSInt ResultInt = Fn(LHS.getInt(), RHS.getInt()); + + return Success(APValue(ResultInt), E); + }; + switch (BuiltinOp) { default: return false; @@ -14640,6 +14654,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, Result.setBitVal(P++, Val[I]); return Success(Result, E); } + + case X86::BI__builtin_ia32_kandqi: + case X86::BI__builtin_ia32_kandhi: + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 599cfbe479676..cfe16062d8f65 100644 
--- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -42,14 +42,13 @@ static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kand_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 95fdc2851cb9b..52b3e9d148d6e 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -35,9 +35,8 @@ _knot_mask8(__mmask8 __M) return __builtin_ia32_knotqi(__M); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kand_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 8ebfb75170e17..9ffde30cba915 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8116,9 +8116,8 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kand (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } diff --git 
a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3be708aea8a4d..28e9ff611472a 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -36,6 +36,12 @@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness +TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated +TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero +TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one +TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint + __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -46,6 +52,12 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness +TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value +TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero +TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one +TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint + __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 
df096e3607f30..cc15517fe5bb5 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -29,6 +29,12 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08); // data correctness +TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23); // should be truncated +TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00); // all-zero +TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56); // all-one +TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00); // disjoint + __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..70a7ff80f9026 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8502,6 +8502,12 @@ __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88); // data correctness +TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000); // all-zero +TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678); // all-one +TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000); // disjoint + __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kandn // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From a7f6dad463a3b1386412736a31f6632f2b92876a Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 03:13:30 +0800 Subject: [PATCH 02/10] Allow int2mask, mask2int to be used in constexpr --- clang/lib/Headers/avx512fintrin.h | 10 ++++------ clang/test/CodeGen/X86/avx512f-builtins.c | 4 ++++ 
2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9ffde30cba915..5a45082f76e81 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -520,15 +520,13 @@ _mm512_castsi512_si256(__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_int2mask(int __a) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_int2mask(int __a) { return (__mmask16)__a; } -static __inline__ int __DEFAULT_FN_ATTRS -_mm512_mask2int(__mmask16 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_mask2int(__mmask16 __a) { return (int)__a; } diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 70a7ff80f9026..e7f6b5aac8279 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -11037,6 +11037,8 @@ __mmask16 test_mm512_int2mask(int __a) return _mm512_int2mask(__a); } +TEST_CONSTEXPR(_mm512_int2mask((int)0xDEADBEEF) == 0xBEEF); + int test_mm512_mask2int(__mmask16 __a) { // CHECK-LABEL: test_mm512_mask2int @@ -11044,6 +11046,8 @@ int test_mm512_mask2int(__mmask16 __a) return _mm512_mask2int(__a); } +TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); // Zero-extended + __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_mask_move_ss >From 666a158e34aa508d59c548b4ac1e8a763230eff1 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 20:58:35 +0800 Subject: [PATCH 03/10] Add constexpr testcase for _kand_mask16 _kand_mask16 is an alias of _mm512_kand. Although they are semantically identical, the existing test suite contains a dedicated set of tests for _kand_mask16. This commit follows that convention by adding the same constexpr test case for completeness. 
--- clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index e7f6b5aac8279..5a131ab6af367 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8656,6 +8656,12 @@ __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); // data correctness +TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); // all-zero +TEST_CONSTEXPR(_kand_mask16(0x5678, 0xFFFF) == 0x5678); // all-one +TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // disjoint + __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 03225502b6e66dadf9703338f00fc809357515f9 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 21:16:43 +0800 Subject: [PATCH 04/10] Allow kandn* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 23 +++++++++++++++++----- clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 +++++++++++ 9 files changed, 63 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index e5555236c7666..dc74685299736 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3096,15 +3096,15 @@ let
Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kandnqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kandnhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandnsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kandndi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 41d5d76c8c659..28a7e838d8e98 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3596,6 +3596,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + case X86::BI__builtin_ia32_kandnqi: + case X86::BI__builtin_ia32_kandnhi: + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index cd41c54087d08..257ffbe8b74e4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14662,6 +14662,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const 
CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); } + + case X86::BI__builtin_ia32_kandnqi: + case X86::BI__builtin_ia32_kandnhi: + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index cfe16062d8f65..bdabc2382c5e0 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -52,14 +52,13 @@ _kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kandn_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 52b3e9d148d6e..dbc50b464396d 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -40,9 +40,8 @@ _kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kandn_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 5a45082f76e81..9dee968fad0a8 100644 
--- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8119,9 +8119,8 @@ static __inline__ __mmask16 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kandn (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kandn(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 28e9ff611472a..a55b913fc859b 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -52,11 +52,11 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness -TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value -TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero -TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one -TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint +TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA) == 0x8888888888888888); // data correctness +TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0, 0xFFFFFFFFFFFFFFFF) == 0x123456789ABCDEF0); // full 64-bit value +TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789, 0x0000000000000000) == 0x0000000000000000); // all-zero +TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234, 0xFFFFFFFFFFFFFFFF) == 0x56789ABCDEF01234); // all-one +TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // disjoint __mmask32 
test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask32 @@ -68,6 +68,13 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } +// Each case below checks _kandn_mask32(A, B) == (~A & B). +// +TEST_CONSTEXPR(_kandn_mask32(0xA0A0F0F0, 0xCCCCCCCC) == 0x4C4C0C0C); // data correctness +TEST_CONSTEXPR(_kandn_mask32(0x123456789, 0xFFFFFFFF) == 0xDCBA9876); // truncated and inverted +TEST_CONSTEXPR(_kandn_mask32(0x00000000, 0x1234ABCD) == 0x1234ABCD); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask32(0xFFFFFFFF, 0x87654321) == 0x00000000); // all-zero (~0xFFFFFFFF) +TEST_CONSTEXPR(_kandn_mask32(0xAAAAAAAA, 0xAAAAAAAA) == 0x00000000); // ~A & A is 0 __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask64 @@ -80,6 +87,12 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kandn_mask64(0xA0A0F0F0C3C33C3C, 0xCCCCCCCCFFFF0000) == 0x4C4C0C0C3C3C0000); // data correctness +TEST_CONSTEXPR(_kandn_mask64(0x0123456789ABCDEF, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // inverted with all-ones mask +TEST_CONSTEXPR(_kandn_mask64(0x0, 0x1122334455667788) == 0x1122334455667788); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask64(0xFFFFFFFFFFFFFFFF, 0x8877665544332211) == 0x0); // all-zero (~0xFFFFFFFFFFFFFFFF) +TEST_CONSTEXPR(_kandn_mask64(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA) == 0x0); // ~A & A is 0 + __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index cc15517fe5bb5..6a6d6611dc18e 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -46,6 +46,12
@@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kandn_mask8(0xC3, 0xA5) == 0x24); // data correctness +TEST_CONSTEXPR(_kandn_mask8(0x1F0, 0xFF) == 0x0F); // truncated (to 0xF0) and inverted +TEST_CONSTEXPR(_kandn_mask8(0x00, 0xB7) == 0xB7); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask8(0xFF, 0x7E) == 0x00); // all-zero (~0xFF) +TEST_CONSTEXPR(_kandn_mask8(0x55, 0x55) == 0x00); // ~A & A is 0 + __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 5a131ab6af367..b18ba76473360 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8520,6 +8520,12 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_mm512_kandn(0xCC, 0xAA) == 0x22); // data correctness +TEST_CONSTEXPR(_mm512_kandn(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted +TEST_CONSTEXPR(_mm512_kandn(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) +TEST_CONSTEXPR(_mm512_kandn(0xFFFF, 0x5678) == 0x0000); // all-zero (~0xFFFF) +TEST_CONSTEXPR(_mm512_kandn(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 + __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8674,6 +8680,12 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kandn_mask16(0xCC, 0xAA) == 0x22); // data correctness +TEST_CONSTEXPR(_kandn_mask16(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted +TEST_CONSTEXPR(_kandn_mask16(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask16(0xFFFF, 0x5678) == 0x0000); // all-zero 
(~0xFFFF) +TEST_CONSTEXPR(_kandn_mask16(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 + __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 2312a1c1ca36b7f36908e642c903ae61001ec228 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 22:20:12 +0800 Subject: [PATCH 05/10] Allow kor* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 20 ++++++++++++++++---- clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ++++++++++++ 9 files changed, 61 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index dc74685299736..edae7d66f037f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3109,15 +3109,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandndi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def korqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def korhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { 
def korsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 28a7e838d8e98..0701af2ac49f2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3604,6 +3604,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + case X86::BI__builtin_ia32_korqi: + case X86::BI__builtin_ia32_korhi: + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 257ffbe8b74e4..61682314dfea2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14670,6 +14670,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); } + + case X86::BI__builtin_ia32_korqi: + case X86::BI__builtin_ia32_korhi: + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index bdabc2382c5e0..30874f54815a7 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -62,14 +62,13 @@ _kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 
__DEFAULT_FN_ATTRS -_kor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index dbc50b464396d..1d9b772da7840 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -45,9 +45,8 @@ _kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9dee968fad0a8..28a66df1b7534 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8124,9 +8124,8 @@ _mm512_kandn(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index a55b913fc859b..91916ab5a0081 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -36,11 +36,11 
@@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness +TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated -TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero -TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one -TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint +TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero +TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one +TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask64 @@ -103,6 +103,12 @@ __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask32(0xF0F0A5A5, 0x0F0F5A5A) == 0xFFFFFFFF); // data correctness +TEST_CONSTEXPR(_kor_mask32(0x12345ABCD, 0x12345ABCD) == 0x2345ABCD); // truncated +TEST_CONSTEXPR(_kor_mask32(0x1A2B3C4D, 0x00000000) == 0x1A2B3C4D); // all-zero +TEST_CONSTEXPR(_kor_mask32(0xDEADBEEF, 0xFFFFFFFF) == 0xFFFFFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask32(0xAAAAAAAA, 0x55555555) == 0xFFFFFFFF); // disjoint + __mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -113,6 +119,12 @@ __mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask64(0xF0A5C33C00FF11EE, 0x0F5AC33CFF00EE11) == 0xFFFFC33CFFFFFFFF); // data correctness +TEST_CONSTEXPR(_kor_mask64(0x123456789ABCDEF0, 
0x123456789ABCDEF0) == 0x123456789ABCDEF0); // full 64-bit value +TEST_CONSTEXPR(_kor_mask64(0x1122334455667788, 0x0) == 0x1122334455667788); // all-zero +TEST_CONSTEXPR(_kor_mask64(0x8877665544332211, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0xFFFFFFFFFFFFFFFF); // disjoint + __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 6a6d6611dc18e..49a47021d337c 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -62,6 +62,12 @@ __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m5 __E, __F); } +TEST_CONSTEXPR(_kor_mask8(0xB3, 0x6C) == 0xFF); // data correctness +TEST_CONSTEXPR(_kor_mask8(0x1A5, 0x1A5) == 0xA5); // truncated +TEST_CONSTEXPR(_kor_mask8(0xDE, 0x00) == 0xDE); // all-zero +TEST_CONSTEXPR(_kor_mask8(0x42, 0xFF) == 0xFF); // all-ones +TEST_CONSTEXPR(_kor_mask8(0xAA, 0x55) == 0xFF); // disjoint + __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index b18ba76473360..90d09da8807fe 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8537,6 +8537,12 @@ __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_mm512_kor(0xC1, 0xA8) == 0xE9); // data correctness +TEST_CONSTEXPR(_mm512_kor(0x12345, 0x12345) == 0x2345); // truncated +TEST_CONSTEXPR(_mm512_kor(0xABCD, 0x0000) == 0xABCD); // all-zero 
+TEST_CONSTEXPR(_mm512_kor(0xABCD, 0xFFFF) == 0xFFFF); // all-ones +TEST_CONSTEXPR(_mm512_kor(0xAAAA, 0x5555) == 0xFFFF); // disjoint + int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: test_mm512_kortestc // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8697,6 +8703,12 @@ __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask16(0xC1, 0xA8) == 0xE9); // data correctness +TEST_CONSTEXPR(_kor_mask16(0x12345, 0x12345) == 0x2345); // truncated +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0x0000) == 0xABCD); // all-zero +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0xFFFF) == 0xFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask16(0xAAAA, 0x5555) == 0xFFFF); // disjoint + __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 1132cd33804fab47a9a85976238c558c9b602f76 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 00:42:42 +0800 Subject: [PATCH 06/10] Allow kxnor* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 14 +++++++++++++- 9 files changed, 58 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index edae7d66f037f..0d4034922f3d5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3157,15 +3157,15 @@ let Features = "avx512f", Attributes = [NoThrow, 
Const] in { def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kxnorqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kxnorhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kxnorsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kxnordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0701af2ac49f2..98bb8932a76b5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3612,6 +3612,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + case X86::BI__builtin_ia32_kxnorqi: + case X86::BI__builtin_ia32_kxnorhi: + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 61682314dfea2..710b4c36499d1 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14678,6 +14678,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { 
return LHS | RHS; }); } + + case X86::BI__builtin_ia32_kxnorqi: + case X86::BI__builtin_ia32_kxnorhi: + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 30874f54815a7..0c470d0253c80 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -72,14 +72,13 @@ _kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kxnor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 1d9b772da7840..bf7f620de1629 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -50,9 +50,8 @@ _kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kxnor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 28a66df1b7534..4b27f20192edb 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ 
-8165,9 +8165,8 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B) return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kxnor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 91916ab5a0081..0b596b6322d6a 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -136,6 +136,12 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask32(0x1234ABCD, 0xFFFF0000) == 0x12345432); // data correctness +TEST_CONSTEXPR(_kxnor_mask32(0x123456789ABCDEF0, 0xFFFFFFFF) == 0x9ABCDEF0); // should be truncated to 32 bits +TEST_CONSTEXPR(_kxnor_mask32(0xAABBCCDD, 0x00000000) == 0x55443322); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask32(0x87654321, 0xFFFFFFFF) == 0x87654321); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // ~A xnor A == 0 + __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -147,6 +153,12 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask64(0x0123456789ABCDEF, 0xFFFFFFFF00000000) == 0x0123456776543210); // data correctness +TEST_CONSTEXPR(_kxnor_mask64(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F) == 0xFFFFFFFFFFFFFFFF); // full 64 bits +TEST_CONSTEXPR(_kxnor_mask64(0xFEDCBA9876543210, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask64(0xAABBCCDD11223344, 0x0000000000000000) 
== 0x55443322EEDDCCBB); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // ~A xnor A == 0 + __mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 49a47021d337c..6480e8c4237ae 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -79,6 +79,12 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kxnor_mask8(0xC5, 0xAF) == 0x95); // data correctness +TEST_CONSTEXPR(_kxnor_mask8(0x1234, 0xFF) == 0x34); // should be truncated to 8 bits +TEST_CONSTEXPR(_kxnor_mask8(0x3A, 0x00) == 0xC5); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask8(0xB4, 0xFF) == 0xB4); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask8(0xAA, 0x55) == 0x00); // ~A xnor A == 0 + __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 90d09da8807fe..7180ff1b31f6a 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8638,6 +8638,12 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_mm512_kxnor(0x00C5, 0xFFAF) == 0x95); // data correctness +TEST_CONSTEXPR(_mm512_kxnor(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_mm512_kxnor(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS +TEST_CONSTEXPR(_mm512_kxnor(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS 
+TEST_CONSTEXPR(_mm512_kxnor(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 + __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8672,7 +8678,7 @@ TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); // data correctness TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); // all-zero TEST_CONSTEXPR(_kand_mask16(0x5678, 0xFFFF) == 0x5678); // all-one -TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // intersect +TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // disjoint __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask16 @@ -8721,6 +8727,12 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask16(0x00C5, 0xFFAF) == 0x95); // data correctness +TEST_CONSTEXPR(_kxnor_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_kxnor_mask16(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask16(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS +TEST_CONSTEXPR(_kxnor_mask16(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 + __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 641a09069d69ef83d7e9def7727336faef1ac2db Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 01:03:38 +0800 Subject: [PATCH 07/10] Remove too verbose comments Some comments in the constexpr test suite were a bit too verbose. This commit removes them to keep the code concise. 
--- clang/test/CodeGen/X86/avx512bw-builtins.c | 80 +++++++++++----------- clang/test/CodeGen/X86/avx512dq-builtins.c | 40 +++++------ clang/test/CodeGen/X86/avx512f-builtins.c | 80 +++++++++++----------- 3 files changed, 100 insertions(+), 100 deletions(-) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 0b596b6322d6a..3128078bfef6d 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -36,11 +36,11 @@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness -TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated -TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero -TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one -TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint +TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); +TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); +TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); +TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); +TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask64 @@ -52,11 +52,11 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA) == 0x8888888888888888); // data correctness -TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0, 0xFFFFFFFFFFFFFFFF) == 0x123456789ABCDEF0); // full 64-bit value -TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789, 0x0000000000000000) == 0x0000000000000000); // all-zero 
-TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234, 0xFFFFFFFFFFFFFFFF) == 0x56789ABCDEF01234); // all-one -TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // disjoint +TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA) == 0x8888888888888888); +TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0, 0xFFFFFFFFFFFFFFFF) == 0x123456789ABCDEF0); +TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789, 0x0000000000000000) == 0x0000000000000000); +TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234, 0xFFFFFFFFFFFFFFFF) == 0x56789ABCDEF01234); +TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask32 @@ -70,11 +70,11 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, } // 1100 // -TEST_CONSTEXPR(_kandn_mask32(0xA0A0F0F0, 0xCCCCCCCC) == 0x4C4C0C0C); // data correctness -TEST_CONSTEXPR(_kandn_mask32(0x123456789, 0xFFFFFFFF) == 0xDCBA9876); // truncated and inverted -TEST_CONSTEXPR(_kandn_mask32(0x00000000, 0x1234ABCD) == 0x1234ABCD); // all-ones (~0) -TEST_CONSTEXPR(_kandn_mask32(0xFFFFFFFF, 0x87654321) == 0x00000000); // all-zero (~0xFFFFFFFF) -TEST_CONSTEXPR(_kandn_mask32(0xAAAAAAAA, 0xAAAAAAAA) == 0x00000000); // ~A & A is 0 +TEST_CONSTEXPR(_kandn_mask32(0xA0A0F0F0, 0xCCCCCCCC) == 0x4C4C0C0C); +TEST_CONSTEXPR(_kandn_mask32(0x123456789, 0xFFFFFFFF) == 0xDCBA9876); +TEST_CONSTEXPR(_kandn_mask32(0x00000000, 0x1234ABCD) == 0x1234ABCD); +TEST_CONSTEXPR(_kandn_mask32(0xFFFFFFFF, 0x87654321) == 0x00000000); +TEST_CONSTEXPR(_kandn_mask32(0xAAAAAAAA, 0xAAAAAAAA) == 0x00000000); __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask64 @@ -87,11 +87,11 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } 
-TEST_CONSTEXPR(_kandn_mask64(0xA0A0F0F0C3C33C3C, 0xCCCCCCCCFFFF0000) == 0x4C4C0C0C3C3C0000); // data correctness -TEST_CONSTEXPR(_kandn_mask64(0x0123456789ABCDEF, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // inverted with all-ones mask -TEST_CONSTEXPR(_kandn_mask64(0x0, 0x1122334455667788) == 0x1122334455667788); // all-ones (~0) -TEST_CONSTEXPR(_kandn_mask64(0xFFFFFFFFFFFFFFFF, 0x8877665544332211) == 0x0); // all-zero (~0xFFFFFFFFFFFFFFFF) -TEST_CONSTEXPR(_kandn_mask64(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA) == 0x0); // ~A & A is 0 +TEST_CONSTEXPR(_kandn_mask64(0xA0A0F0F0C3C33C3C, 0xCCCCCCCCFFFF0000) == 0x4C4C0C0C3C3C0000); +TEST_CONSTEXPR(_kandn_mask64(0x0123456789ABCDEF, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); +TEST_CONSTEXPR(_kandn_mask64(0x0, 0x1122334455667788) == 0x1122334455667788); +TEST_CONSTEXPR(_kandn_mask64(0xFFFFFFFFFFFFFFFF, 0x8877665544332211) == 0x0); +TEST_CONSTEXPR(_kandn_mask64(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA) == 0x0); __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask32 @@ -103,11 +103,11 @@ __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_kor_mask32(0xF0F0A5A5, 0x0F0F5A5A) == 0xFFFFFFFF); // data correctness -TEST_CONSTEXPR(_kor_mask32(0x12345ABCD, 0x12345ABCD) == 0x2345ABCD); // truncated -TEST_CONSTEXPR(_kor_mask32(0x1A2B3C4D, 0x00000000) == 0x1A2B3C4D); // all-zero -TEST_CONSTEXPR(_kor_mask32(0xDEADBEEF, 0xFFFFFFFF) == 0xFFFFFFFF); // all-ones -TEST_CONSTEXPR(_kor_mask32(0xAAAAAAAA, 0x55555555) == 0xFFFFFFFF); // disjoint +TEST_CONSTEXPR(_kor_mask32(0xF0F0A5A5, 0x0F0F5A5A) == 0xFFFFFFFF); +TEST_CONSTEXPR(_kor_mask32(0x12345ABCD, 0x12345ABCD) == 0x2345ABCD); +TEST_CONSTEXPR(_kor_mask32(0x1A2B3C4D, 0x00000000) == 0x1A2B3C4D); +TEST_CONSTEXPR(_kor_mask32(0xDEADBEEF, 0xFFFFFFFF) == 0xFFFFFFFF); +TEST_CONSTEXPR(_kor_mask32(0xAAAAAAAA, 0x55555555) == 0xFFFFFFFF); __mmask64 
test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask64 @@ -119,11 +119,11 @@ __mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_kor_mask64(0xF0A5C33C00FF11EE, 0x0F5AC33CFF00EE11) == 0xFFFFC33CFFFFFFFF); // data correctness -TEST_CONSTEXPR(_kor_mask64(0x123456789ABCDEF0, 0x123456789ABCDEF0) == 0x123456789ABCDEF0); // full 64-bit value -TEST_CONSTEXPR(_kor_mask64(0x1122334455667788, 0x0) == 0x1122334455667788); // all-zero -TEST_CONSTEXPR(_kor_mask64(0x8877665544332211, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF); // all-ones -TEST_CONSTEXPR(_kor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0xFFFFFFFFFFFFFFFF); // disjoint +TEST_CONSTEXPR(_kor_mask64(0xF0A5C33C00FF11EE, 0x0F5AC33CFF00EE11) == 0xFFFFC33CFFFFFFFF); +TEST_CONSTEXPR(_kor_mask64(0x123456789ABCDEF0, 0x123456789ABCDEF0) == 0x123456789ABCDEF0); +TEST_CONSTEXPR(_kor_mask64(0x1122334455667788, 0x0) == 0x1122334455667788); +TEST_CONSTEXPR(_kor_mask64(0x8877665544332211, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF); +TEST_CONSTEXPR(_kor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0xFFFFFFFFFFFFFFFF); __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask32 @@ -136,11 +136,11 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } -TEST_CONSTEXPR(_kxnor_mask32(0x1234ABCD, 0xFFFF0000) == 0x12345432); // data correctness -TEST_CONSTEXPR(_kxnor_mask32(0x123456789ABCDEF0, 0xFFFFFFFF) == 0x9ABCDEF0); // should be truncated to 32 bits -TEST_CONSTEXPR(_kxnor_mask32(0xAABBCCDD, 0x00000000) == 0x55443322); // all-zero mask, res = ~LHS -TEST_CONSTEXPR(_kxnor_mask32(0x87654321, 0xFFFFFFFF) == 0x87654321); // all-one mask, res = LHS -TEST_CONSTEXPR(_kxnor_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // ~A xnor A == 0 +TEST_CONSTEXPR(_kxnor_mask32(0x1234ABCD, 
0xFFFF0000) == 0x12345432); +TEST_CONSTEXPR(_kxnor_mask32(0x123456789ABCDEF0, 0xFFFFFFFF) == 0x9ABCDEF0); +TEST_CONSTEXPR(_kxnor_mask32(0xAABBCCDD, 0x00000000) == 0x55443322); +TEST_CONSTEXPR(_kxnor_mask32(0x87654321, 0xFFFFFFFF) == 0x87654321); +TEST_CONSTEXPR(_kxnor_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask64 @@ -153,11 +153,11 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } -TEST_CONSTEXPR(_kxnor_mask64(0x0123456789ABCDEF, 0xFFFFFFFF00000000) == 0x0123456776543210); // data correctness -TEST_CONSTEXPR(_kxnor_mask64(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F) == 0xFFFFFFFFFFFFFFFF); // full 64 bits -TEST_CONSTEXPR(_kxnor_mask64(0xFEDCBA9876543210, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // all-one mask, res = LHS -TEST_CONSTEXPR(_kxnor_mask64(0xAABBCCDD11223344, 0x0000000000000000) == 0x55443322EEDDCCBB); // all-zero mask, res = ~LHS -TEST_CONSTEXPR(_kxnor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // ~A xnor A == 0 +TEST_CONSTEXPR(_kxnor_mask64(0x0123456789ABCDEF, 0xFFFFFFFF00000000) == 0x0123456776543210); +TEST_CONSTEXPR(_kxnor_mask64(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F) == 0xFFFFFFFFFFFFFFFF); +TEST_CONSTEXPR(_kxnor_mask64(0xFEDCBA9876543210, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); +TEST_CONSTEXPR(_kxnor_mask64(0xAABBCCDD11223344, 0x0000000000000000) == 0x55443322EEDDCCBB); +TEST_CONSTEXPR(_kxnor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); __mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask32 diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 6480e8c4237ae..fa4fd9ddee42e 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ 
b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -29,11 +29,11 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } -TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08); // data correctness -TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23); // should be truncated -TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00); // all-zero -TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56); // all-one -TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00); // disjoint +TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08); +TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23); +TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00); +TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56); +TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00); __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask8 @@ -46,11 +46,11 @@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_kandn_mask8(0xC3, 0xA5) == 0x24); // data correctness -TEST_CONSTEXPR(_kandn_mask8(0x1F0, 0xFF) == 0x0F); // truncated (to 0xF0) and inverted -TEST_CONSTEXPR(_kandn_mask8(0x00, 0xB7) == 0xB7); // all-ones (~0) -TEST_CONSTEXPR(_kandn_mask8(0xFF, 0x7E) == 0x00); // all-zero (~0xFF) -TEST_CONSTEXPR(_kandn_mask8(0x55, 0x55) == 0x00); // ~A & A is 0 +TEST_CONSTEXPR(_kandn_mask8(0xC3, 0xA5) == 0x24); +TEST_CONSTEXPR(_kandn_mask8(0x1F0, 0xFF) == 0x0F); +TEST_CONSTEXPR(_kandn_mask8(0x00, 0xB7) == 0xB7); +TEST_CONSTEXPR(_kandn_mask8(0xFF, 0x7E) == 0x00); +TEST_CONSTEXPR(_kandn_mask8(0x55, 0x55) == 0x00); __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask8 @@ -62,11 +62,11 @@ __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m5 __E, __F); } -TEST_CONSTEXPR(_kor_mask8(0xB3, 0x6C) == 0xFF); // data correctness -TEST_CONSTEXPR(_kor_mask8(0x1A5, 0x1A5) == 0xA5); // truncated 
-TEST_CONSTEXPR(_kor_mask8(0xDE, 0x00) == 0xDE); // all-zero -TEST_CONSTEXPR(_kor_mask8(0x42, 0xFF) == 0xFF); // all-ones -TEST_CONSTEXPR(_kor_mask8(0xAA, 0x55) == 0xFF); // disjoint +TEST_CONSTEXPR(_kor_mask8(0xB3, 0x6C) == 0xFF); +TEST_CONSTEXPR(_kor_mask8(0x1A5, 0x1A5) == 0xA5); +TEST_CONSTEXPR(_kor_mask8(0xDE, 0x00) == 0xDE); +TEST_CONSTEXPR(_kor_mask8(0x42, 0xFF) == 0xFF); +TEST_CONSTEXPR(_kor_mask8(0xAA, 0x55) == 0xFF); __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask8 @@ -79,11 +79,11 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_kxnor_mask8(0xC5, 0xAF) == 0x95); // data correctness -TEST_CONSTEXPR(_kxnor_mask8(0x1234, 0xFF) == 0x34); // should be truncated to 8 bits -TEST_CONSTEXPR(_kxnor_mask8(0x3A, 0x00) == 0xC5); // all-zero mask, res = ~LHS -TEST_CONSTEXPR(_kxnor_mask8(0xB4, 0xFF) == 0xB4); // all-one mask, res = LHS -TEST_CONSTEXPR(_kxnor_mask8(0xAA, 0x55) == 0x00); // ~A xnor A == 0 +TEST_CONSTEXPR(_kxnor_mask8(0xC5, 0xAF) == 0x95); +TEST_CONSTEXPR(_kxnor_mask8(0x1234, 0xFF) == 0x34); +TEST_CONSTEXPR(_kxnor_mask8(0x3A, 0x00) == 0xC5); +TEST_CONSTEXPR(_kxnor_mask8(0xB4, 0xFF) == 0xB4); +TEST_CONSTEXPR(_kxnor_mask8(0xAA, 0x55) == 0x00); __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask8 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 7180ff1b31f6a..5e5b335e755a5 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8502,11 +8502,11 @@ __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88); // data correctness -TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); // should be truncated 
-TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000); // all-zero -TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678); // all-one -TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000); // disjoint +TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88); +TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); +TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000); +TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678); +TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000); __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kandn @@ -8520,11 +8520,11 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_mm512_kandn(0xCC, 0xAA) == 0x22); // data correctness -TEST_CONSTEXPR(_mm512_kandn(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted -TEST_CONSTEXPR(_mm512_kandn(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) -TEST_CONSTEXPR(_mm512_kandn(0xFFFF, 0x5678) == 0x0000); // all-zero (~0xFFFF) -TEST_CONSTEXPR(_mm512_kandn(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 +TEST_CONSTEXPR(_mm512_kandn(0xCC, 0xAA) == 0x22); +TEST_CONSTEXPR(_mm512_kandn(0x12345, 0xFFFF) == 0xDCBA); +TEST_CONSTEXPR(_mm512_kandn(0x0000, 0xABCD) == 0xABCD); +TEST_CONSTEXPR(_mm512_kandn(0xFFFF, 0x5678) == 0x0000); +TEST_CONSTEXPR(_mm512_kandn(0xAAAA, 0xAAAA) == 0x0000); __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kor @@ -8537,11 +8537,11 @@ __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } -TEST_CONSTEXPR(_mm512_kor(0xC1, 0xA8) == 0xE9); // data correctness -TEST_CONSTEXPR(_mm512_kor(0x12345, 0x12345) == 0x2345); // truncated -TEST_CONSTEXPR(_mm512_kor(0xABCD, 0x0000) == 0xABCD); // all-zero -TEST_CONSTEXPR(_mm512_kor(0xABCD, 0xFFFF) == 0xFFFF); // all-ones -TEST_CONSTEXPR(_mm512_kor(0xAAAA, 0x5555) == 0xFFFF); // disjoint 
+TEST_CONSTEXPR(_mm512_kor(0xC1, 0xA8) == 0xE9); +TEST_CONSTEXPR(_mm512_kor(0x12345, 0x12345) == 0x2345); +TEST_CONSTEXPR(_mm512_kor(0xABCD, 0x0000) == 0xABCD); +TEST_CONSTEXPR(_mm512_kor(0xABCD, 0xFFFF) == 0xFFFF); +TEST_CONSTEXPR(_mm512_kor(0xAAAA, 0x5555) == 0xFFFF); int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: test_mm512_kortestc @@ -8638,11 +8638,11 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_mm512_kxnor(0x00C5, 0xFFAF) == 0x95); // data correctness -TEST_CONSTEXPR(_mm512_kxnor(0x12345, 0xFFFF) == 0x2345); // should be truncated -TEST_CONSTEXPR(_mm512_kxnor(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS -TEST_CONSTEXPR(_mm512_kxnor(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS -TEST_CONSTEXPR(_mm512_kxnor(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 +TEST_CONSTEXPR(_mm512_kxnor(0x00C5, 0xFFAF) == 0x95); +TEST_CONSTEXPR(_mm512_kxnor(0x12345, 0xFFFF) == 0x2345); +TEST_CONSTEXPR(_mm512_kxnor(0xABCD, 0x0000) == 0x5432); +TEST_CONSTEXPR(_mm512_kxnor(0x5678, 0xFFFF) == 0x5678); +TEST_CONSTEXPR(_mm512_kxnor(0xAAAA, 0x5555) == 0x0000); __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxor @@ -8674,11 +8674,11 @@ __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); // data correctness -TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated -TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); // all-zero -TEST_CONSTEXPR(_kand_mask16(0x5678, 0xFFFF) == 0x5678); // all-one -TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // disjoint +TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); +TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); +TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); +TEST_CONSTEXPR(_kand_mask16(0x5678, 
0xFFFF) == 0x5678); +TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask16 @@ -8692,11 +8692,11 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } -TEST_CONSTEXPR(_kandn_mask16(0xCC, 0xAA) == 0x22); // data correctness -TEST_CONSTEXPR(_kandn_mask16(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted -TEST_CONSTEXPR(_kandn_mask16(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) -TEST_CONSTEXPR(_kandn_mask16(0xFFFF, 0x5678) == 0x0000); // all-zero (~0xFFFF) -TEST_CONSTEXPR(_kandn_mask16(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 +TEST_CONSTEXPR(_kandn_mask16(0xCC, 0xAA) == 0x22); +TEST_CONSTEXPR(_kandn_mask16(0x12345, 0xFFFF) == 0xDCBA); +TEST_CONSTEXPR(_kandn_mask16(0x0000, 0xABCD) == 0xABCD); +TEST_CONSTEXPR(_kandn_mask16(0xFFFF, 0x5678) == 0x0000); +TEST_CONSTEXPR(_kandn_mask16(0xAAAA, 0xAAAA) == 0x0000); __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask16 @@ -8709,11 +8709,11 @@ __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } -TEST_CONSTEXPR(_kor_mask16(0xC1, 0xA8) == 0xE9); // data correctness -TEST_CONSTEXPR(_kor_mask16(0x12345, 0x12345) == 0x2345); // truncated -TEST_CONSTEXPR(_kor_mask16(0xABCD, 0x0000) == 0xABCD); // all-zero -TEST_CONSTEXPR(_kor_mask16(0xABCD, 0xFFFF) == 0xFFFF); // all-ones -TEST_CONSTEXPR(_kor_mask16(0xAAAA, 0x5555) == 0xFFFF); // disjoint +TEST_CONSTEXPR(_kor_mask16(0xC1, 0xA8) == 0xE9); +TEST_CONSTEXPR(_kor_mask16(0x12345, 0x12345) == 0x2345); +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0x0000) == 0xABCD); +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0xFFFF) == 0xFFFF); +TEST_CONSTEXPR(_kor_mask16(0xAAAA, 0x5555) == 0xFFFF); __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // 
CHECK-LABEL: test_kxnor_mask16 @@ -8727,11 +8727,11 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } -TEST_CONSTEXPR(_kxnor_mask16(0x00C5, 0xFFAF) == 0x95); // data correctness -TEST_CONSTEXPR(_kxnor_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated -TEST_CONSTEXPR(_kxnor_mask16(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS -TEST_CONSTEXPR(_kxnor_mask16(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS -TEST_CONSTEXPR(_kxnor_mask16(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 +TEST_CONSTEXPR(_kxnor_mask16(0x00C5, 0xFFAF) == 0x95); +TEST_CONSTEXPR(_kxnor_mask16(0x12345, 0xFFFF) == 0x2345); +TEST_CONSTEXPR(_kxnor_mask16(0xABCD, 0x0000) == 0x5432); +TEST_CONSTEXPR(_kxnor_mask16(0x5678, 0xFFFF) == 0x5678); +TEST_CONSTEXPR(_kxnor_mask16(0xAAAA, 0x5555) == 0x0000); __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask16 >From a64ac24281e484e984a8eb7b42cdfeaf3ad058c5 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 01:17:25 +0800 Subject: [PATCH 08/10] Allow kxor* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 15 +++++++++++++-- clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ++++++++++++ 9 files changed, 58 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0d4034922f3d5..ef676ca24549d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3170,15 +3170,15 @@ let Features = "avx512bw", Attributes = 
[NoThrow, Const, Constexpr] in { def kxnordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kxorqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kxorhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kxorsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kxordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 98bb8932a76b5..bd13ba75d051e 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3620,6 +3620,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); + case X86::BI__builtin_ia32_kxorqi: + case X86::BI__builtin_ia32_kxorhi: + case X86::BI__builtin_ia32_kxorsi: + case X86::BI__builtin_ia32_kxordi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 710b4c36499d1..58f992aec109c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14686,6 +14686,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const 
APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); } + + case X86::BI__builtin_ia32_kxorqi: + case X86::BI__builtin_ia32_kxorhi: + case X86::BI__builtin_ia32_kxorsi: + case X86::BI__builtin_ia32_kxordi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 0c470d0253c80..53d8593301140 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -82,14 +82,13 @@ _kxnor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kxor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index bf7f620de1629..a2324a69007a7 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -55,9 +55,8 @@ _kxnor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kxor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 4b27f20192edb..858d22e707214 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ 
b/clang/lib/Headers/avx512fintrin.h @@ -8170,9 +8170,8 @@ _mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kxor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kxor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3128078bfef6d..fbfd170fe360c 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -68,8 +68,7 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } -// 1100 -// + TEST_CONSTEXPR(_kandn_mask32(0xA0A0F0F0, 0xCCCCCCCC) == 0x4C4C0C0C); TEST_CONSTEXPR(_kandn_mask32(0x123456789, 0xFFFFFFFF) == 0xDCBA9876); TEST_CONSTEXPR(_kandn_mask32(0x00000000, 0x1234ABCD) == 0x1234ABCD); @@ -169,6 +168,12 @@ __mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kxor_mask32(0x1234ABCD, 0xFFFF0000) == 0xEDCBABCD); +TEST_CONSTEXPR(_kxor_mask32(0x123456789ABCDEF0, 0x00000000) == 0x9ABCDEF0); +TEST_CONSTEXPR(_kxor_mask32(0xAABBCCDD, 0x00000000) == 0xAABBCCDD); +TEST_CONSTEXPR(_kxor_mask32(0x87654321, 0xFFFFFFFF) == 0x789ABCDE); +TEST_CONSTEXPR(_kxor_mask32(0xAAAAAAAA, 0x55555555) == 0xFFFFFFFF); + __mmask64 test_kxor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -179,6 +184,12 @@ __mmask64 test_kxor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kxor_mask64(0x0123456789ABCDEF, 0xFFFFFFFF00000000) == 0xFEDCBA9889ABCDEF); +TEST_CONSTEXPR(_kxor_mask64(0xF0F0F0F0F0F0F0F0, 
0x0F0F0F0F0F0F0F0F) == 0xFFFFFFFFFFFFFFFF); +TEST_CONSTEXPR(_kxor_mask64(0xFEDCBA9876543210, 0xFFFFFFFFFFFFFFFF) == 0x0123456789ABCDEF); +TEST_CONSTEXPR(_kxor_mask64(0xAABBCCDD11223344, 0x0000000000000000) == 0xAABBCCDD11223344); +TEST_CONSTEXPR(_kxor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0xFFFFFFFFFFFFFFFF); + unsigned char test_kortestz_mask32_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: test_kortestz_mask32_u8 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index fa4fd9ddee42e..ca60ee09245b7 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -95,6 +95,12 @@ __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_kxor_mask8(0xC5, 0xAF) == 0x6A); +TEST_CONSTEXPR(_kxor_mask8(0x1234, 0xFFFF) == 0xCB); +TEST_CONSTEXPR(_kxor_mask8(0xCD, 0x00) == 0xCD); +TEST_CONSTEXPR(_kxor_mask8(0x78, 0xFF) == 0x87); +TEST_CONSTEXPR(_kxor_mask8(0xAA, 0x55) == 0xFF); + unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: test_kortestz_mask8_u8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 5e5b335e755a5..d1dcf7ceee587 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8655,6 +8655,12 @@ __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_mm512_kxor(0xC5, 0xAF) == 0x6A); +TEST_CONSTEXPR(_mm512_kxor(0x12345, 0xFFFF) == 0xDCBA); +TEST_CONSTEXPR(_mm512_kxor(0xABCD, 0x0000) == 0xABCD); +TEST_CONSTEXPR(_mm512_kxor(0x5678, 0xFFFF) == 0xA987); +TEST_CONSTEXPR(_mm512_kxor(0xAAAA, 0x5555) == 0xFFFF); + __mmask16 test_knot_mask16(__mmask16 a) { // CHECK-LABEL: 
test_knot_mask16 // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8744,6 +8750,12 @@ __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kxor_mask16(0xC5, 0xAF) == 0x6A); +TEST_CONSTEXPR(_kxor_mask16(0x12345, 0xFFFF) == 0xDCBA); +TEST_CONSTEXPR(_kxor_mask16(0xABCD, 0x0000) == 0xABCD); +TEST_CONSTEXPR(_kxor_mask16(0x5678, 0xFFFF) == 0xA987); +TEST_CONSTEXPR(_kxor_mask16(0xAAAA, 0x5555) == 0xFFFF); + __mmask16 test_kshiftli_mask16(__m512i A, __m512i B, __m512i C, __m512i D) { // CHECK-LABEL: test_kshiftli_mask16 // CHECK: [[VAL:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 61acf3713db048ccb9bee1c138b0b7b271da517a Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 11:15:36 +0800 Subject: [PATCH 09/10] Allow knot* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 16 ++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 11 +++++++++++ clang/lib/Headers/avx512bwintrin.h | 8 ++++---- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 3 +++ clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++ 9 files changed, 53 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index ef676ca24549d..4bd700421f0c2 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1216,15 +1216,15 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { def scatterdiv16si : X86Builtin<"void(void *, unsigned char, _Vector<8, long long int>, _Vector<8, int>, _Constant int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def knotqi : X86Builtin<"unsigned 
char(unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def knothi : X86Builtin<"unsigned short(unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def knotsi : X86Builtin<"unsigned int(unsigned int)">; def knotdi : X86Builtin<"unsigned long long int(unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index bd13ba75d051e..2632e4323c8e5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -659,6 +659,16 @@ static bool interp__builtin_abs(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_knot(InterpState &S, CodePtr OpPC, + const InterpFrame *Frame, + const CallExpr *Call) { + APSInt Val = + popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(0)->getType())); + APInt Result = ~Val; + pushInteger(S, APSInt(std::move(Result), true), Call->getType()); + return true; +} + static bool interp__builtin_popcount(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -3628,6 +3638,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; }); + case X86::BI__builtin_ia32_knotqi: + case X86::BI__builtin_ia32_knothi: + case X86::BI__builtin_ia32_knotsi: + case X86::BI__builtin_ia32_knotdi: + return interp__builtin_knot(S, OpPC, Frame, Call); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 58f992aec109c..610e69d74b44a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14694,6 +14694,17 @@ bool 
IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return LHS ^ RHS; }); } + + case X86::BI__builtin_ia32_knotqi: + case X86::BI__builtin_ia32_knothi: + case X86::BI__builtin_ia32_knotsi: + case X86::BI__builtin_ia32_knotdi: { + APSInt Val; + if (!EvaluateInteger(E->getArg(0), Val, Info)) + return false; + APSInt Result = ~Val; + return Success(APValue(Result), E); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 53d8593301140..8e5fa9b5270a7 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -32,13 +32,13 @@ typedef unsigned long long __mmask64; #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -static __inline __mmask32 __DEFAULT_FN_ATTRS -_knot_mask32(__mmask32 __M) -{ +static __inline __mmask32 + __DEFAULT_FN_ATTRS_CONSTEXPR _knot_mask32(__mmask32 __M) { return __builtin_ia32_knotsi(__M); } -static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { +static __inline __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index a2324a69007a7..8232896c79905 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -29,9 +29,8 @@ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -static __inline __mmask8 __DEFAULT_FN_ATTRS -_knot_mask8(__mmask8 __M) -{ +static __inline __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_knot_mask8(__mmask8 __M) { return __builtin_ia32_knotqi(__M); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 858d22e707214..aca43cef57335 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -4403,9 +4403,8 @@ _mm512_store_epi64 (void *__P, __m512i __A) /* Mask ops */ -static __inline __mmask16 
__DEFAULT_FN_ATTRS -_mm512_knot(__mmask16 __M) -{ +static __inline __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index fbfd170fe360c..2bb3b696a7bb1 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -19,6 +19,9 @@ __mmask32 test_knot_mask32(__mmask32 a) { return _knot_mask32(a); } +TEST_CONSTEXPR(_knot_mask32(0) == 0xFFFFFFFF); +TEST_CONSTEXPR(_knot_mask32(0x123456789) == 0xDCBA9876); + __mmask64 test_knot_mask64(__mmask64 a) { // CHECK-LABEL: test_knot_mask64 // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -26,6 +29,9 @@ __mmask64 test_knot_mask64(__mmask64 a) { return _knot_mask64(a); } +TEST_CONSTEXPR(_knot_mask64(0) == 0xFFFFFFFFFFFFFFFF); +TEST_CONSTEXPR(_knot_mask64(0xABCDEF0123456789) == 0x543210FEDCBA9876); + __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index ca60ee09245b7..e0b848ddca278 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -19,6 +19,9 @@ __mmask8 test_knot_mask8(__mmask8 a) { return _knot_mask8(a); } +TEST_CONSTEXPR(_knot_mask8(0) == 0xFF); +TEST_CONSTEXPR(_knot_mask8(0x345) == 0xBA); + __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index d1dcf7ceee587..9fd913052a95a 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ 
-470,6 +470,9 @@ __mmask16 test_mm512_knot(__mmask16 a) return _mm512_knot(a); } +TEST_CONSTEXPR(_mm512_knot(0) == 0xFFFF); +TEST_CONSTEXPR(_mm512_knot(0x12345) == 0xDCBA); + __m512i test_mm512_alignr_epi32(__m512i a, __m512i b) { // CHECK-LABEL: test_mm512_alignr_epi32 @@ -8669,6 +8672,9 @@ __mmask16 test_knot_mask16(__mmask16 a) { return _knot_mask16(a); } +TEST_CONSTEXPR(_knot_mask16(0) == 0xFFFF); +TEST_CONSTEXPR(_knot_mask16(0x12345) == 0xDCBA); + __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From ff71edf117512979e29c5f774955bf73eac429e2 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 15:40:05 +0800 Subject: [PATCH 10/10] Allow kadd* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 4 ++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 10 ++++------ clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 2 +- 8 files changed, 51 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 4bd700421f0c2..61f35f8c1efe5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3073,12 +3073,12 @@ let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<128 def fpclassss_mask : X86Builtin<"unsigned char(_Vector<4, float>, _Constant int, unsigned char)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kaddqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; def kaddhi : 
X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kaddsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 2632e4323c8e5..b1ec06aba0224 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3644,6 +3644,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_knotdi: return interp__builtin_knot(S, OpPC, Frame, Call); + case X86::BI__builtin_ia32_kaddqi: + case X86::BI__builtin_ia32_kaddhi: + case X86::BI__builtin_ia32_kaddsi: + case X86::BI__builtin_ia32_kadddi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 610e69d74b44a..6dad7417ad485 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14705,6 +14705,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, APSInt Result = ~Val; return Success(APValue(Result), E); } + + case X86::BI__builtin_ia32_kaddqi: + case X86::BI__builtin_ia32_kaddhi: + case X86::BI__builtin_ia32_kaddsi: + case X86::BI__builtin_ia32_kadddi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 8e5fa9b5270a7..707864ae5b1ae 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ 
-160,14 +160,13 @@ _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kadd_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 8232896c79905..fb65bf933b8ad 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -113,15 +113,13 @@ _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kadd_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_kadd_mask16(__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_kadd_mask16(__mmask16 __A, __mmask16 __B) { return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 2bb3b696a7bb1..1271c8707e2dc 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -366,6 +366,12 @@ __mmask32 test_kadd_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kadd_mask32(100000, 200000) == 
300000); +TEST_CONSTEXPR(_kadd_mask32(2147483648, 0) == 2147483648); +TEST_CONSTEXPR(_kadd_mask32(0xFFFFFFFF, 1) == 0); +TEST_CONSTEXPR(_kadd_mask32(0xEE6B2800, 0x1DCD6500) == 0x0C388D00); +TEST_CONSTEXPR(_kadd_mask32(0xFFFFFFFA, 10) == 4); + __mmask64 test_kadd_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kadd_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -376,6 +382,12 @@ __mmask64 test_kadd_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kadd_mask64(10000000000, 20000000000) == 30000000000); +TEST_CONSTEXPR(_kadd_mask64(0x8000000000000000, 0) == 0x8000000000000000); +TEST_CONSTEXPR(_kadd_mask64(0xFFFFFFFFFFFFFFFF, 1) == 0); +TEST_CONSTEXPR(_kadd_mask64(0xFFFFFFFFFFFFFFFA, 10) == 4); +TEST_CONSTEXPR(_kadd_mask64(0xFA0A1F2C6C729C00, 0x0DE0B6B3A7640000) == 0x07EAD5E013D69C00); + __mmask32 test_kshiftli_mask32(__m512i A, __m512i B, __m512i C, __m512i D) { // CHECK-LABEL: test_kshiftli_mask32 // CHECK: [[VAL:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index e0b848ddca278..7276c0129bdcd 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -228,6 +228,12 @@ __mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_kadd_mask8(20, 30) == 50); +TEST_CONSTEXPR(_kadd_mask8(128, 0) == 128); +TEST_CONSTEXPR(_kadd_mask8(0xFF, 1) == 0); +TEST_CONSTEXPR(_kadd_mask8(0xC8, 0x64) == 0x2C); +TEST_CONSTEXPR(_kadd_mask8(0xFA, 0x0F) == 0x09); + __mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kadd_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -238,6 +244,12 @@ __mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } 
+TEST_CONSTEXPR(_kadd_mask16(1000, 2000) == 3000); +TEST_CONSTEXPR(_kadd_mask16(32768, 0) == 32768); +TEST_CONSTEXPR(_kadd_mask16(0xFFFF, 1) == 0); +TEST_CONSTEXPR(_kadd_mask16(0xEA60, 0x2710) == 0x1170); +TEST_CONSTEXPR(_kadd_mask16(0xFFFA, 0x14) == 0x0E); + __mmask8 test_kshiftli_mask8(__m512i A, __m512i B, __m512i C, __m512i D) { // CHECK-LABEL: test_kshiftli_mask8 // CHECK: [[VAL:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 9fd913052a95a..e7f4ca342f795 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -11106,7 +11106,7 @@ int test_mm512_mask2int(__mmask16 __a) return _mm512_mask2int(__a); } -TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); // Zero-extended +TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
