https://github.com/fennecJ updated https://github.com/llvm/llvm-project/pull/159998
>From 7b91ea75b304a0b0fb3f14945b5c2f21f035bffe Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 02:40:44 +0800 Subject: [PATCH 1/6] Allow kand* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 22 ++++++++++++++++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++ 9 files changed, 65 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index aac502091b57e..e5555236c7666 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3083,15 +3083,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in { def kadddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kandqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kandhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 
40b9e04aa335c..41d5d76c8c659 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3588,6 +3588,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpd_512: return interp__builtin_select(S, OpPC, Call); + case X86::BI__builtin_ia32_kandqi: + case X86::BI__builtin_ia32_kandhi: + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 12e4e25bd29c4..cd41c54087d08 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13542,6 +13542,20 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { + + auto HandleMaskBinOp = + [&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn) + -> bool { + APValue LHS, RHS; + if (!Evaluate(LHS, Info, E->getArg(0)) || + !Evaluate(RHS, Info, E->getArg(1))) + return false; + + APSInt ResultInt = Fn(LHS.getInt(), RHS.getInt()); + + return Success(APValue(ResultInt), E); + }; + switch (BuiltinOp) { default: return false; @@ -14640,6 +14654,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, Result.setBitVal(P++, Val[I]); return Success(Result, E); } + + case X86::BI__builtin_ia32_kandqi: + case X86::BI__builtin_ia32_kandhi: + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 599cfbe479676..cfe16062d8f65 100644 
--- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -42,14 +42,13 @@ static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { return __builtin_ia32_knotdi(__M); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kand_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 95fdc2851cb9b..52b3e9d148d6e 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -35,9 +35,8 @@ _knot_mask8(__mmask8 __M) return __builtin_ia32_knotqi(__M); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kand_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 8ebfb75170e17..9ffde30cba915 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8116,9 +8116,8 @@ _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kand (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 + __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kand(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } diff --git 
a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3be708aea8a4d..28e9ff611472a 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -36,6 +36,12 @@ __mmask32 test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness +TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated +TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero +TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one +TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint + __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -46,6 +52,12 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness +TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value +TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero +TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one +TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint + __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 
df096e3607f30..cc15517fe5bb5 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -29,6 +29,12 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_kand_mask8(0x0C, 0x0A) == 0x08); // data correctness +TEST_CONSTEXPR(_kand_mask8(0x123, 0xFF) == 0x23); // should be truncated +TEST_CONSTEXPR(_kand_mask8(0xAB, 0x00) == 0x00); // all-zero +TEST_CONSTEXPR(_kand_mask8(0x56, 0xFF) == 0x56); // all-one +TEST_CONSTEXPR(_kand_mask8(0xAA, 0x55) == 0x00); // disjoint + __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..70a7ff80f9026 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8502,6 +8502,12 @@ __mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_mm512_kand(0xCC, 0xAA) == 0x88); // data correctness +TEST_CONSTEXPR(_mm512_kand(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_mm512_kand(0xABCD, 0x0000) == 0x0000); // all-zero +TEST_CONSTEXPR(_mm512_kand(0x5678, 0xFFFF) == 0x5678); // all-one +TEST_CONSTEXPR(_mm512_kand(0xAAAA, 0x5555) == 0x0000); // disjoint + __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kandn // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From a7f6dad463a3b1386412736a31f6632f2b92876a Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 03:13:30 +0800 Subject: [PATCH 2/6] Allow int2mask, mask2int to be used in constexpr --- clang/lib/Headers/avx512fintrin.h | 10 ++++------ clang/test/CodeGen/X86/avx512f-builtins.c | 4 ++++ 2 
files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9ffde30cba915..5a45082f76e81 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -520,15 +520,13 @@ _mm512_castsi512_si256(__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_int2mask(int __a) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_int2mask(int __a) { return (__mmask16)__a; } -static __inline__ int __DEFAULT_FN_ATTRS -_mm512_mask2int(__mmask16 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_mask2int(__mmask16 __a) { return (int)__a; } diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 70a7ff80f9026..e7f6b5aac8279 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -11037,6 +11037,8 @@ __mmask16 test_mm512_int2mask(int __a) return _mm512_int2mask(__a); } +TEST_CONSTEXPR(_mm512_int2mask((int)0xDEADBEEF) == 0xBEEF); + int test_mm512_mask2int(__mmask16 __a) { // CHECK-LABEL: test_mm512_mask2int @@ -11044,6 +11046,8 @@ int test_mm512_mask2int(__mmask16 __a) return _mm512_mask2int(__a); } +TEST_CONSTEXPR(_mm512_mask2int(0x8000) == 0x00008000); // Zero-extended + __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: test_mm_mask_move_ss >From 666a158e34aa508d59c548b4ac1e8a763230eff1 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 20:58:35 +0800 Subject: [PATCH 3/6] Add constexpr testcase for _kand_mask16 _kand_mask16 is an alias of _mm512_kand. Although they are semantically identical, the existing test suite contains a dedicated set of tests for _kand_mask16. This commit follows that convention by adding the same constexpr test case for completeness. 
--- clang/test/CodeGen/X86/avx512f-builtins.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index e7f6b5aac8279..5a131ab6af367 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8656,6 +8656,12 @@ __mmask16 test_kand_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); // data correctness +TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); // all-zero +TEST_CONSTEXPR(_kand_mask16(0x5678, 0xFFFF) == 0x5678); // all-one +TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // disjoint + __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 03225502b6e66dadf9703338f00fc809357515f9 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 21:16:43 +0800 Subject: [PATCH 4/6] Allow kandn* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 23 +++++++++++++++++----- clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 +++++++++++ 9 files changed, 63 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index e5555236c7666..dc74685299736 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3096,15 +3096,15 @@ let
Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kanddi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kandnqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kandnhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandnsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kandndi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 41d5d76c8c659..28a7e838d8e98 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3596,6 +3596,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); + case X86::BI__builtin_ia32_kandnqi: + case X86::BI__builtin_ia32_kandnhi: + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index cd41c54087d08..257ffbe8b74e4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14662,6 +14662,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const 
CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return LHS & RHS; }); } + + case X86::BI__builtin_ia32_kandnqi: + case X86::BI__builtin_ia32_kandnhi: + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index cfe16062d8f65..bdabc2382c5e0 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -52,14 +52,13 @@ _kand_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kandn_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 52b3e9d148d6e..dbc50b464396d 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -40,9 +40,8 @@ _kand_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kandn_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 5a45082f76e81..9dee968fad0a8 100644 
--- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8119,9 +8119,8 @@ static __inline__ __mmask16 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kandn (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kandn(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 28e9ff611472a..a55b913fc859b 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -52,11 +52,11 @@ __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCCull, 0xAAAAAAAAAAAAAAAAull) == 0x8888888888888888ull); // data correctness -TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0ull, 0xFFFFFFFFFFFFFFFFull) == 0x123456789ABCDEF0ull); // full 64-bit value -TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789ull, 0x0000000000000000ull) == 0x0000000000000000ull); // all-zero -TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234ull, 0xFFFFFFFFFFFFFFFFull) == 0x56789ABCDEF01234ull); // all-one -TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAAull, 0x5555555555555555ull) == 0x0000000000000000ull); // disjoint +TEST_CONSTEXPR(_kand_mask64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA) == 0x8888888888888888); // data correctness +TEST_CONSTEXPR(_kand_mask64(0x123456789ABCDEF0, 0xFFFFFFFFFFFFFFFF) == 0x123456789ABCDEF0); // full 64-bit value +TEST_CONSTEXPR(_kand_mask64(0xABCDEF0123456789, 0x0000000000000000) == 0x0000000000000000); // all-zero +TEST_CONSTEXPR(_kand_mask64(0x56789ABCDEF01234, 0xFFFFFFFFFFFFFFFF) == 0x56789ABCDEF01234); // all-one +TEST_CONSTEXPR(_kand_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // disjoint __mmask32 
test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask32 @@ -68,6 +68,13 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } +// _kandn_mask32(A, B) evaluates to (~A) & B: only the first operand is +// inverted before the AND. +TEST_CONSTEXPR(_kandn_mask32(0xA0A0F0F0, 0xCCCCCCCC) == 0x4C4C0C0C); // data correctness +TEST_CONSTEXPR(_kandn_mask32(0x123456789, 0xFFFFFFFF) == 0xDCBA9876); // truncated and inverted +TEST_CONSTEXPR(_kandn_mask32(0x00000000, 0x1234ABCD) == 0x1234ABCD); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask32(0xFFFFFFFF, 0x87654321) == 0x00000000); // all-zero (~0xFFFFFFFF) +TEST_CONSTEXPR(_kandn_mask32(0xAAAAAAAA, 0xAAAAAAAA) == 0x00000000); // ~A & A is 0 __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask64 @@ -80,6 +87,12 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kandn_mask64(0xA0A0F0F0C3C33C3C, 0xCCCCCCCCFFFF0000) == 0x4C4C0C0C3C3C0000); // data correctness +TEST_CONSTEXPR(_kandn_mask64(0x0123456789ABCDEF, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // inverted with all-ones mask +TEST_CONSTEXPR(_kandn_mask64(0x0, 0x1122334455667788) == 0x1122334455667788); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask64(0xFFFFFFFFFFFFFFFF, 0x8877665544332211) == 0x0); // all-zero (~0xFFFFFFFFFFFFFFFF) +TEST_CONSTEXPR(_kandn_mask64(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA) == 0x0); // ~A & A is 0 + __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index cc15517fe5bb5..6a6d6611dc18e 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -46,6 +46,12
@@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kandn_mask8(0xC3, 0xA5) == 0x24); // data correctness +TEST_CONSTEXPR(_kandn_mask8(0x1F0, 0xFF) == 0x0F); // truncated (to 0xF0) and inverted +TEST_CONSTEXPR(_kandn_mask8(0x00, 0xB7) == 0xB7); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask8(0xFF, 0x7E) == 0x00); // all-zero (~0xFF) +TEST_CONSTEXPR(_kandn_mask8(0x55, 0x55) == 0x00); // ~A & A is 0 + __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 5a131ab6af367..b18ba76473360 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8520,6 +8520,12 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_mm512_kandn(0xCC, 0xAA) == 0x22); // data correctness +TEST_CONSTEXPR(_mm512_kandn(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted +TEST_CONSTEXPR(_mm512_kandn(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) +TEST_CONSTEXPR(_mm512_kandn(0xFFFF, 0x5678) == 0x0000); // all-zero (~0xFFFF) +TEST_CONSTEXPR(_mm512_kandn(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 + __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8674,6 +8680,12 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kandn_mask16(0xCC, 0xAA) == 0x22); // data correctness +TEST_CONSTEXPR(_kandn_mask16(0x12345, 0xFFFF) == 0xDCBA); // truncated and inverted +TEST_CONSTEXPR(_kandn_mask16(0x0000, 0xABCD) == 0xABCD); // all-ones (~0) +TEST_CONSTEXPR(_kandn_mask16(0xFFFF, 0x5678) == 0x0000); // all-zero 
(~0xFFFF) +TEST_CONSTEXPR(_kandn_mask16(0xAAAA, 0xAAAA) == 0x0000); // ~A & A is 0 + __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 2312a1c1ca36b7f36908e642c903ae61001ec228 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Mon, 22 Sep 2025 22:20:12 +0800 Subject: [PATCH 5/6] Allow kor* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 20 ++++++++++++++++---- clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ++++++++++++ 9 files changed, 61 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index dc74685299736..edae7d66f037f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3109,15 +3109,15 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kandndi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def korqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def korhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def 
korsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 28a7e838d8e98..0701af2ac49f2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3604,6 +3604,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); + case X86::BI__builtin_ia32_korqi: + case X86::BI__builtin_ia32_korhi: + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 257ffbe8b74e4..61682314dfea2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14670,6 +14670,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { return ~LHS & RHS; }); } + + case X86::BI__builtin_ia32_korqi: + case X86::BI__builtin_ia32_korhi: + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index bdabc2382c5e0..30874f54815a7 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -62,14 +62,13 @@ _kandn_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS 
-_kor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index dbc50b464396d..1d9b772da7840 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -45,9 +45,8 @@ _kandn_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9dee968fad0a8..28a66df1b7534 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8124,9 +8124,8 @@ _mm512_kandn(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index a55b913fc859b..91916ab5a0081 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -36,11 +36,11 @@ __mmask32 
test_kand_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } -TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness +TEST_CONSTEXPR(_kand_mask32(0xCCCCCCCC, 0xAAAAAAAA) == 0x88888888); // data correctness TEST_CONSTEXPR(_kand_mask32(0x123456789, 0xFFFFFFFF) == 0x23456789); // should be truncated -TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero -TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one -TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint +TEST_CONSTEXPR(_kand_mask32(0xABCDEF01, 0x00000000) == 0x00000000); // all-zero +TEST_CONSTEXPR(_kand_mask32(0x56789ABC, 0xFFFFFFFF) == 0x56789ABC); // all-one +TEST_CONSTEXPR(_kand_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // disjoint __mmask64 test_kand_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kand_mask64 @@ -103,6 +103,12 @@ __mmask32 test_kor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask32(0xF0F0A5A5, 0x0F0F5A5A) == 0xFFFFFFFF); // data correctness +TEST_CONSTEXPR(_kor_mask32(0x12345ABCD, 0x12345ABCD) == 0x2345ABCD); // truncated +TEST_CONSTEXPR(_kor_mask32(0x1A2B3C4D, 0x00000000) == 0x1A2B3C4D); // all-zero +TEST_CONSTEXPR(_kor_mask32(0xDEADBEEF, 0xFFFFFFFF) == 0xFFFFFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask32(0xAAAAAAAA, 0x55555555) == 0xFFFFFFFF); // disjoint + __mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -113,6 +119,12 @@ __mmask64 test_kor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask64(0xF0A5C33C00FF11EE, 0x0F5AC33CFF00EE11) == 0xFFFFC33CFFFFFFFF); // data correctness +TEST_CONSTEXPR(_kor_mask64(0x123456789ABCDEF0, 0x123456789ABCDEF0) == 
0x123456789ABCDEF0); // full 64-bit value +TEST_CONSTEXPR(_kor_mask64(0x1122334455667788, 0x0) == 0x1122334455667788); // all-zero +TEST_CONSTEXPR(_kor_mask64(0x8877665544332211, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0xFFFFFFFFFFFFFFFF); // disjoint + __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 6a6d6611dc18e..49a47021d337c 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -62,6 +62,12 @@ __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m5 __E, __F); } +TEST_CONSTEXPR(_kor_mask8(0xB3, 0x6C) == 0xFF); // data correctness +TEST_CONSTEXPR(_kor_mask8(0x1A5, 0x1A5) == 0xA5); // truncated +TEST_CONSTEXPR(_kor_mask8(0xDE, 0x00) == 0xDE); // all-zero +TEST_CONSTEXPR(_kor_mask8(0x42, 0xFF) == 0xFF); // all-ones +TEST_CONSTEXPR(_kor_mask8(0xAA, 0x55) == 0xFF); // disjoint + __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index b18ba76473360..90d09da8807fe 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8537,6 +8537,12 @@ __mmask16 test_mm512_kor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m __E, __F); } +TEST_CONSTEXPR(_mm512_kor(0xC1, 0xA8) == 0xE9); // data correctness +TEST_CONSTEXPR(_mm512_kor(0x12345, 0x12345) == 0x2345); // truncated +TEST_CONSTEXPR(_mm512_kor(0xABCD, 0x0000) == 0xABCD); // all-zero 
+TEST_CONSTEXPR(_mm512_kor(0xABCD, 0xFFFF) == 0xFFFF); // all-ones +TEST_CONSTEXPR(_mm512_kor(0xAAAA, 0x5555) == 0xFFFF); // disjoint + int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: test_mm512_kortestc // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8697,6 +8703,12 @@ __mmask16 test_kor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kor_mask16(0xC1, 0xA8) == 0xE9); // data correctness +TEST_CONSTEXPR(_kor_mask16(0x12345, 0x12345) == 0x2345); // truncated +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0x0000) == 0xABCD); // all-zero +TEST_CONSTEXPR(_kor_mask16(0xABCD, 0xFFFF) == 0xFFFF); // all-ones +TEST_CONSTEXPR(_kor_mask16(0xAAAA, 0x5555) == 0xFFFF); // disjoint + __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From 1132cd33804fab47a9a85976238c558c9b602f76 Mon Sep 17 00:00:00 2001 From: fennecJ <[email protected]> Date: Tue, 23 Sep 2025 00:42:42 +0800 Subject: [PATCH 6/6] Allow kxnor* to be used in constexpr --- clang/include/clang/Basic/BuiltinsX86.td | 6 +++--- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++ clang/lib/AST/ExprConstant.cpp | 8 ++++++++ clang/lib/Headers/avx512bwintrin.h | 9 ++++----- clang/lib/Headers/avx512dqintrin.h | 5 ++--- clang/lib/Headers/avx512fintrin.h | 5 ++--- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512dq-builtins.c | 6 ++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 14 +++++++++++++- 9 files changed, 58 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index edae7d66f037f..0d4034922f3d5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -3157,15 +3157,15 @@ let Features = "avx512f", Attributes = [NoThrow, 
Const] in { def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr] in { def kxnorqi : X86Builtin<"unsigned char(unsigned char, unsigned char)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kxnorhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kxnorsi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; def kxnordi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0701af2ac49f2..98bb8932a76b5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3612,6 +3612,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS | RHS; }); + case X86::BI__builtin_ia32_kxnorqi: + case X86::BI__builtin_ia32_kxnorhi: + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, + [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); + case Builtin::BI__builtin_elementwise_fshl: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshl); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 61682314dfea2..710b4c36499d1 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14678,6 +14678,14 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return HandleMaskBinOp( [](const APSInt &LHS, const APSInt &RHS) { 
return LHS | RHS; }); } + + case X86::BI__builtin_ia32_kxnorqi: + case X86::BI__builtin_ia32_kxnorhi: + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: { + return HandleMaskBinOp( + [](const APSInt &LHS, const APSInt &RHS) { return ~(LHS ^ RHS); }); + } } } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 30874f54815a7..0c470d0253c80 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -72,14 +72,13 @@ _kor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_kxnor_mask32(__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask32(__mmask32 __A, __mmask32 __B) { return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask64(__mmask64 __A, __mmask64 __B) { return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); } diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 1d9b772da7840..bf7f620de1629 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -50,9 +50,8 @@ _kor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS -_kxnor_mask8(__mmask8 __A, __mmask8 __B) -{ +static __inline__ __mmask8 __DEFAULT_FN_ATTRS_CONSTEXPR +_kxnor_mask8(__mmask8 __A, __mmask8 __B) { return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 28a66df1b7534..4b27f20192edb 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ 
-8165,9 +8165,8 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B) return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kxnor (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 91916ab5a0081..0b596b6322d6a 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -136,6 +136,12 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask32(0x1234ABCD, 0xFFFF0000) == 0x12345432); // data correctness +TEST_CONSTEXPR(_kxnor_mask32(0x123456789ABCDEF0, 0xFFFFFFFF) == 0x9ABCDEF0); // should be truncated to 32 bits +TEST_CONSTEXPR(_kxnor_mask32(0xAABBCCDD, 0x00000000) == 0x55443322); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask32(0x87654321, 0xFFFFFFFF) == 0x87654321); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask32(0xAAAAAAAA, 0x55555555) == 0x00000000); // ~A xnor A == 0 + __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxnor_mask64 // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> @@ -147,6 +153,12 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask64(0x0123456789ABCDEF, 0xFFFFFFFF00000000) == 0x0123456776543210); // data correctness +TEST_CONSTEXPR(_kxnor_mask64(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F) == 0xFFFFFFFFFFFFFFFF); // full 64 bits +TEST_CONSTEXPR(_kxnor_mask64(0xFEDCBA9876543210, 0xFFFFFFFFFFFFFFFF) == 0xFEDCBA9876543210); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask64(0xAABBCCDD11223344, 0x0000000000000000) 
== 0x55443322EEDDCCBB); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask64(0xAAAAAAAAAAAAAAAA, 0x5555555555555555) == 0x0000000000000000); // ~A xnor A == 0 + __mmask32 test_kxor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask32 // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 49a47021d337c..6480e8c4237ae 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -79,6 +79,12 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __ __E, __F); } +TEST_CONSTEXPR(_kxnor_mask8(0xC5, 0xAF) == 0x95); // data correctness +TEST_CONSTEXPR(_kxnor_mask8(0x1234, 0xFF) == 0x34); // should be truncated to 8 bits +TEST_CONSTEXPR(_kxnor_mask8(0x3A, 0x00) == 0xC5); // all-zero mask, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask8(0xB4, 0xFF) == 0xB4); // all-one mask, res = LHS +TEST_CONSTEXPR(_kxnor_mask8(0xAA, 0x55) == 0x00); // ~A xnor A == 0 + __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask8 // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 90d09da8807fe..7180ff1b31f6a 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -8638,6 +8638,12 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _ __E, __F); } +TEST_CONSTEXPR(_mm512_kxnor(0x00C5, 0xFFAF) == 0x95); // data correctness +TEST_CONSTEXPR(_mm512_kxnor(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_mm512_kxnor(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS +TEST_CONSTEXPR(_mm512_kxnor(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS 
+TEST_CONSTEXPR(_mm512_kxnor(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 + __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> @@ -8672,7 +8678,7 @@ TEST_CONSTEXPR(_kand_mask16(0xCC, 0xAA) == 0x88); // data correctness TEST_CONSTEXPR(_kand_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated TEST_CONSTEXPR(_kand_mask16(0xABCD, 0x0000) == 0x0000); // all-zero TEST_CONSTEXPR(_kand_mask16(0x5678, 0xFFFF) == 0x5678); // all-one -TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // intersect +TEST_CONSTEXPR(_kand_mask16(0xAAAA, 0x5555) == 0x0000); // disjoint __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kandn_mask16 @@ -8721,6 +8727,12 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __E, __F); } +TEST_CONSTEXPR(_kxnor_mask16(0x00C5, 0xFFAF) == 0x95); // data correctness +TEST_CONSTEXPR(_kxnor_mask16(0x12345, 0xFFFF) == 0x2345); // should be truncated +TEST_CONSTEXPR(_kxnor_mask16(0xABCD, 0x0000) == 0x5432); // all-zero, res = ~LHS +TEST_CONSTEXPR(_kxnor_mask16(0x5678, 0xFFFF) == 0x5678); // all-one, res = LHS +TEST_CONSTEXPR(_kxnor_mask16(0xAAAA, 0x5555) == 0x0000); // ~A xnor A == 0 + __mmask16 test_kxor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_kxor_mask16 // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
