https://github.com/ahmednoursphinx updated https://github.com/llvm/llvm-project/pull/167683
>From e91be48deb3f895f88e9ab6a34ffd730e3fce47f Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Wed, 12 Nov 2025 13:01:46 +0200 Subject: [PATCH 1/8] [clang] Add _mm512_kunpackd and _mm512_kunpackw to avx512fintrin.h Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic. These intrinsics extract and concatenate the lower halves of mask registers, using the existing backend support for __builtin_ia32_kunpckdi and __builtin_ia32_kunpcksi builtins. Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h for completeness. Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c. --- clang/lib/Headers/avx512fintrin.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 997e9608e112f..e735c8a35f1ce 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -40,6 +40,8 @@ typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1))) typedef unsigned char __mmask8; typedef unsigned short __mmask16; +typedef unsigned int __mmask32; +typedef unsigned long long __mmask64; /* Rounding mode macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 @@ -8100,6 +8102,18 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B) return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_kunpackd (__mmask64 __A, __mmask64 __B) +{ + return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm512_kunpackw (__mmask32 __A, __mmask32 __B) +{ + return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); +} + static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); >From c66f105a35d071001c1f126178565dfb75bd540c Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Wed, 12 Nov 2025 13:05:30 +0200 Subject: [PATCH 2/8] [clang] Add _mm512_kunpackd and _mm512_kunpackw intrinsics with constexpr support Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic. These intrinsics extract and concatenate the lower halves of mask registers, using the existing backend support for __builtin_ia32_kunpckdi and __builtin_ia32_kunpcksi builtins. Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h for completeness. This patch adds constexpr support for all three kunpack intrinsics by: 1. Using __DEFAULT_FN_ATTRS_CONSTEXPR attribute 2. Adding builtin interpretation in ExprConstant.cpp for compile-time evaluation in constexpr contexts 3. Adding constexpr tests to verify correct behavior Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c for runtime code generation validation. --- clang/lib/AST/ExprConstant.cpp | 51 +++++++++++++++++++++++ clang/lib/Headers/avx512bwintrin.h | 6 +-- clang/lib/Headers/avx512fintrin.h | 16 ++++--- clang/test/CodeGen/X86/avx512f-builtins.c | 18 ++++++++ 4 files changed, 79 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1bfea24b228e8..62a0a26ff1087 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16287,6 +16287,42 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success((A | B) == 0, E); } + case clang::X86::BI__builtin_ia32_kunpckhi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + // Extract lower 8 bits of each operand and concatenate + // Result = (A[7:0] << 8) | B[7:0] + APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF); + return Success(Result, E); + } + + case clang::X86::BI__builtin_ia32_kunpckdi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + // Extract lower 32 bits of each operand and concatenate + // Result = (A[31:0] << 32) | B[31:0] + APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL); + return Success(Result, E); + } + + case clang::X86::BI__builtin_ia32_kunpcksi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + // Extract lower 16 bits of each operand and concatenate + // Result = (A[15:0] << 16) | B[15:0] + APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF); + return Success(Result, E); + } + case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: case clang::X86::BI__builtin_ia32_lzcnt_u64: { @@ -16413,6 +16449,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(APValue(Result), E); } + case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kunpcksi: + case X86::BI__builtin_ia32_kunpckdi: { + return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) { + // Unpack: concatenate lower half of RHS with lower half of LHS + unsigned HalfBits = LHS.getBitWidth() / 2; + APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned()); + Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth()); + + APSInt LowerLHS = LHS & Mask; + APSInt LowerRHS = RHS & Mask; + return LowerRHS | (LowerLHS << HalfBits); + }); + } + case X86::BI__builtin_ia32_kaddqi: case X86::BI__builtin_ia32_kaddhi: case X86::BI__builtin_ia32_kaddsi: diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 4a02c96620335..ff850973a1833 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1606,13 +1606,13 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) { (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackd(__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackw (__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e735c8a35f1ce..bd8a15b5224db 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8096,22 +8096,20 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kunpackb (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS -_mm512_kunpackd (__mmask64 __A, __mmask64 __B) -{ - return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackd(__mmask64 __A, __mmask64 __B) { + return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ - return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackw(__mmask32 __A, __mmask32 __B) { + return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 17778b52d3671..9dcc749910175 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9126,6 +9126,24 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D __E, __F); } +TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00); +TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34); +TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000); +TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55); +TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD); + +TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu); +TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu); +TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u); +TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu); +TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u); + +TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull); +TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull); +TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull); +TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull); +TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull); + __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >From e56047a338d8e4b589adabb4a51ea0664add022a Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Wed, 12 Nov 2025 14:18:10 +0200 Subject: [PATCH 3/8] chore: update formatting --- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/Headers/avx512bwintrin.h | 3 +-- clang/lib/Headers/avx512fintrin.h | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 62a0a26ff1087..a0a1d0ce2a94b 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16457,7 +16457,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned HalfBits = LHS.getBitWidth() / 2; APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned()); Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth()); - + APSInt LowerLHS = LHS & Mask; APSInt LowerRHS = RHS & Mask; return LowerRHS | (LowerLHS << HalfBits); diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index ff850973a1833..d247f648b9eb5 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1613,8 +1613,7 @@ _mm512_kunpackd(__mmask64 __A, __mmask64 __B) { } static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ +_mm512_kunpackw(__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index bd8a15b5224db..badc30a7eb26c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8097,8 +8097,7 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { } static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_kunpackb (__mmask16 __A, __mmask16 __B) -{ +_mm512_kunpackb(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } >From e46b536cc1fe5df978f3b536f87706cce578a5af Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Sun, 16 Nov 2025 16:54:54 +0200 Subject: [PATCH 4/8] refactor: PR Feedback --- clang/include/clang/Basic/BuiltinsX86.td | 4 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 ++++++++++++ clang/lib/AST/ExprConstant.cpp | 49 ++++------------------- clang/lib/Headers/avx512fintrin.h | 10 ----- clang/test/CodeGen/X86/avx512f-builtins.c | 10 ++--- 5 files changed, 41 insertions(+), 58 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cb08e2107f072..4872f350bb4ff 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2151,7 +2151,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; } @@ -3194,7 +3194,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6c7b2f502cc51..c1389424154be 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4579,6 +4579,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kunpckdi: + case X86::BI__builtin_ia32_kunpcksi: + return interp__builtin_elementwise_int_binop(S, OpPC, Call, + [](const APSInt &A, + const APSInt &B) { + // Generic kunpack: extract + // lower half of each operand + // and concatenate Result = + // (A[HalfWidth-1:0] << + // HalfWidth) | + // B[HalfWidth-1:0] + unsigned HalfWidth = + A.getBitWidth() / 2; + APSInt Result( + A.getLoBits(HalfWidth) + .zext(A.getBitWidth()), + A.isUnsigned()); + Result <<= HalfWidth; + Result |= APSInt( + B.getLoBits(HalfWidth) + .zext(B.getBitWidth()), + B.isUnsigned()); + return Result; + }); + case X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a0a1d0ce2a94b..854d2969b6784 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16287,39 +16287,20 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success((A | B) == 0, E); } - case clang::X86::BI__builtin_ia32_kunpckhi: { - APSInt A, B; - if (!EvaluateInteger(E->getArg(0), A, Info) || - !EvaluateInteger(E->getArg(1), B, Info)) - return false; - - // Extract lower 8 bits of each operand and concatenate - // Result = (A[7:0] << 8) | B[7:0] - APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF); - return Success(Result, E); - } - - case clang::X86::BI__builtin_ia32_kunpckdi: { - APSInt A, B; - if (!EvaluateInteger(E->getArg(0), A, Info) || - !EvaluateInteger(E->getArg(1), B, Info)) - return false; - - // Extract lower 32 bits of each operand and concatenate - // Result = (A[31:0] << 32) | B[31:0] - APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL); - return Success(Result, E); - } - + case clang::X86::BI__builtin_ia32_kunpckhi: + case clang::X86::BI__builtin_ia32_kunpckdi: case clang::X86::BI__builtin_ia32_kunpcksi: { APSInt A, B; if (!EvaluateInteger(E->getArg(0), A, Info) || !EvaluateInteger(E->getArg(1), B, Info)) return false; - // Extract lower 16 bits of each operand and concatenate - // Result = (A[15:0] << 16) | B[15:0] - APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF); + // Generic kunpack: extract lower half of each operand and concatenate + // Result = (A[HalfWidth-1:0] << HalfWidth) | B[HalfWidth-1:0] + unsigned HalfWidth = A.getBitWidth() / 2; + APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned()); + Result <<= HalfWidth; + Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned()); return Success(Result, E); } @@ -16449,20 +16430,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(APValue(Result), E); } - case X86::BI__builtin_ia32_kunpckhi: - case X86::BI__builtin_ia32_kunpcksi: - case X86::BI__builtin_ia32_kunpckdi: { - return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) { - // Unpack: concatenate lower half of RHS with lower half of LHS - unsigned HalfBits = LHS.getBitWidth() / 2; - APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned()); - Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth()); - - APSInt LowerLHS = LHS & Mask; - APSInt LowerRHS = RHS & Mask; - return LowerRHS | (LowerLHS << HalfBits); - }); - } case X86::BI__builtin_ia32_kaddqi: case X86::BI__builtin_ia32_kaddhi: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index badc30a7eb26c..a927de739b644 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8101,16 +8101,6 @@ _mm512_kunpackb(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_kunpackd(__mmask64 __A, __mmask64 __B) { - return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B); -} - -static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_kunpackw(__mmask32 __A, __mmask32 __B) { - return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B); -} - static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_kxnor(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 9dcc749910175..13c7eec76233f 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9126,21 +9126,21 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D __E, __F); } -TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00); +TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF); TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34); -TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000); +TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00); TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55); TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD); TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu); TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu); -TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u); +TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u); TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu); TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u); TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull); -TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull); -TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull); +TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull); +TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull); TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull); TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull); >From 31d0b0975bc2824ab2fd20789ba8f6ea1eb1f989 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Sun, 16 Nov 2025 16:58:17 +0200 Subject: [PATCH 5/8] Format files --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 38 ++++++++++-------------- clang/lib/AST/ExprConstant.cpp | 4 +-- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index c1389424154be..ce213e52df3a0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4582,28 +4582,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_kunpckhi: case X86::BI__builtin_ia32_kunpckdi: case X86::BI__builtin_ia32_kunpcksi: - return interp__builtin_elementwise_int_binop(S, OpPC, Call, - [](const APSInt &A, - const APSInt &B) { - // Generic kunpack: extract - // lower half of each operand - // and concatenate Result = - // (A[HalfWidth-1:0] << - // HalfWidth) | - // B[HalfWidth-1:0] - unsigned HalfWidth = - A.getBitWidth() / 2; - APSInt Result( - A.getLoBits(HalfWidth) - .zext(A.getBitWidth()), - A.isUnsigned()); - Result <<= HalfWidth; - Result |= APSInt( - B.getLoBits(HalfWidth) - .zext(B.getBitWidth()), - B.isUnsigned()); - return Result; - }); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + // Generic kunpack: extract + // lower half of each operand + // and concatenate Result = + // (A[HalfWidth-1:0] << + // HalfWidth) | + // B[HalfWidth-1:0] + unsigned HalfWidth = A.getBitWidth() / 2; + APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), + A.isUnsigned()); + Result <<= HalfWidth; + Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), + B.isUnsigned()); + return Result; + }); case X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 854d2969b6784..9f3d534ddd2f4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16300,7 +16300,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned HalfWidth = A.getBitWidth() / 2; APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned()); Result <<= HalfWidth; - Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned()); + Result |= + APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned()); return Success(Result, E); } @@ -16430,7 +16431,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(APValue(Result), E); } - case X86::BI__builtin_ia32_kaddqi: case X86::BI__builtin_ia32_kaddhi: case X86::BI__builtin_ia32_kaddsi: >From 19a8c5781053cc43dd27fe3bc76b0f4e13ec6c3e Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Sun, 16 Nov 2025 17:09:11 +0200 Subject: [PATCH 6/8] feat: Use APInt built-in methods --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 17 ++++------------- clang/lib/AST/ExprConstant.cpp | 9 +++------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ce213e52df3a0..952c9934336a4 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4584,19 +4584,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_kunpcksi: return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APSInt &A, const APSInt &B) { - // Generic kunpack: extract - // lower half of each operand - // and concatenate Result = - // (A[HalfWidth-1:0] << - // HalfWidth) | - // B[HalfWidth-1:0] - unsigned HalfWidth = A.getBitWidth() / 2; - APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), - A.isUnsigned()); - Result <<= HalfWidth; - Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), - B.isUnsigned()); - return Result; + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned()); }); case X86::BI__builtin_ia32_phminposuw128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 9f3d534ddd2f4..1243f36b59ab4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16296,12 +16296,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; // Generic kunpack: extract lower half of each operand and concatenate - // Result = (A[HalfWidth-1:0] << HalfWidth) | B[HalfWidth-1:0] - unsigned HalfWidth = A.getBitWidth() / 2; - APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned()); - Result <<= HalfWidth; - Result |= - APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned()); + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned()); return Success(Result, E); } >From c3bb38d75b2a7ac0382cd058db0d22f8768cc6d4 Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Sun, 16 Nov 2025 17:09:29 +0200 Subject: [PATCH 7/8] Apply formatting --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 952c9934336a4..3e0a6cd8402d6 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4587,7 +4587,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, // Generic kunpack: extract lower half of each operand and concatenate // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] unsigned BW = A.getBitWidth(); - return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned()); + return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), + A.isUnsigned()); }); case X86::BI__builtin_ia32_phminposuw128: >From 900cd2212276bfd1b109ec1d41814dbbbd287a4a Mon Sep 17 00:00:00 2001 From: ahmed <[email protected]> Date: Sun, 16 Nov 2025 18:07:37 +0200 Subject: [PATCH 8/8] fix: Move tests to right location --- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 2749dc5741b58..be584b36a52b1 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2741,6 +2741,18 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } +TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu); +TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu); +TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u); +TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu); +TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u); + +TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull); +TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull); +TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull); +TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull); +TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull); + __m512i test_mm512_loadu_epi16 (void *__P) { // CHECK-LABEL: test_mm512_loadu_epi16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 13c7eec76233f..4cf59a44d2fd1 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9132,18 +9132,6 @@ TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00); TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55); TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD); -TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu); -TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu); -TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u); -TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu); -TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u); - -TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull); -TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull); -TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull); -TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull); -TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull); - __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
