Author: Ahmed Nour
Date: 2025-11-17T10:06:50Z
New Revision: 44f72fb39c833a8d53f433c6a03e88c81534c83e

URL: 
https://github.com/llvm/llvm-project/commit/44f72fb39c833a8d53f433c6a03e88c81534c83e
DIFF: 
https://github.com/llvm/llvm-project/commit/44f72fb39c833a8d53f433c6a03e88c81534c83e.diff

LOG: [X86][Clang] Add AVX512 kunpck intrinsics to be used in constexp (#167683)

Resolves #166976

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/AST/ByteCode/InterpBuiltin.cpp
    clang/lib/AST/ExprConstant.cpp
    clang/lib/Headers/avx512bwintrin.h
    clang/lib/Headers/avx512fintrin.h
    clang/test/CodeGen/X86/avx512bw-builtins.c
    clang/test/CodeGen/X86/avx512f-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 69d18679fd6ec..bbe0aa3657c06 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -2142,7 +2142,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>
   def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, 
_Constant int, _Constant int)">;
 }
 
-let Features = "avx512bw", Attributes = [NoThrow, Const] in {
+let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
   def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, 
unsigned long long int)">;
   def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
 }
@@ -3185,7 +3185,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, 
Constexpr] in {
   def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long 
int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
   def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
 }
 

diff  --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index afcbe9d4f5b81..a2f99c7c234fe 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4619,6 +4619,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, 
const CallExpr *Call,
         S, OpPC, Call,
         [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
 
+  case X86::BI__builtin_ia32_kunpckhi:
+  case X86::BI__builtin_ia32_kunpckdi:
+  case X86::BI__builtin_ia32_kunpcksi:
+    return interp__builtin_elementwise_int_binop(
+        S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
+          // Generic kunpack: extract lower half of each operand and 
concatenate
+          // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
+          unsigned BW = A.getBitWidth();
+          return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)),
+                        A.isUnsigned());
+        });
+
   case X86::BI__builtin_ia32_phminposuw128:
     return interp__builtin_ia32_phminposuw(S, OpPC, Call);
 

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 888dca1e3a613..d9b3ee20e919f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16348,6 +16348,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const 
CallExpr *E,
     return Success((A | B) == 0, E);
   }
 
+  case clang::X86::BI__builtin_ia32_kunpckhi:
+  case clang::X86::BI__builtin_ia32_kunpckdi:
+  case clang::X86::BI__builtin_ia32_kunpcksi: {
+    APSInt A, B;
+    if (!EvaluateInteger(E->getArg(0), A, Info) ||
+        !EvaluateInteger(E->getArg(1), B, Info))
+      return false;
+
+    // Generic kunpack: extract lower half of each operand and concatenate
+    // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
+    unsigned BW = A.getBitWidth();
+    APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned());
+    return Success(Result, E);
+  }
+
   case clang::X86::BI__builtin_ia32_lzcnt_u16:
   case clang::X86::BI__builtin_ia32_lzcnt_u32:
   case clang::X86::BI__builtin_ia32_lzcnt_u64: {

diff  --git a/clang/lib/Headers/avx512bwintrin.h 
b/clang/lib/Headers/avx512bwintrin.h
index 3cfa32eb9e727..c37b42d965167 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
                                               (__v64qi) 
_mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
-                                                               __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
                 (__mmask64) __B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
   return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
                 (__mmask32) __B);
 }

diff  --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 976eba816b8bf..53b18df764370 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8069,9 +8069,8 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned 
char *__C) {
   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) 
__B);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx512bw-builtins.c 
b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 45f1a600d380a..140a2c0dcbb56 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2731,6 +2731,12 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, 
__m512i __C, __m512i __D
   return 
_mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, 
__A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); 
 }
 
+TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) 
== 0x00000000FFFFFFFFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) 
== 0x2345678989ABCDEFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) 
== 0xFFFFFFFF00000000ull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) 
== 0xAAAA55555555AAAAull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 
0x89ABCDEF76543210ull);
+
 __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i 
__D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_mm512_kunpackw
   // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -2741,6 +2747,12 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, 
__m512i __C, __m512i __D
   return 
_mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, 
__A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); 
 }
 
+TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
+TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
+TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
+
 __m512i test_mm512_loadu_epi16 (void *__P)
 {
   // CHECK-LABEL: test_mm512_loadu_epi16

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c 
b/clang/test/CodeGen/X86/avx512f-builtins.c
index 71e700af0069e..ec813e5acd7cf 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9162,6 +9162,12 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, 
__m512i __C, __m512i __D
                                                        __E, __F);
 }
 
+TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
+TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
+TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
+TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
+TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
+
 __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, 
__m512i __E, __m512i __F) {
   // CHECK-LABEL: test_mm512_kxnor
   // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to