https://github.com/GeneraluseAI updated 
https://github.com/llvm/llvm-project/pull/169985

From 0cc7a282cfb27dc422bd37adf16912da06df7a0e Mon Sep 17 00:00:00 2001
From: generaluseai <[email protected]>
Date: Sat, 29 Nov 2025 18:16:55 +0800
Subject: [PATCH] [CIR][X86] Implement lowering for AVX512 kortest and ktest
 builtins

This patch adds CIR codegen support for the AVX512 mask test builtins on
X86: kortestc/kortestz and ktestc/ktestz across all supported mask widths
(qi, hi, si, di). The kortest builtins are lowered to vector<i1> mask
logic followed by a scalar comparison in CIR, while the ktest builtins
are lowered to calls to the corresponding x86.avx512.ktest{c,z} LLVM
intrinsics, consistent with the semantics of the classic LLVM codegen.
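
For reference, the kortest semantics being lowered can be written as a
scalar C model (a sketch for the 8-bit case only; these helper names are
illustrative and not part of the patch):

    /* kortestc: 1 iff the OR of the two masks has all bits set. */
    unsigned char kortestc_ref(unsigned char a, unsigned char b) {
      return (unsigned char)((a | b) == 0xFFu);
    }

    /* kortestz: 1 iff the OR of the two masks has no bits set. */
    unsigned char kortestz_ref(unsigned char a, unsigned char b) {
      return (unsigned char)((a | b) == 0u);
    }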

Because ClangIR does not yet provide a dedicated `zext` operation,
the lowering emulates zero-extension by first converting the boolean
result through `bool_to_int` and then performing an integer cast to the
final result type. This reproduces the `icmp` + `zext` pattern used in
LLVM IR and maintains semantic equivalence.
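
As a scalar C analogue of that two-step cast (illustrative only; the
function name is hypothetical):

    /* bool_to_int, then an integral cast: mirrors zext i1 -> i32
       followed by trunc i32 -> i8 in the emitted LLVM IR. */
    unsigned char lower_bool_result(int cmp_is_true) {
      int widened = cmp_is_true ? 1 : 0; /* bool_to_int -> !s32i */
      return (unsigned char)widened;     /* integral cast -> !u8i */
    }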
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  59 ++++-
 .../CodeGenBuiltins/X86/avx512bw-builtins.c   | 213 ++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512dq-builtins.c   | 151 +++++++++++++
 .../CodeGenBuiltins/X86/avx512f-builtins.c    |  57 +++++
 4 files changed, 474 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 98652a624baa5..5d2179c42eb1c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -220,6 +220,18 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
                                ops[0].getType());
 }
 
+static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
+                                   const std::string &intrinsicName,
+                                   SmallVectorImpl<mlir::Value> &ops) {
+  auto intTy = cast<cir::IntType>(ops[0].getType());
+  unsigned numElts = intTy.getWidth();
+  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+  mlir::Type vecTy = lhsVec.getType();
+  return emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
+                             mlir::ValueRange{lhsVec, rhsVec});
+}
+
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                  mlir::Value vec, mlir::Value value,
                                  mlir::Value indexOp) {
@@ -1155,26 +1167,61 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vpcomuw:
   case X86::BI__builtin_ia32_vpcomud:
   case X86::BI__builtin_ia32_vpcomuq:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_kortestcqi:
   case X86::BI__builtin_ia32_kortestchi:
   case X86::BI__builtin_ia32_kortestcsi:
-  case X86::BI__builtin_ia32_kortestcdi:
+  case X86::BI__builtin_ia32_kortestcdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    mlir::Value allOnesOp =
+        builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth()));
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_kortestzqi:
   case X86::BI__builtin_ia32_kortestzhi:
   case X86::BI__builtin_ia32_kortestzsi:
-  case X86::BI__builtin_ia32_kortestzdi:
+  case X86::BI__builtin_ia32_kortestzdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    mlir::Value allZerosOp = builder.getNullValue(ty, loc).getResult();
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_ktestcqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.b", ops);
   case X86::BI__builtin_ia32_ktestzqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.b", ops);
   case X86::BI__builtin_ia32_ktestchi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.w", ops);
   case X86::BI__builtin_ia32_ktestzhi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.w", ops);
   case X86::BI__builtin_ia32_ktestcsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.d", ops);
   case X86::BI__builtin_ia32_ktestzsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.d", ops);
   case X86::BI__builtin_ia32_ktestcdi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.q", ops);
   case X86::BI__builtin_ia32_ktestzdi:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.q", ops);
   case X86::BI__builtin_ia32_kaddqi:
     return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
                                "x86.avx512.kadd.b", ops);
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 48a89769ea10f..d3480caab9dd9 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -559,3 +559,216 @@ __m512i test_mm512_shufflehi_epi16(__m512i __A) {
  // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
   return _mm512_shufflehi_epi16(__A, 5);
 }
+
+unsigned char test_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
+  // CIR-LABEL: _kortestc_mask32_u8
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]]  = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u32i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestc_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[OR:.*]]  = or <32 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // LLVM: %[[CMP:.*]] = icmp eq i32 %[[CAST]], -1
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestc_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[OR:.*]]  = or <32 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // OGCG: %[[CMP:.*]] = icmp eq i32 %[[CAST]], -1
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestc_mask32_u8(__A, __B);
+}
+
+unsigned char test_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
+  // CIR-LABEL: _kortestc_mask64_u8
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]]  = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u64i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestc_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[OR:.*]]  = or <64 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // LLVM: %[[CMP:.*]] = icmp eq i64 %[[CAST]], -1
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestc_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[OR:.*]]  = or <64 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // OGCG: %[[CMP:.*]] = icmp eq i64 %[[CAST]], -1
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestc_mask64_u8(__A, __B);
+}
+
+unsigned char test_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
+  // CIR-LABEL: _kortestz_mask32_u8
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]]  = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u32i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestz_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[OR:.*]]  = or <32 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // LLVM: %[[CMP:.*]] = icmp eq i32 %[[CAST]], 0
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestz_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[OR:.*]]  = or <32 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // OGCG: %[[CMP:.*]] = icmp eq i32 %[[CAST]], 0
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestz_mask32_u8(__A, __B);
+}
+
+unsigned char test_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
+  // CIR-LABEL: _kortestz_mask64_u8
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]]  = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u64i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestz_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[OR:.*]]  = or <64 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // LLVM: %[[CMP:.*]] = icmp eq i64 %[[CAST]], 0
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestz_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[OR:.*]]  = or <64 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // OGCG: %[[CMP:.*]] = icmp eq i64 %[[CAST]], 0
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestz_mask64_u8(__A, __B);
+}
+
+
+unsigned char test_ktestc_mask32_u8(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _ktestc_mask32_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.d"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: cir.cast integral %[[CAST]] : !u32i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %[[LHS]], <32 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.d
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask32_u8(A, B);
+}
+
+unsigned char test_ktestz_mask32_u8(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _ktestz_mask32_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.d"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: cir.cast integral %[[CAST]] : !u32i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %[[LHS]], <32 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.d
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask32_u8(A, B);
+}
+
+unsigned char test_ktestc_mask64_u8(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _ktestc_mask64_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.q"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: cir.cast integral %[[CAST]] : !u64i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %[[LHS]], <64 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.q
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask64_u8(A, B);
+}
+
+unsigned char test_ktestz_mask64_u8(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _ktestz_mask64_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.q"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: cir.cast integral %[[CAST]] : !u64i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %[[LHS]], <64 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.q
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask64_u8(A, B);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 5d81f666271be..85a5b01302788 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -208,3 +208,154 @@ __mmask8 test_kmov_b(__mmask8 A) {
  // OGCG: bitcast <8 x i1> {{.*}} to i8
  return __builtin_ia32_kmovb(A);
 }
+
+unsigned char test_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) {
+  // CIR-LABEL: _kortestc_mask8_u8
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<255> : !u8i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u8i, !cir.bool
+  // CIR: cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral {{.*}} : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestc_mask8_u8
+  // LLVM: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[OR:.*]] = or <8 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <8 x i1> %[[OR]] to i8
+  // LLVM: %[[CMP:.*]] = icmp eq i8 %[[CAST]], -1
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestc_mask8_u8
+  // OGCG: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[OR:.*]] = or <8 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <8 x i1> %[[OR]] to i8
+  // OGCG: %[[CMP:.*]] = icmp eq i8 %[[CAST]], -1
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestc_mask8_u8(__A, __B);
+}
+
+unsigned char test_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) {
+  // CIR-LABEL: _kortestz_mask8_u8
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u8i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u8i, !cir.bool
+  // CIR: cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral {{.*}} : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestz_mask8_u8
+  // LLVM: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[OR:.*]] = or <8 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <8 x i1> %[[OR]] to i8
+  // LLVM: %[[CMP:.*]] = icmp eq i8 %[[CAST]], 0
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestz_mask8_u8
+  // OGCG: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[OR:.*]] = or <8 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <8 x i1> %[[OR]] to i8
+  // OGCG: %[[CMP:.*]] = icmp eq i8 %[[CAST]], 0
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestz_mask8_u8(__A, __B);
+}
+
+
+unsigned char test_ktestc_mask8_u8(__mmask8 A, __mmask8 B) {
+  // CIR-LABEL: _ktestc_mask8_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.b"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+  // CIR: cir.cast integral %[[CAST]] : !u8i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask8_u8
+  // LLVM: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> %[[LHS]], <8 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask8_u8
+  // OGCG: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.b
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask8_u8(A, B);
+}
+
+unsigned char test_ktestz_mask8_u8(__mmask8 A, __mmask8 B) {
+  // CIR-LABEL: _ktestz_mask8_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.b"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+  // CIR: cir.cast integral %[[CAST]] : !u8i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask8_u8
+  // LLVM: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> %[[LHS]], <8 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask8_u8
+  // OGCG: %[[LHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.b
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask8_u8(A, B);
+}
+
+unsigned char test_ktestc_mask16_u8(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _ktestc_mask16_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.w"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+  // CIR: cir.cast integral %[[CAST]] : !u16i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask16_u8
+  // LLVM: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> %[[LHS]], <16 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask16_u8
+  // OGCG: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.w
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask16_u8(A, B);
+}
+
+unsigned char test_ktestz_mask16_u8(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _ktestz_mask16_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.w"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+  // CIR: cir.cast integral %[[CAST]] : !u16i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask16_u8
+  // LLVM: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> %[[LHS]], <16 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask16_u8
+  // OGCG: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.w
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask16_u8(A, B);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index b0ed2397624d7..918cf40c4f4c1 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -747,3 +747,60 @@ __m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
 
 return _mm512_mul_epu32(__A, __B);
 }
+
+int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
+  // CIR-LABEL: _mm512_kortestc
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<65535> : !u16i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u16i, !cir.bool
+  // CIR: cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM-LABEL: _mm512_kortestc
+  // LLVM: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[OR:.*]] = or <16 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <16 x i1> %[[OR]] to i16
+  // LLVM: %[[CMP:.*]] = icmp eq i16 %[[CAST]], -1
+  // LLVM: zext i1 %[[CMP]] to i32
+
+  // OGCG-LABEL: _mm512_kortestc
+  // OGCG: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[OR:.*]] = or <16 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <16 x i1> %[[OR]] to i16
+  // OGCG: %[[CMP:.*]] = icmp eq i16 %[[CAST]], -1
+  // OGCG: zext i1 %[[CMP]] to i32
+  return _mm512_kortestc(__A, __B);
+}
+
+
+int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
+  // CIR-LABEL: _mm512_kortestz
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u16i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u16i, !cir.bool
+  // CIR: cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM-LABEL: _mm512_kortestz
+  // LLVM: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %[[OR:.*]] = or <16 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <16 x i1> %[[OR]] to i16
+  // LLVM: %[[CMP:.*]] = icmp eq i16 %[[CAST]], 0
+  // LLVM: zext i1 %[[CMP]] to i32
+
+  // OGCG-LABEL: _mm512_kortestz
+  // OGCG: %[[LHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %[[OR:.*]] = or <16 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <16 x i1> %[[OR]] to i16
+  // OGCG: %[[CMP:.*]] = icmp eq i16 %[[CAST]], 0
+  // OGCG: zext i1 %[[CMP]] to i32
+  return _mm512_kortestz(__A, __B);
+}
