[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)

via cfe-commits Sun, 28 Dec 2025 14:22:02 -0800

https://github.com/DannyDaoBoYang created 
https://github.com/llvm/llvm-project/pull/173802


As requested by #167765:
- Added CIR support for pmovqd512_mask.
- In addition, I noticed that pmovwb512_mask uses identical
  truncation-and-selection logic in
  [X86.cpp](https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/TargetBuiltins/X86.cpp#L1638),
  so I added support for pmovwb512_mask using the same logic.
- Added a new LIT test case in
  clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c to verify correct
  CIR generation for both builtins.
- Added a type cast fix in CIRGenCleanup.cpp, where std::max encountered a type
  mismatch between size_t and unsigned long on Win64. This caused a compile
  error when building on Windows.

This is my first pull request to this repo. If you notice anything wrong, please 
comment; I look forward to your corrections. 

Addresses #167765 

>From df2937fa546f52af32f0db7d5d94ff5611b59f71 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <[email protected]>
Date: Sun, 28 Dec 2025 17:00:49 -0500
Subject: [PATCH] Add support for pmovqd512_mask and pmovwb512_mask

Add CIR support for pmovqd512_mask and pmovwb512_mask. Also include a minor type
cast fix in CIRGenCleanup.cpp for a compile error on Windows.
---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      | 16 ++++++++++
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  5 +++-
 clang/lib/CIR/CodeGen/CIRGenCleanup.cpp       |  2 +-
 .../X86/pmovqd-mask-builtins.c                | 29 +++++++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h 
b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..481d06091f012 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   // Cast/Conversion Operators
   
//===--------------------------------------------------------------------===//
 
+  /// Create an value truncation to a narrower type.
+  /// Returns the source if types already match. CIR casts do not
+  /// encode NUW/NSW; wrap semantics should be handled by callers.
+  /// Supports both scalar integers and vectors of integers.
+  mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
+                          mlir::Type newTy) {
+    auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
+    if (newTy == srcIntTy)
+      return src;
+    return createCast(loc, cir::CastKind::integral, src, newTy);
+  }
+
+  mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+    return createTrunc(src.getLoc(), src, newTy);
+  }
+
   mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
                          mlir::Value src, mlir::Type newTy) {
     if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..b553327f676f5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
                                     mask);
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
-  case X86::BI__builtin_ia32_pmovwb512_mask:
+  case X86::BI__builtin_ia32_pmovwb512_mask: {
+    mlir::Value Res = builder.createTrunc(ops[0], 
cast<cir::VectorType>(ops[1].getType()));
+    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, 
ops[1]);
+  }
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 6c6cb402d1190..8d9ea7c6c22eb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -97,7 +97,7 @@ EHScopeStack::getInnermostActiveNormalCleanup() const {
 char *EHScopeStack::allocate(size_t size) {
   size = llvm::alignTo(size, ScopeStackAlignment);
   if (!startOfBuffer) {
-    unsigned capacity = llvm::PowerOf2Ceil(std::max(size, 1024ul));
+    unsigned capacity = llvm::PowerOf2Ceil(std::max<size_t>(size, 1024ul));
     startOfBuffer = std::make_unique<char[]>(capacity);
     startOfData = endOfBuffer = startOfBuffer.get() + capacity;
   } else if (static_cast<size_t>(startOfData - startOfBuffer.get()) < size) {
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
new file mode 100644
index 0000000000000..b43d2dc8f050d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+#include <immintrin.h>
+
+__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
+  // CIR-LABEL: test_pmovqd_mask
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  return __builtin_ia32_pmovqd512_mask(a, b, mask);
+}
+
+__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
+  // CIR-LABEL: test_pmovqd_maskz
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  __m256i zero = _mm256_setzero_si256();
+  return __builtin_ia32_pmovqd512_mask(a, zero, mask);
+}
+
+__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
+  // CIR-LABEL: test_pmovwb_mask
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> 
-> !cir.vector<32 x !s8i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 
x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 
x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+  return __builtin_ia32_pmovwb512_mask(a, b, mask);
+}
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)

Reply via email to