[clang] [CIR][X86]Implement handling for convert-half builtins (PR #173143)

Priyanshu Kumar via cfe-commits Sat, 20 Dec 2025 11:17:03 -0800

https://github.com/Priyanshu3820 updated 
https://github.com/llvm/llvm-project/pull/173143


>From 0dd977df6b2fda837c32044c199a13a32c232b6f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 08:25:59 +0000
Subject: [PATCH 1/3] Implement handling for convert-half builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 55 ++++++++++++-
 .../X86/avx512vlbf16-builtins.c               | 80 +++++++++++++++++++
 2 files changed, 132 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..59d467da3a9fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -362,6 +362,27 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, 
mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+                                            mlir::Location loc,
+                                            mlir::Type dstTy,
+                                            SmallVectorImpl<mlir::Value> &ops) 
{
+
+  mlir::Value src = ops[0];
+  mlir::Value passthru = ops[1];
+
+  auto vecTy = mlir::cast<cir::VectorType>(src.getType());
+  uint64_t numElems = vecTy.getSize();
+
+  mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems);
+
+  auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
+  mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
+
+  mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
+
+  return emitX86Select(builder, loc, mask, res, passthru);
+}
+
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1662,12 +1683,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
+                                    ops);
+  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4);
+    return emitIntrinsicCallOp(builder, loc,
+                               "x86.avx512bf16.mask.cvtneps2bf16.128",
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops[0], ops[1], 
intrinsicMask});
+  }
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+    mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+    StringRef intrinsicName;
+    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+    else
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
+    mlir::Value intrinsicResult =
+        emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(),
+                            mlir::ValueRange{ops[0]});
+    return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
+  }
   case X86::BI__cpuid:
   case X86::BI__cpuidex:
   case X86::BI__emul:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
new file mode 100644
index 0000000000000..ccfc0d4a6a813
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll -Wall -Werror 
-Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl 
-target-feature +avx512bf16 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m256bh test_mm512_mask_cvtneps_pbh(__m256bh src, __mmask16 k, __m512 a) {
+  // CIR-LABEL: @test_mm512_mask_cvtneps_pbh
+  // CIR: cir.call @_mm512_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<16 x !cir.bf16>, !u16i, !cir.vector<16 x !cir.float>) -> 
!cir.vector<16 x !cir.bf16>
+
+  // LLVM-LABEL: @test_mm512_mask_cvtneps_pbh
+  // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+
+  // OGCG-LABEL: @test_mm512_mask_cvtneps_pbh
+  // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+  return _mm512_mask_cvtneps_pbh(src, k, a);
+}
+
+__m256bh test_mm512_maskz_cvtneps_pbh(__mmask16 k, __m512 a) {
+  // CIR-LABEL: @test_mm512_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm512_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u16i, 
!cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16>
+
+  // LLVM-LABEL: @test_mm512_maskz_cvtneps_pbh
+  // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x 
float> {{.+}})
+
+  // OGCG-LABEL:  @test_mm512_maskz_cvtneps_pbh
+  // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x 
float> {{.+}})
+  return _mm512_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm256_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m256 a) {
+  // CIR-LABEL: test_mm256_mask_cvtneps_pbh
+  // CIR: cir.call @_mm256_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<8 x !cir.float>) -> 
!cir.vector<8 x !cir.bf16>
+  
+  // LLVM-LABEL: test_mm256_mask_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+
+  // OGCG-LABEL: test_mm256_mask_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})  
+  return _mm256_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm256_maskz_cvtneps_pbh(__mmask8 k, __m256 a) {
+  // CIR-LABEL: test_mm256_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm256_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, 
!cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+
+  // LLVM-LABEL: test_mm256_maskz_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+
+  // OGCG-LABEL: test_mm256_maskz_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> 
{{.+}})
+  return _mm256_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m128 a) {
+  // CIR-LABEL: test_mm_mask_cvtneps_pbh
+  // CIR: cir.call @_mm_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : 
(!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<4 x !cir.float>) -> 
!cir.vector<8 x !cir.bf1{{.+}}
+
+  // LLVM-LABEL: test_mm_mask_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+  // OGCG-LABEL: test_mm_mask_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+  return _mm_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm_maskz_cvtneps_pbh(__mmask8 k, __m128 a) {
+  // CIR-LABEL: test_mm_maskz_cvtneps_pbh
+  // CIR: cir.call @_mm_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, 
!cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+  
+  // LLVM-LABEL: test_mm_maskz_cvtneps_pbh
+  // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+  // OGCG-LABEL: test_mm_maskz_cvtneps_pbh
+  // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x 
float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+  return _mm_maskz_cvtneps_pbh(k, a);
+}

>From ed0155382bf68c99fa3a7b158407da3717a73741 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 12:21:53 +0000
Subject: [PATCH 2/3] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 45 ++++++++++++++++------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 59d467da3a9fb..f27b68ca4a437 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -364,23 +364,46 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, 
mlir::Location loc,
 
 static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                             mlir::Location loc,
-                                            mlir::Type dstTy,
-                                            SmallVectorImpl<mlir::Value> &ops) 
{
+                                            SmallVectorImpl<mlir::Value> &ops,
+                                            mlir::Type DstTy) {
+  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+         "Unknown cvtph2ps intrinsic");
+
+  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+  if (ops.size() == 4) {
+    cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>();
+    if (constOp &&
+        mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) {
+      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
+                                 DstTy, ops);
+    }
+  }
 
-  mlir::Value src = ops[0];
-  mlir::Value passthru = ops[1];
+  uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize();
+  mlir::Value Src = ops[0];
 
-  auto vecTy = mlir::cast<cir::VectorType>(src.getType());
-  uint64_t numElems = vecTy.getSize();
+  // Extract the subvector
+  if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) {
+    assert(NumDstElts == 4 && "Unexpected vector size");
 
-  mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems);
+    SmallVector<int32_t, 4> indices = {0, 1, 2, 3};
+    Src = builder.createShuffle(loc, Src, Src, indices);
+  }
 
-  auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
-  mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
+  // Bitcast from vXi16 to vXf16.
+  cir::VectorType HalfTy =
+      cir::VectorType::get(builder.getF16Type(), NumDstElts);
+  Src = builder.createBitcast(loc, Src, HalfTy);
 
-  mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
+  // Perform the fp-extension.
+  mlir::Value Res = builder.createFloatingCast(Src, DstTy);
+
+  if (ops.size() >= 3) {
+    mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts);
+    Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]);
+  }
 
-  return emitX86Select(builder, loc, mask, res, passthru);
+  return Res;
 }
 
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,

>From 50e380f9aef07dc225e37147fa80da57c3839e56 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <[email protected]>
Date: Sat, 20 Dec 2025 19:16:13 +0000
Subject: [PATCH 3/3] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 78 ++++------------------
 1 file changed, 13 insertions(+), 65 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f27b68ca4a437..7862119d659f8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -19,8 +19,9 @@
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
+
 #include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -362,50 +363,6 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, 
mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
-static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
-                                            mlir::Location loc,
-                                            SmallVectorImpl<mlir::Value> &ops,
-                                            mlir::Type DstTy) {
-  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
-         "Unknown cvtph2ps intrinsic");
-
-  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
-  if (ops.size() == 4) {
-    cir::ConstantOp constOp = ops[3].getDefiningOp<cir::ConstantOp>();
-    if (constOp &&
-        mlir::cast<mlir::IntegerAttr>(constOp.getValue()).getInt() != 4) {
-      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
-                                 DstTy, ops);
-    }
-  }
-
-  uint64_t NumDstElts = mlir::cast<cir::VectorType>(DstTy).getSize();
-  mlir::Value Src = ops[0];
-
-  // Extract the subvector
-  if (NumDstElts != mlir::cast<cir::VectorType>(Src.getType()).getSize()) {
-    assert(NumDstElts == 4 && "Unexpected vector size");
-
-    SmallVector<int32_t, 4> indices = {0, 1, 2, 3};
-    Src = builder.createShuffle(loc, Src, Src, indices);
-  }
-
-  // Bitcast from vXi16 to vXf16.
-  cir::VectorType HalfTy =
-      cir::VectorType::get(builder.getF16Type(), NumDstElts);
-  Src = builder.createBitcast(loc, Src, HalfTy);
-
-  // Perform the fp-extension.
-  mlir::Value Res = builder.createFloatingCast(Src, DstTy);
-
-  if (ops.size() >= 3) {
-    mlir::Value MaskVec = getMaskVecValue(builder, loc, ops[2], NumDstElts);
-    Res = emitX86Select(builder, loc, MaskVec, Res, ops[1]);
-  }
-
-  return Res;
-}
-
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1706,38 +1663,29 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+  case X86::BI__builtin_ia32_vcvtph2ps_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented X86 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
-  case X86::BI__builtin_ia32_vcvtph2ps_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
-                                    ops);
-  }
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4);
-    return emitIntrinsicCallOp(builder, loc,
-                               "x86.avx512bf16.mask.cvtneps2bf16.128",
-                               convertType(expr->getType()),
-                               mlir::ValueRange{ops[0], ops[1], 
intrinsicMask});
-  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
     mlir::Location loc = getLoc(expr->getExprLoc());
-    unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+    mlir::Type resTy = convertType(expr->getType());
+    unsigned numElts = cast<cir::VectorType>(resTy).getSize();
     mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
     StringRef intrinsicName;
-    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
+    if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_128_mask)
+      intrinsicName = "x86.avx512bf16.cvtneps2bf16.128";
+    else if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
       intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
     else
       intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
-    mlir::Value intrinsicResult =
-        emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(),
-                            mlir::ValueRange{ops[0]});
+    mlir::Value intrinsicResult = emitIntrinsicCallOp(
+        builder, loc, intrinsicName, resTy, mlir::ValueRange{ops[0]});
     return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
   }
   case X86::BI__cpuid:

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][X86]Implement handling for convert-half builtins (PR #173143)

Reply via email to