[clang] 7ee923d - [CIR] Upstream convert to mask builtins in CIR codegen (#171694)

via cfe-commits Thu, 18 Dec 2025 17:54:47 -0800

Author: Starlight
Date: 2025-12-19T01:54:31Z
New Revision: 7ee923d0ecc6bd46bf1b7d90a40c36114a20d66b


URL: 
https://github.com/llvm/llvm-project/commit/7ee923d0ecc6bd46bf1b7d90a40c36114a20d66b
DIFF: 
https://github.com/llvm/llvm-project/commit/7ee923d0ecc6bd46bf1b7d90a40c36114a20d66b.diff

LOG: [CIR] Upstream convert to mask builtins in CIR codegen (#171694)

This PR is part of https://github.com/llvm/llvm-project/issues/167752.
It upstreams the codegen and tests for the convert-to-mask builtins
implemented in the ClangIR incubator.

Upstreamed X86 mask conversion builtins from ClangIR:
- cvtmask2b/w/d/q*
- cvtb/w/d/q2mask*

Upstreamed helpers:
- emitX86MaskedCompareResult()
- emitX86MaskedCompare()
- emitX86ConvertToMask()
- emitX86SExtMask()

Added: 
    clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
    clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
    clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index bd373b1423fd0..1c87e945de846 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,19 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <string>
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -258,6 +264,105 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy 
&builder, mlir::Location loc,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
 
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value>
+emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                           mlir::Value cmp, unsigned numElts,
+                           mlir::Value maskIn, mlir::Location loc) {
+  if (maskIn) {
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
+    return {};
+  }
+  if (numElts < 8) {
+    llvm::SmallVector<mlir::Attribute> indices;
+    mlir::Type i64Ty = builder.getSInt64Ty();
+
+    for (unsigned i = 0; i != numElts; ++i)
+      indices.push_back(cir::IntAttr::get(i64Ty, i));
+    for (unsigned i = numElts; i != 8; ++i)
+      indices.push_back(cir::IntAttr::get(i64Ty, i % numElts + numElts));
+
+    // This should shuffle between cmp (first vector) and null (second vector)
+    mlir::Value nullVec = builder.getNullValue(cmp.getType(), loc);
+    cmp = builder.createVecShuffle(loc, cmp, nullVec, indices);
+  }
+  return builder.createBitcast(cmp, builder.getUIntNTy(std::max(numElts, 8U)));
+}
+
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value>
+emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned 
cc,
+                     bool isSigned, ArrayRef<mlir::Value> ops,
+                     mlir::Location loc) {
+  assert((ops.size() == 2 || ops.size() == 4) &&
+         "Unexpected number of arguments");
+  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+  mlir::Value cmp;
+
+  if (cc == 3) {
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
+    return {};
+  } else if (cc == 7) {
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare cc == 7");
+    return {};
+  } else {
+    cir::CmpOpKind pred;
+    switch (cc) {
+    default:
+      llvm_unreachable("Unknown condition code");
+    case 0:
+      pred = cir::CmpOpKind::eq;
+      break;
+    case 1:
+      pred = cir::CmpOpKind::lt;
+      break;
+    case 2:
+      pred = cir::CmpOpKind::le;
+      break;
+    case 4:
+      pred = cir::CmpOpKind::ne;
+      break;
+    case 5:
+      pred = cir::CmpOpKind::ge;
+      break;
+    case 6:
+      pred = cir::CmpOpKind::gt;
+      break;
+    }
+
+    auto resultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
+    cmp = cir::VecCmpOp::create(builder, loc, resultTy, pred, ops[0], ops[1]);
+  }
+
+  mlir::Value maskIn;
+  if (ops.size() == 4)
+    maskIn = ops[3];
+
+  return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
+}
+
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf,
+                                                       CIRGenBuilderTy 
&builder,
+                                                       mlir::Value in,
+                                                       mlir::Location loc) {
+  cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
+  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
+}
+
+static std::optional<mlir::Value> emitX86SExtMask(CIRGenBuilderTy &builder,
+                                                  mlir::Value op,
+                                                  mlir::Type dstTy,
+                                                  mlir::Location loc) {
+  unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value mask = getMaskVecValue(builder, loc, op, numberOfElements);
+
+  return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
+}
+
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                  mlir::Value vec, mlir::Value value,
                                  mlir::Value indexOp) {
@@ -653,6 +758,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_storesh128_mask:
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented x86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -665,6 +774,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
+    return emitX86SExtMask(this->getBuilder(), ops[0],
+                           convertType(expr->getType()),
+                           getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -677,6 +789,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtq2mask128:
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
+    return emitX86ConvertToMask(*this, this->getBuilder(), ops[0],
+                                getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:

diff  --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 6d01964ebac41..eb0a219f18618 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -3601,9 +3601,14 @@ mlir::LogicalResult 
CIRToLLVMVecCmpOpLowering::matchAndRewrite(
   }
 
   // LLVM IR vector comparison returns a vector of i1. This one-bit vector
-  // must be sign-extended to the correct result type.
-  rewriter.replaceOpWithNewOp<mlir::LLVM::SExtOp>(
-      op, typeConverter->convertType(op.getType()), bitResult);
+  // must be sign-extended to the correct result type, unless a vector of i1 is
+  // the type we need.
+  if (cast<cir::IntType>(cast<cir::VectorType>(op.getType()).getElementType())
+          .getWidth() > 1)
+    rewriter.replaceOpWithNewOp<mlir::LLVM::SExtOp>(
+        op, typeConverter->convertType(op.getType()), bitResult);
+  else
+    rewriter.replaceOp(op, bitResult);
   return mlir::success();
 }
 

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 3db2541506091..ac740b354cb18 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -1,32 +1,20 @@
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw  -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux  -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
 
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw  -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux  -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
 
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefix=OGCG
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
 
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefix=OGCG
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
 
 // This test mimics clang/test/CodeGen/X86/avx512bw-builtins.c, which 
eventually
 // CIR shall be able to support fully.
@@ -768,3 +756,51 @@ unsigned char test_ktestz_mask64_u8(__mmask64 A, __mmask64 
B) {
   return _ktestz_mask64_u8(A, B);
 }
 
+
+
+__m512i test_mm512_movm_epi16(__mmask32 __A) {
+  // CIR-LABEL: _mm512_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<32 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<32 x !cir.int<s, 
1>> -> !cir.vector<32 x !s16i>
+  
+  // LLVM-LABEL: test_mm512_movm_epi16
+  // LLVM:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+
+  // OGCG-LABEL: {{.*}}movm_epi16{{.*}}(
+  // OGCG:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+  return _mm512_movm_epi16(__A); 
+}
+
+__mmask64 test_mm512_movepi8_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi8_mask
+  // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<64 
x !s8i>
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<64 x 
!s8i>, !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<64 x !cir.int<s, 
1>> -> !u64i
+
+  // LLVM-LABEL: test_mm512_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <64 x i1> [[CMP]] to i64
+  
+  // OGCG-LABEL: {{.*}}movepi8_mask{{.*}}(
+  // OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <64 x i1> [[CMP]] to i64
+  return _mm512_movepi8_mask(__A); 
+}
+
+__mmask32 test_mm512_movepi16_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi16_mask
+  // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<32 
x !s16i>
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<32 x 
!s16i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<32 x !cir.int<s, 
1>> -> !u32i
+
+  // LLVM-LABEL: test_mm512_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <32 x i1> [[CMP]] to i32
+
+  // OGCG-LABEL: {{.*}}movepi16_mask{{.*}}(
+  // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <32 x i1> [[CMP]] to i32
+  return _mm512_movepi16_mask(__A); 
+}
\ No newline at end of file

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 0ba84053c15b1..e49a4bd04956e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-cir -o 
%t.cir -Wall -Werror
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
 
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fno-signed-char 
-fclangir -emit-cir -o %t.cir -Wall -Werror
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
@@ -13,6 +13,22 @@
 
 #include <immintrin.h>
 
+__m512i test_mm512_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm512_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<8 x !cir.int<s, 
1>> -> !cir.vector<8 x !s64i>
+
+  // LLVM-LABEL: test_mm512_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+
+  // OGCG-LABEL: {{.*}}test_mm512_movm_epi64{{.*}}(
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+  return _mm512_movm_epi64(__A); 
+}
+
+
 __mmask8 test_kadd_mask8(__mmask8 A, __mmask8 B) {
  // CIR-LABEL: _kadd_mask8
  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
@@ -324,50 +340,33 @@ unsigned char test_ktestz_mask16_u8(__mmask16 A, 
__mmask16 B) {
   return _ktestz_mask16_u8(A, B);
 }
 
-__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
-  // CIR-LABEL: test_mm512_insertf32x8
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, 
#cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> 
: !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<16 x 
!cir.float>
+__mmask16 test_mm512_movepi32_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi32_mask
+  // CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<16 
x !s32i>
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x 
!s32i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<16 x !cir.int<s, 
1>> -> !u16i
 
-  // LLVM-LABEL: test_mm512_insertf32x8
-  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  // LLVM-LABEL: test_mm512_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <16 x i1> [[CMP]] to i16
 
-  // OGCG-LABEL: test_mm512_insertf32x8
-  // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, 
i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  return _mm512_insertf32x8(__A, __B, 1);
+  // OGCG-LABEL: {{.*}}test_mm512_movepi32_mask{{.*}}(
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <16 x i1> [[CMP]] to i16
+  return _mm512_movepi32_mask(__A); 
 }
 
-__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
-  // CIR-LABEL: test_mm512_inserti32x8
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, 
#cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> 
: !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<16 x !s32i>
+__mmask8 test_mm512_movepi64_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi64_mask
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x 
!s64i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<8 x !cir.int<s, 1>> 
-> !u8i
 
-  // LLVM-LABEL: test_mm512_inserti32x8
-  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 
18, i32 19, i32 20, i32 21, i32 22, i32 23>
-
-  // OGCG-LABEL: test_mm512_inserti32x8
-  // OGCG: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 
18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  return _mm512_inserti32x8(__A, __B, 1);
-}
+  // LLVM-LABEL: test_mm512_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <8 x i1> [[CMP]] to i8
 
-__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
-  // CIR-LABEL: test_mm512_insertf64x2
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : 
!s32i, #cir.int<9> : !s32i] : !cir.vector<8 x !cir.double>
-
-  // LLVM-LABEL: test_mm512_insertf64x2
-  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-
-  // OGCG-LABEL: test_mm512_insertf64x2
-  // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-  return _mm512_insertf64x2(__A, __B, 3);
-}
-
-__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
-  // CIR-LABEL: test_mm512_inserti64x2
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s64i>) 
[#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : 
!s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<8 x !s64i>
-
-  // LLVM-LABEL: test_mm512_inserti64x2
-  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 
0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-
-  // OGCG-LABEL: test_mm512_inserti64x2
-  // OGCG: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 
0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
-  return _mm512_inserti64x2(__A, __B, 1);
-}
+  // OGCG-LABEL: {{.*}}test_mm512_movepi64_mask{{.*}}(
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <8 x i1> [[CMP]] to i8
+  return _mm512_movepi64_mask(__A);
+}
\ No newline at end of file

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
new file mode 100644
index 0000000000000..b3a786a328117
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx10.1-512 -target-feature 
+avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror 
-Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+
+#include <immintrin.h>
+
+__m128i test_mm_movm_epi8(__mmask16 __A) {
+  // CIR-LABEL: _mm_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<16 x !cir.int<s, 
1>> -> !cir.vector<16 x !s8i>
+
+  // LLVM-LABEL: @test_mm_movm_epi8
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+
+  // OGCG-LABEL: @test_mm_movm_epi8
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+  return _mm_movm_epi8(__A); 
+}
+
+__m256i test_mm256_movm_epi8(__mmask32 __A) {
+  // CIR-LABEL: _mm256_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<32 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<32 x !cir.int<s, 
1>> -> !cir.vector<32 x !s8i>
+
+  // LLVM-LABEL: @test_mm256_movm_epi8
+  // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+
+  // OGCG-LABEL: @test_mm256_movm_epi8
+  // OGCG: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+  return _mm256_movm_epi8(__A); 
+}
+
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> !cir.vector<64 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<64 x !cir.int<s, 
1>> -> !cir.vector<64 x !s8i>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+
+  // OGCG-LABEL: @test_mm512_movm_epi8
+  // OGCG: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A); 
+}
+
+__m128i test_mm_movm_epi16(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<8 x !cir.int<s, 
1>> -> !cir.vector<8 x !s16i>
+
+  // LLVM-LABEL: @test_mm_movm_epi16
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+
+  // OGCG-LABEL: @test_mm_movm_epi16
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+  return _mm_movm_epi16(__A); 
+}
+
+__m256i test_mm256_movm_epi16(__mmask16 __A) {
+  // CIR-LABEL: _mm256_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<16 x !cir.int<s, 
1>> -> !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: @test_mm256_movm_epi16
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+
+  // OGCG-LABEL: @test_mm256_movm_epi16
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+  return _mm256_movm_epi16(__A); 
+}
+  
+__mmask16 test_mm_movepi8_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi8_mask
+  // CIR: cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<16 
x !s8i>
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x 
!s8i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<16 x !cir.int<s, 
1>> -> !u16i
+
+  // LLVM-LABEL: @test_mm_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <16 x i1> [[CMP]] to i16
+
+  // OGCG-LABEL: @test_mm_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <16 x i1> [[CMP]] to i16
+  return _mm_movepi8_mask(__A); 
+}
+
+__mmask16 test_mm256_movepi16_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi16_mask
+  // CIR: cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 
x !s16i>
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x 
!s16i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<16 x !cir.int<s, 
1>> -> !u16i
+
+  // LLVM-LABEL: @test_mm256_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <16 x i1> [[CMP]] to i16
+
+  // OGCG-LABEL: @test_mm256_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <16 x i1> [[CMP]] to i16
+  return _mm256_movepi16_mask(__A); 
+}
\ No newline at end of file

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
index 6eb1028275477..9461dde257799 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
@@ -1,31 +1,143 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror 
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
 // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
 
 #include <immintrin.h>
 
-__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
-  // CIR-LABEL: test_mm256_insertf64x2
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, 
#cir.int<5> : !s32i] : !cir.vector<4 x !cir.double>
 
-  // LLVM-LABEL: @test_mm256_insertf64x2
-  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 0, i32 1, i32 4, i32 5>
+__m128i test_mm_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : 
!s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<4 x !cir.int<s, 
1>> -> !cir.vector<4 x !s32i>
 
-  // OGCG-LABEL: @test_mm256_insertf64x2
-  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 0, i32 1, i32 4, i32 5>
-  return _mm256_insertf64x2(__A, __B, 1);
+  // LLVM-LABEL: @test_mm_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+
+  // OGCG-LABEL: @test_mm_movm_epi32
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+  return _mm_movm_epi32(__A); 
+}
+
+__m256i test_mm256_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<8 x !cir.int<s, 
1>> -> !cir.vector<8 x !s32i>
+
+  // LLVM-LABEL: @test_mm256_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+
+  // OGCG-LABEL: @test_mm256_movm_epi32
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+  return _mm256_movm_epi32(__A); 
+}
+
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<16 x !cir.int<s, 
1>> -> !cir.vector<16 x !s32i>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+
+  // OGCG-LABEL: @test_mm512_movm_epi32
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A); 
+}
+
+__m128i test_mm_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<2 x !cir.int<s, 
1>> -> !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: @test_mm_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> <i32 0, i32 1>
+  // LLVM: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+
+  // OGCG-LABEL: @test_mm_movm_epi64
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> <i32 0, i32 1>
+  // OGCG: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+  return _mm_movm_epi64(__A); 
+}
+
+__m256i test_mm256_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : 
!s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<4 x !cir.int<s, 
1>> -> !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: @test_mm256_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+
+  // OGCG-LABEL: @test_mm256_movm_epi64
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+  return _mm256_movm_epi64(__A); 
+}
+
+__mmask8 test_mm256_movepi32_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi32_mask
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x 
!s32i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[CMP]] : !cir.vector<8 x !cir.int<s, 1>> 
-> !u8i
+
+  // LLVM-LABEL: @test_mm256_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  // LLVM: bitcast <8 x i1> [[CMP]] to i8
+
+  // OGCG-LABEL: @test_mm256_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  // OGCG: bitcast <8 x i1> [[CMP]] to i8
+  return _mm256_movepi32_mask(__A); 
 }
 
-__m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
-  // CIR-LABEL: test_mm256_inserti64x2
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !s64i>) 
[#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : 
!s32i] : !cir.vector<4 x !s64i>
+__mmask8 test_mm_movepi64_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
 
-  // LLVM-LABEL: @test_mm256_inserti64x2
-  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 
0, i32 1, i32 4, i32 5>
+  // LLVM-LABEL: @test_mm_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // LLVM: bitcast <8 x i1> [[SHUF]] to i8
 
-  // OGCG-LABEL: @test_mm256_inserti64x2
-  // OGCG: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 
0, i32 1, i32 4, i32 5>
-  return _mm256_inserti64x2(__A, __B, 1);
+  // OGCG-LABEL: @test_mm_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  // OGCG: bitcast <8 x i1> [[SHUF]] to i8
+  return _mm_movepi64_mask(__A); 
 }
+
+__mmask8 test_mm256_movepi64_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi64_mask
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s64i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: [[SHUF:%.*]] = cir.vec.shuffle([[CMP]], %{{.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.cast bitcast [[SHUF]] : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+
+  // LLVM-LABEL: @test_mm256_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // LLVM: bitcast <8 x i1> [[SHUF]] to i8
+
+  // OGCG-LABEL: @test_mm256_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  // OGCG: bitcast <8 x i1> [[SHUF]] to i8
+  return _mm256_movepi64_mask(__A); 
+}
\ No newline at end of file


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 7ee923d - [CIR] Upstream convert to mask builtins in CIR codegen (#171694)

Reply via email to