https://github.com/DannyDaoBoYang updated https://github.com/llvm/llvm-project/pull/173802
>From 39693720f08a5d77f42bb3e4a38dab1e5a0e2fb5 Mon Sep 17 00:00:00 2001 From: DannyDaoBoYang <[email protected]> Date: Mon, 5 Jan 2026 20:55:51 -0500 Subject: [PATCH 1/3] Add support for pmovqd512_mask and pmovwb512_mask --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++++++++++ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++- .../X86/pmovqd-mask-builtins.c | 29 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index cc28941aaa079..481d06091f012 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { // Cast/Conversion Operators //===--------------------------------------------------------------------===// + /// Create an value truncation to a narrower type. + /// Returns the source if types already match. CIR casts do not + /// encode NUW/NSW; wrap semantics should be handled by callers. + /// Supports both scalar integers and vectors of integers. + mlir::Value createTrunc(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType()); + if (newTy == srcIntTy) + return src; + return createCast(loc, cir::CastKind::integral, src, newTy); + } + + mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { + return createTrunc(src.getLoc(), src, newTy); + } + mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy) { if (newTy == src.getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 1c87e945de846..b553327f676f5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { mask); } case X86::BI__builtin_ia32_pmovqd512_mask: - case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: { + mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); + return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]); + } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendps: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c new file mode 100644 index 0000000000000..b43d2dc8f050d --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +#include <immintrin.h> + +__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) { + // CIR-LABEL: test_pmovqd_mask + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + return __builtin_ia32_pmovqd512_mask(a, b, mask); +} + +__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) { + // CIR-LABEL: test_pmovqd_maskz + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + __m256i zero = _mm256_setzero_si256(); + return __builtin_ia32_pmovqd512_mask(a, zero, mask); +} + +__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) { + // CIR-LABEL: test_pmovwb_mask + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + return __builtin_ia32_pmovwb512_mask(a, b, mask); +} \ No newline at end of file >From 10bf2da1dc5166e507ab2344a7a44544bad243f3 Mon Sep 17 00:00:00 2001 From: DannyDaoBoYang <[email protected]> Date: Thu, 1 Jan 2026 19:46:15 -0500 Subject: [PATCH 2/3] add LLVM and OGCG in test, Combine Trunc function calls, format --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 8 +-- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ++- .../X86/pmovqd-mask-builtins.c | 51 +++++++++++++++---- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 481d06091f012..b66b7171e0628 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -429,18 +429,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { /// Returns the source if types already match. CIR casts do not /// encode NUW/NSW; wrap semantics should be handled by callers. /// Supports both scalar integers and vectors of integers. - mlir::Value createTrunc(mlir::Location loc, mlir::Value src, - mlir::Type newTy) { + mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { + mlir::Location loc = src.getLoc(); auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType()); if (newTy == srcIntTy) return src; return createCast(loc, cir::CastKind::integral, src, newTy); } - mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { - return createTrunc(src.getLoc(), src, newTy); - } - mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy) { if (newTy == src.getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index b553327f676f5..253fca321f742 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1275,8 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { } case X86::BI__builtin_ia32_pmovqd512_mask: case X86::BI__builtin_ia32_pmovwb512_mask: { - mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); - return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]); + mlir::Value Res = + builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); + return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, + ops[1]); } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c index b43d2dc8f050d..797ecf67ea9ec 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c @@ -1,5 +1,9 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s #include <immintrin.h> @@ -8,22 +12,49 @@ __m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) { // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + // LLVM-LABEL: @test_pmovqd_mask + // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] + // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64> + // LLVM: store <4 x i64> %[[RETBC]], + // LLVM: %[[RET:.*]] = load <4 x i64>, + // LLVM: ret <4 x i64> %[[RET]] + // OGCG-LABEL: @test_pmovqd_mask + // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] + // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64> + // OGCG: ret <4 x i64> %[[RET]] return __builtin_ia32_pmovqd512_mask(a, b, mask); } -__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) { - // CIR-LABEL: test_pmovqd_maskz - // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> - __m256i zero = _mm256_setzero_si256(); - return __builtin_ia32_pmovqd512_mask(a, zero, mask); -} - __m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) { // CIR-LABEL: test_pmovwb_mask // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + // LLVM-LABEL: @test_pmovwb_mask + // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> + // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] + // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> + // LLVM: store <4 x i64> %[[RETBC]], + // LLVM: %[[RET:.*]] = load <4 x i64>, + // LLVM: ret <4 x i64> %[[RET]] + // OGCG-LABEL: @test_pmovwb_mask + // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> + // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] + // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> + // OGCG: ret <4 x i64> %[[RET]] return __builtin_ia32_pmovwb512_mask(a, b, mask); } \ No newline at end of file >From 0fcc6a5ef4f717ebdbbea7d0b4923a66e79e2dc9 Mon Sep 17 00:00:00 2001 From: DannyDaoBoYang <[email protected]> Date: Tue, 6 Jan 2026 01:42:03 -0500 Subject: [PATCH 3/3] Use createIntCast, move tests around, format, fix tests --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 12 ---- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +- .../CodeGenBuiltins/X86/avx512bw-builtins.c | 63 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512f-builtins.c | 63 +++++++++++++++++++ .../X86/pmovqd-mask-builtins.c | 60 ------------------ 5 files changed, 127 insertions(+), 73 deletions(-) delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index b66b7171e0628..cc28941aaa079 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -425,18 +425,6 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { // Cast/Conversion Operators //===--------------------------------------------------------------------===// - /// Create an value truncation to a narrower type. - /// Returns the source if types already match. CIR casts do not - /// encode NUW/NSW; wrap semantics should be handled by callers. - /// Supports both scalar integers and vectors of integers. - mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { - mlir::Location loc = src.getLoc(); - auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType()); - if (newTy == srcIntTy) - return src; - return createCast(loc, cir::CastKind::integral, src, newTy); - } - mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy) { if (newTy == src.getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 253fca321f742..429d2b2237b01 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1276,7 +1276,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_pmovqd512_mask: case X86::BI__builtin_ia32_pmovwb512_mask: { mlir::Value Res = - builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); + builder.createIntCast(ops[0], cast<cir::VectorType>(ops[1].getType())); return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]); } diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c index ac740b354cb18..9693d637fc83c 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c @@ -803,4 +803,67 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) { // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer // OGCG: bitcast <32 x i1> [[CMP]] to i32 return _mm512_movepi16_mask(__A); +} + +__m256i test_mm512_cvtepi16_epi8(__m512i __A) { + // CIR-LABEL: test_mm512_cvtepi16_epi8 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: bitcast <32 x i8> %[[TRUNC]] to <4 x i64> + + // OGCG-LABEL: test_mm512_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: bitcast <32 x i8> %[[TRUNC]] to <4 x i64> + return _mm512_cvtepi16_epi8(__A); +} + +__m256i test_mm512_mask_cvtepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) { + // CIR-LABEL: test_mm512_mask_cvtepi16_epi8 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> + // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_mask_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %{{.*}} + // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_mask_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %{{.*}} + // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64> + return _mm512_mask_cvtepi16_epi8(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi16_epi8(__mmask32 __M, __m512i __A) { + // CIR-LABEL: test_mm512_maskz_cvtepi16_epi8 + // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u32i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_maskz_cvtepi16_epi8 + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}} + // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_maskz_cvtepi16_epi8 + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}} + // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64> + return _mm512_maskz_cvtepi16_epi8(__M, __A); } \ No newline at end of file diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c index 9d5d5e67d6ad9..37a4baf6f0f9d 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c @@ -1056,3 +1056,66 @@ int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) { // OGCG: zext i1 %[[CMP]] to i32 return _mm512_kortestz(__A,__B); } + +__m256i test_mm512_cvtepi64_epi32(__m512i __A) { + // CIR-LABEL: test_mm512_cvtepi64_epi32 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: bitcast <8 x i32> %[[TRUNC]] to <4 x i64> + + // OGCG-LABEL: test_mm512_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: bitcast <8 x i32> %[[TRUNC]] to <4 x i64> + return _mm512_cvtepi64_epi32(__A); +} + +__m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { + // CIR-LABEL: test_mm512_mask_cvtepi64_epi32 + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_mask_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}} + // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_mask_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}} + // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64> + return _mm512_mask_cvtepi64_epi32(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) { + // CIR-LABEL: test_mm512_maskz_cvtepi64_epi32 + // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u8i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i> + // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> + // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> + // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i> + + // LLVM-LABEL: test_mm512_maskz_cvtepi64_epi32 + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}} + // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64> + + // OGCG-LABEL: test_mm512_maskz_cvtepi64_epi32 + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}} + // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64> + return _mm512_maskz_cvtepi64_epi32(__M, __A); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c deleted file mode 100644 index 797ecf67ea9ec..0000000000000 --- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c +++ /dev/null @@ -1,60 +0,0 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s - -#include <immintrin.h> - -__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) { - // CIR-LABEL: test_pmovqd_mask - // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> - // LLVM-LABEL: @test_pmovqd_mask - // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> - // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> - // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer - // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] - // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64> - // LLVM: store <4 x i64> %[[RETBC]], - // LLVM: %[[RET:.*]] = load <4 x i64>, - // LLVM: ret <4 x i64> %[[RET]] - // OGCG-LABEL: @test_pmovqd_mask - // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> - // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> - // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> - // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] - // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64> - // OGCG: ret <4 x i64> %[[RET]] - return __builtin_ia32_pmovqd512_mask(a, b, mask); -} - -__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) { - // CIR-LABEL: test_pmovwb_mask - // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> - // LLVM-LABEL: @test_pmovwb_mask - // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> - // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> - // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> - // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> - // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer - // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] - // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> - // LLVM: store <4 x i64> %[[RETBC]], - // LLVM: %[[RET:.*]] = load <4 x i64>, - // LLVM: ret <4 x i64> %[[RET]] - // OGCG-LABEL: @test_pmovwb_mask - // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> - // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> - // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> - // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> - // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] - // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> - // OGCG: ret <4 x i64> %[[RET]] - return __builtin_ia32_pmovwb512_mask(a, b, mask); -} \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
