https://github.com/woruyu updated https://github.com/llvm/llvm-project/pull/169464
>From 6e9008193037db7048645edeb35653b5d3a66840 Mon Sep 17 00:00:00 2001 From: woruyu <[email protected]> Date: Tue, 10 Feb 2026 16:34:39 +0800 Subject: [PATCH 1/2] [CIR] X86 vector masked load builtins --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 47 ++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenBuilder.h | 16 ++++++ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 27 ++++++++++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 20 ++++++++ .../X86/avx512vl-builtins-test.c | 49 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512vl-builtins.c | 40 +++++++++++++++ 6 files changed, 199 insertions(+) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 6cebf6e62af6f..770b2c76c7fb9 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -665,6 +665,53 @@ def CIR_LoadOp : CIR_Op<"load", [ // FIXME: add verifier. } +//===----------------------------------------------------------------------===// +// MaskedLoadOp +//===----------------------------------------------------------------------===// + +def CIR_MaskedLoadOp : CIR_Op<"mask.load", [ + TypesMatchWith<"type of 'result' matches pointee type of 'addr'", + "addr", "result", "mlir::cast<cir::PointerType>($_self).getPointee()"> +]> { + let summary = "Masked vector load from memory"; + let description = [{ + `cir.mask.load` conditionally loads elements from memory based on a mask. + Elements for which the mask is false are taken from `pass_thru`. + + This operation is intended to correspond closely to LLVM's masked load op + (`llvm.intr.maskedload` / `LLVM::MaskedLoadOp`) and lower directly to it. + + `alignment` can be provided to override the default alignment derived from + the pointee/element type data layout. 
+ + Example: + + ```mlir + %v = cir.mask.load align(16) %ptr, %mask, %passthru + : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> + ``` + }]; + + let arguments = (ins + Arg<CIR_PointerType, "base address (points to element type)", [MemRead]>:$addr, + CIR_VectorType:$mask, + CIR_VectorType:$pass_thru, + OptionalAttr<IntValidAlignment<I64Attr>>:$alignment + ); + + let results = (outs CIR_AnyType:$result); + + let assemblyFormat = [{ + (`align` `(` $alignment^ `)`)? + $addr `,` $mask `,` $pass_thru + `:` qualified(type($addr)) `,` type($mask) `,` type($pass_thru) + `->` type($result) + attr-dict + }]; + + // FIXME: add verifier +} + //===----------------------------------------------------------------------===// // StoreOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index dedb369bf3f67..00a86377f8c99 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -654,6 +654,22 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { addr.getAlignment().getAsAlign().value()); } + mlir::Value createMaskedLoad(mlir::Location loc, mlir::Type ty, + mlir::Value ptr, llvm::Align alignment, + mlir::Value mask, mlir::Value passThru) { + assert(mlir::isa<cir::VectorType>(ty) && "Type should be vector"); + assert(mask && "Mask should not be all-ones (null)"); + + if (!passThru) + passThru = this->getConstant(loc, cir::PoisonAttr::get(ty)); + + auto alignAttr = + this->getI64IntegerAttr(static_cast<int64_t>(alignment.value())); + + return cir::MaskedLoadOp::create(*this, loc, ty, ptr, mask, passThru, + alignAttr); + } + cir::VecShuffleOp createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2, llvm::ArrayRef<mlir::Attribute> maskAttrs) { diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 80022998448ad..0563e5dbea3a9 100644 --- 
a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -759,6 +759,18 @@ static mlir::Value emitX86Aeswide(CIRGenBuilderTy &builder, mlir::Location loc, return cir::ExtractMemberOp::create(builder, loc, rstValueRec, /*index=*/0); } +static mlir::Value emitX86MaskedLoad(CIRGenBuilderTy &builder, + ArrayRef<mlir::Value> ops, + llvm::Align alignment, + mlir::Location loc) { + mlir::Type ty = ops[1].getType(); + mlir::Value ptr = ops[0]; + mlir::Value maskVec = getMaskVecValue(builder, loc, ops[2], + cast<cir::VectorType>(ty).getSize()); + + return builder.createMaskedLoad(loc, ty, ptr, alignment, maskVec, ops[1]); +} + std::optional<mlir::Value> CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { if (builtinID == Builtin::BI__builtin_cpu_is) { @@ -1112,6 +1124,11 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_movdqa64store512_mask: case X86::BI__builtin_ia32_storeaps512_mask: case X86::BI__builtin_ia32_storeapd512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; + case X86::BI__builtin_ia32_loadups128_mask: case X86::BI__builtin_ia32_loadups256_mask: case X86::BI__builtin_ia32_loadups512_mask: @@ -1134,6 +1151,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_loadsh128_mask: case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: + return emitX86MaskedLoad(builder, ops, llvm::Align(1), + getLoc(expr->getExprLoc())); + case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: case X86::BI__builtin_ia32_loadaps512_mask: @@ -1146,6 +1166,13 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_movdqa64load128_mask: case X86::BI__builtin_ia32_movdqa64load256_mask: 
case X86::BI__builtin_ia32_movdqa64load512_mask: + return emitX86MaskedLoad( + builder, ops, + getContext() + .getTypeAlignInChars(expr->getArg(1)->getType()) + .getAsAlign(), + getLoc(expr->getExprLoc())); + case X86::BI__builtin_ia32_expandloaddf128_mask: case X86::BI__builtin_ia32_expandloaddf256_mask: case X86::BI__builtin_ia32_expandloaddf512_mask: diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 0e50d9c595564..444a7c15396b5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1796,6 +1796,26 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( return mlir::LogicalResult::success(); } +mlir::LogicalResult cir::direct::CIRToLLVMMaskedLoadOpLowering::matchAndRewrite( + cir::MaskedLoadOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + const mlir::Type llvmResTy = + convertTypeForMemory(*getTypeConverter(), dataLayout, op.getType()); + + std::optional<size_t> opAlign = op.getAlignment(); + unsigned alignment = + (unsigned)opAlign.value_or(dataLayout.getTypeABIAlignment(llvmResTy)); + + auto alignAttr = rewriter.getI32IntegerAttr(alignment); + + mlir::LLVM::MaskedLoadOp newLoad = mlir::LLVM::MaskedLoadOp::create( + rewriter, op.getLoc(), llvmResTy, adaptor.getAddr(), adaptor.getMask(), + adaptor.getPassThru(), alignAttr); + + rewriter.replaceOp(op, newLoad.getResult()); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( cir::StoreOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c new file mode 100644 index 0000000000000..8525af17ddb90 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s + +#include <immintrin.h> + +__m128 test_mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) { + // CIR-LABEL: _mm_mask_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> + + // LLVM-LABEL: test_mm_mask_loadu_ps + // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + + // OGCG-LABEL: test_mm_mask_loadu_ps + // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + return _mm_mask_loadu_ps(__W, __U, __P); +} + +__m128 test_mm_maskz_loadu_ps(__mmask8 __U, void const *__P) { + // CIR-LABEL: _mm_maskz_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> + + // LLVM-LABEL: 
test_mm_maskz_loadu_ps + // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + + // OGCG-LABEL: test_mm_maskz_loadu_ps + // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + return _mm_maskz_loadu_ps(__U, __P); +} + +__m256 test_mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) { + // CIR-LABEL: _mm256_mask_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, <8 x !cir.int<s, 1>>, <8 x !cir.float> -> !cir.vector<8 x !cir.float> + + // LLVM-LABEL: test_mm256_mask_loadu_ps + // LLVM: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + + // OGCG-LABEL: @test_mm256_mask_loadu_ps + // OGCG: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + return _mm256_mask_loadu_ps(__W, __U, __P); +} + diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c index f03fc75565b1a..3926062e9efda 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c @@ -363,3 +363,43 @@ __m256i test_mm256_shuffle_i64x2(__m256i a, __m256i b) { // OGCG: shufflevector <4 x i64> %{{.+}}, <4 x i64> %{{.+}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7> return _mm256_shuffle_i64x2(a, b, 0x03); } + +__m128 test_mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) { + // CIR-LABEL: _mm_mask_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> + + // 
LLVM-LABEL: test_mm_mask_loadu_ps + // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + + // OGCG-LABEL: test_mm_mask_loadu_ps + // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + return _mm_mask_loadu_ps(__W, __U, __P); +} + +__m128 test_mm_maskz_loadu_ps(__mmask8 __U, void const *__P) { + // CIR-LABEL: _mm_maskz_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> + + // LLVM-LABEL: test_mm_maskz_loadu_ps + // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + + // OGCG-LABEL: test_mm_maskz_loadu_ps + // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) + return _mm_maskz_loadu_ps(__U, __P); +} + +__m256 test_mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) { + // CIR-LABEL: _mm256_mask_loadu_ps + // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, <8 x !cir.int<s, 1>>, <8 x !cir.float> -> !cir.vector<8 x !cir.float> + + // LLVM-LABEL: test_mm256_mask_loadu_ps + // LLVM: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) + + // OGCG-LABEL: @test_mm256_mask_loadu_ps + // OGCG: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> 
%{{.*}}, <8 x float> %{{.*}}) + return _mm256_mask_loadu_ps(__W, __U, __P); +} >From ae95e0b135e98b3144ca9c2c09d71d7e1dba2b48 Mon Sep 17 00:00:00 2001 From: woruyu <[email protected]> Date: Tue, 10 Feb 2026 16:43:43 +0800 Subject: [PATCH 2/2] Delete clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c --- .../X86/avx512vl-builtins-test.c | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c deleted file mode 100644 index 8525af17ddb90..0000000000000 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins-test.c +++ /dev/null @@ -1,49 +0,0 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion -// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s - -#include <immintrin.h> - -__m128 test_mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) { - // CIR-LABEL: _mm_mask_loadu_ps - // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> - - // LLVM-LABEL: test_mm_mask_loadu_ps - // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> 
%{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) - - // OGCG-LABEL: test_mm_mask_loadu_ps - // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) - return _mm_mask_loadu_ps(__W, __U, __P); -} - -__m128 test_mm_maskz_loadu_ps(__mmask8 __U, void const *__P) { - // CIR-LABEL: _mm_maskz_loadu_ps - // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, <4 x !cir.int<s, 1>>, <4 x !cir.float> -> !cir.vector<4 x !cir.float> - - // LLVM-LABEL: test_mm_maskz_loadu_ps - // LLVM: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // LLVM: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) - - // OGCG-LABEL: test_mm_maskz_loadu_ps - // OGCG: [[MASK4:%.*]] = shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - // OGCG: call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 1 %{{.+}}, <4 x i1> [[MASK4]], <4 x float> %{{.+}}) - return _mm_maskz_loadu_ps(__U, __P); -} - -__m256 test_mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) { - // CIR-LABEL: _mm256_mask_loadu_ps - // CIR: cir.mask.load align(1) %{{.*}}, %{{.*}}, %{{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, <8 x !cir.int<s, 1>>, <8 x !cir.float> -> !cir.vector<8 x !cir.float> - - // LLVM-LABEL: test_mm256_mask_loadu_ps - // LLVM: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) - - // OGCG-LABEL: @test_mm256_mask_loadu_ps - // OGCG: @llvm.masked.load.v8f32.p0(ptr align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}}) - return _mm256_mask_loadu_ps(__W, __U, __P); -} - 
_______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
