https://github.com/HendrikHuebner updated https://github.com/llvm/llvm-project/pull/168051
From cd49cede3451509f64401448f4faa5c6781512c4 Mon Sep 17 00:00:00 2001 From: hhuebner <[email protected]> Date: Fri, 19 Dec 2025 22:23:57 +0100 Subject: [PATCH] Prefetch builtin --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 2 +- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 58 ++++++++++------ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 68 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 5 +- .../test/CIR/CodeGen/X86/prefetchw-builtin.c | 36 ++++++++++ .../CIR/CodeGenBuiltins/X86/sse-builtins.c | 30 ++++++++ 6 files changed, 173 insertions(+), 26 deletions(-) create mode 100644 clang/test/CIR/CodeGen/X86/prefetchw-builtin.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 7e7424fd71878..021021c6ad0e2 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4858,7 +4858,7 @@ def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> { When the `min` attribute is present, the operation returns the minimum guaranteed accessible size. When absent (max mode), it returns the maximum possible object size. Corresponds to `llvm.objectsize`'s `min` argument. - + The `dynamic` attribute determines if the value should be evaluated at runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument. diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index b737c9e618d62..a2b5122c43fc0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -1357,6 +1357,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, } // Now see if we can emit a target-specific builtin. +<<<<<<< HEAD // FIXME: This is a temporary mechanism (double-optional semantics) that will // go away once everything is implemented: // 1. return `mlir::Value{}` for cases where we have issued the diagnostic. @@ -1369,25 +1370,28 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, // even if it executes successfully. if (!v) return RValue::get(nullptr); +======= + RValue value = emitTargetBuiltinExpr(builtinID, e, returnValue); +>>>>>>> 58af6247576f (feedback) - switch (evalKind) { - case cir::TEK_Scalar: - if (mlir::isa<cir::VoidType>(v.getType())) - return RValue::get(nullptr); - return RValue::get(v); - case cir::TEK_Aggregate: - cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin"); - return getUndefRValue(e->getType()); - case cir::TEK_Complex: - llvm_unreachable("No current target builtin returns complex"); - } - llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + if (value.isScalar()) { + if (!value.getValue() || + mlir::isa<cir::VoidType>(value.getValue().getType())) + return RValue::getIgnored(); + + return value; } - cgm.errorNYI(e->getSourceRange(), - std::string("unimplemented builtin call: ") + - getContext().BuiltinInfo.getName(builtinID)); - return getUndefRValue(e->getType()); + if (value.isAggregate()) { + cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin"); + return getUndefRValue(e->getType()); + } + + if (value.isComplex()) { + llvm_unreachable("No current target builtin returns complex"); + } + + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } static std::optional<mlir::Value> @@ -1448,18 +1452,28 @@ emitTargetArchBuiltinExpr(CIRGenFunction *cgf, unsigned builtinID, } } -std::optional<mlir::Value> -CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e, - ReturnValueSlot &returnValue) { +RValue CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, + const CallExpr *e, + ReturnValueSlot &returnValue) { + std::optional<mlir::Value> valueOpt; if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) { assert(getContext().getAuxTargetInfo() && "Missing aux target info"); - return emitTargetArchBuiltinExpr( + valueOpt = emitTargetArchBuiltinExpr( this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e, returnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); + } else { + valueOpt = emitTargetArchBuiltinExpr(this, builtinID, e, returnValue, + getTarget().getTriple().getArch()); + } + + if (!valueOpt) { + cgm.errorNYI(e->getSourceRange(), + std::string("unimplemented builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return getUndefRValue(e->getType()); } - return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue, - getTarget().getTriple().getArch()); + return RValue::get(*valueOpt); } mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg( diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 1c87e945de846..2f022cf8784dc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -24,13 +24,22 @@ #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" +<<<<<<< HEAD #include "llvm/ADT/Sequence.h" #include "llvm/Support/ErrorHandling.h" #include <string> +======= +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +>>>>>>> 0c79f8deed47 (format) using namespace clang; using namespace clang::CIRGen; +/// Get integer from a mlir::Value that is an int constant or a constant op. +static int64_t getIntValueFromConstOp(mlir::Value val) { + return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue(); +} + template <typename... Operands> static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, mlir::Location loc, const StringRef str, @@ -42,6 +51,35 @@ static mlir::Value emitIntrinsicCallOp(CIRGenBuilderTy &builder, .getResult(); } +<<<<<<< HEAD +======= +static mlir::Value emitPrefetch(CIRGenFunction &cgf, unsigned builtinID, + const CallExpr *e, + const SmallVector<mlir::Value> &ops) { + CIRGenBuilderTy &builder = cgf.getBuilder(); + mlir::Location location = cgf.getLoc(e->getExprLoc()); + mlir::Type voidTy = builder.getVoidTy(); + mlir::Value address = builder.createPtrBitcast(ops[0], voidTy); + bool isWrite{}; + int locality{}; + + assert(builtinID == X86::BI_mm_prefetch || builtinID == X86::BI_m_prefetchw || + builtinID == X86::BI_m_prefetch && "Expected prefetch builtin"); + + if (builtinID == X86::BI_mm_prefetch) { + int hint = getIntValueFromConstOp(ops[1]); + isWrite = (hint >> 2) & 0x1; + locality = hint & 0x3; + } else { + isWrite = (builtinID == X86::BI_m_prefetchw); + locality = 0x3; + } + + cir::PrefetchOp::create(builder, location, address, locality, isWrite); + return mlir::LLVM::UndefOp::create(builder, location, voidTy); +} + +>>>>>>> 0c79f8deed47 (format) // OG has unordered comparison as a form of optimization in addition to // ordered comparison, while CIR doesn't. // @@ -158,6 +196,33 @@ computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec, outIndices.resize(numElts); } + +static mlir::Value emitPrefetch(CIRGenFunction &cgf, unsigned builtinID, + const CallExpr *e, + const SmallVector<mlir::Value> &ops) { + CIRGenBuilderTy &builder = cgf.getBuilder(); + mlir::Location location = cgf.getLoc(e->getExprLoc()); + mlir::Type voidTy = builder.getVoidTy(); + mlir::Value address = builder.createPtrBitcast(ops[0], voidTy); + bool isWrite{}; + int locality{}; + + assert(builtinID == X86::BI_mm_prefetch || builtinID == X86::BI_m_prefetchw || + builtinID == X86::BI_m_prefetch && "Expected prefetch builtin"); + + if (builtinID == X86::BI_mm_prefetch) { + int hint = getIntValueFromConstOp(ops[1]); + isWrite = (hint >> 2) & 0x1; + locality = hint & 0x3; + } else { + isWrite = (builtinID == X86::BI_m_prefetchw); + locality = 0x3; + } + + cir::PrefetchOp::create(builder, location, address, locality, isWrite); + return {}; +} + static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value source, mlir::Value mask, @@ -558,6 +623,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), "x86.sse.sfence", voidTy); case X86::BI_mm_prefetch: + case X86::BI_m_prefetch: + case X86::BI_m_prefetchw: + return emitPrefetch(*this, builtinID, expr, ops); case X86::BI__rdtsc: case X86::BI__builtin_ia32_rdtscp: { cgm.errorNYI(expr->getSourceRange(), diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index faba6878a9707..13821a32c74ab 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1843,9 +1843,8 @@ class CIRGenFunction : public CIRGenTypeCache { bool buildingTopLevelCase); mlir::LogicalResult emitSwitchStmt(const clang::SwitchStmt &s); - std::optional<mlir::Value> - emitTargetBuiltinExpr(unsigned builtinID, const clang::CallExpr *e, - ReturnValueSlot &returnValue); + RValue emitTargetBuiltinExpr(unsigned builtinID, const clang::CallExpr *e, + ReturnValueSlot &returnValue); /// Given a value and its clang type, returns the value casted to its memory /// representation. diff --git a/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c b/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c new file mode 100644 index 0000000000000..7d7ce348b8d88 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c @@ -0,0 +1,36 @@ + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG + + +#include <x86intrin.h> + +void test_m_prefetch_w(void *p) { + // CIR-LABEL: test_m_prefetch_w + // LLVM-LABEL: test_m_prefetch_w + // OGCG-LABEL: test_m_prefetch_w + return _m_prefetchw(p); + // CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void> + // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1) + // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1) +} + +void test_m_prefetch(void *p) { + // CIR-LABEL: test_m_prefetch + // LLVM-LABEL: test_m_prefetch + // OGCG-LABEL: test_m_prefetch + return _m_prefetch(p); + // CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void> + // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) + // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c index db52021d1aa9f..9d01203aefc7a 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c @@ -56,6 +56,36 @@ void test_mm_sfence(void) { // OGCG: call void @llvm.x86.sse.sfence() } +void test_mm_prefetch(char const* p) { + // CIR-LABEL: test_mm_prefetch + // LLVM-LABEL: test_mm_prefetch + // OGCG-LABEL: test_mm_prefetch + _mm_prefetch(p, 0); + // CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void> + // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1) + // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1) +} + +void test_mm_prefetch_local(char const* p) { + // CIR-LABEL: test_mm_prefetch_local + // LLVM-LABEL: test_mm_prefetch_local + // OGCG-LABEL: test_mm_prefetch_local + _mm_prefetch(p, 3); + // CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void> + // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) + // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1) +} + +void test_mm_prefetch_write(char const* p) { + // CIR-LABEL: test_mm_prefetch_write + // LLVM-LABEL: test_mm_prefetch_write + // OGCG-LABEL: test_mm_prefetch_write + _mm_prefetch(p, 7); + // CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void> + // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1) + // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1) +} + __m128 test_mm_undefined_ps(void) { // CIR-LABEL: _mm_undefined_ps // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.double> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
