https://github.com/choikwa created https://github.com/llvm/llvm-project/pull/129347
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn. Clang generates library calls for __builtin_* functions, which can be a problem for GPUs that cannot handle them. This patch generates a call to the device implementation for __builtin_logb and an ldexp intrinsic for __builtin_scalbn. From 86deca6179c6dce4490e859ffd536f553d8d6c0d Mon Sep 17 00:00:00 2001 From: Kevin Choi <kevin.c...@amd.com> Date: Fri, 28 Feb 2025 16:52:03 -0600 Subject: [PATCH] [AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn Clang generates library calls for __builtin_* functions which can be a problem for GPUs that cannot handle them. This patch generates a call to device implementation for __builtin_logb and an ldexp intrinsic for __builtin_scalbn. --- clang/lib/CodeGen/CGBuiltin.cpp | 49 ++++++++++++++++++++++++++++++- clang/lib/CodeGen/CodeGenModule.h | 5 ++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 03b8d16b76e0d..6a0497df7acfb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -259,6 +259,26 @@ Value *readX18AsPtr(CodeGenFunction &CGF) { return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy); } +llvm::Constant *CodeGenModule::getDeviceLibFunction(const FunctionDecl *FD, + unsigned BuiltinID) { + GlobalDecl D(FD); + llvm::SmallString<64> Name; + if (getTarget().getTriple().isAMDGCN()) { + switch (BuiltinID) { + default: return nullptr; + case Builtin::BIlogb: + case Builtin::BI__builtin_logb: + Name = "__ocml_logb_f64"; + } + } + if (Name.empty()) + return nullptr; + + llvm::FunctionType *Ty = + cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType())); + return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable*/false); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". 
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -6579,10 +6599,32 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } + // Some targets like GPUs do not support library call and must provide + // device overload implementation. + if (getTarget().getTriple().isAMDGCN()) + // Emit library call to device-lib implementation + if (auto *DevLibFunc = CGM.getDeviceLibFunction(FD, BuiltinID)) + return emitLibraryCall(*this, FD, E, DevLibFunc); + + // These will be emitted as Intrinsic later. + auto NeedsDeviceOverloadToIntrin = [&](unsigned BuiltinID) { + if (getTarget().getTriple().isAMDGCN()) { + switch (BuiltinID) { + default: + return false; + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return true; + } + } + return false; + }; + // If this is an alias for a lib function (e.g. __builtin_sin), emit // the call using the normal call path, but using the unmangled // version of the function name. - if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + if (!NeedsDeviceOverloadToIntrin(BuiltinID) && + getContext().BuiltinInfo.isLibFunction(BuiltinID)) return emitLibraryCall(*this, FD, E, CGM.getBuiltinLibFunction(FD, BuiltinID)); @@ -20804,6 +20846,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return emitBinaryExpMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::ldexp, + Intrinsic::experimental_constrained_ldexp); default: return nullptr; } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 4a269f622ece4..890dc8556cc1c 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1231,6 +1231,11 @@ class CodeGenModule : public CodeGenTypeCache { llvm::FunctionType *FnType = nullptr, bool DontDefer 
= false, ForDefinition_t IsForDefinition = NotForDefinition); + /// Given a builtin id for a function, return a Function* for device + /// overload implementation. + llvm::Constant *getDeviceLibFunction(const FunctionDecl *FD, + unsigned BuiltinID); + /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits