https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/129347
>From adfb9c08cf1d62cd2c6596520e65ef9f299d3cdc Mon Sep 17 00:00:00 2001 From: Kevin Choi <kevin.c...@amd.com> Date: Fri, 28 Feb 2025 16:52:03 -0600 Subject: [PATCH] [AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn Clang generates library calls for __builtin_* functions which can be a problem for GPUs that cannot handle them. This patch generates a device implementations for __builtin_logb and __builtin_scalbn. --- clang/lib/CodeGen/CGBuiltin.cpp | 43 ++++++++++++++++++++++++++- clang/lib/CodeGen/CodeGenModule.h | 5 ++++ clang/test/CodeGenHIP/logb_scalbn.hip | 42 ++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenHIP/logb_scalbn.hip diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a5ed2595bad4d..5957fec364ae4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6621,10 +6621,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } } + // These will be emitted as Intrinsic later. + auto NeedsDeviceOverloadToIntrin = [&](unsigned BuiltinID) { + if (getTarget().getTriple().isAMDGCN()) { + switch (BuiltinID) { + default: + return false; + case Builtin::BIlogb: + case Builtin::BI__builtin_logb: + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return true; + } + } + return false; + }; + // If this is an alias for a lib function (e.g. __builtin_sin), emit // the call using the normal call path, but using the unmangled // version of the function name. - if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + if (!NeedsDeviceOverloadToIntrin(BuiltinID) && + getContext().BuiltinInfo.isLibFunction(BuiltinID)) return emitLibraryCall(*this, FD, E, CGM.getBuiltinLibFunction(FD, BuiltinID)); @@ -20910,6 +20927,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_s_prefetch_data: return emitBuiltinWithOneOverloadedType<2>( *this, E, Intrinsic::amdgcn_s_prefetch_data); + case Builtin::BIlogb: + case Builtin::BI__builtin_logb: { + auto Src0 = EmitScalarExpr(E->getArg(0)); + auto FrExpFunc = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + {Builder.getInt32Ty(), Src0->getType()}); + auto FrExp = Builder.CreateCall(FrExpFunc, Src0); + auto Add = Builder.CreateAdd( + FrExp, ConstantInt::getSigned(FrExp->getType(), -1), "", false, true); + auto SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy()); + auto Fabs = emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs); + auto FCmpONE = Builder.CreateFCmpONE( + Fabs, ConstantFP::getInfinity(Builder.getDoubleTy())); + auto Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs); + auto FCmpOEQ = + Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy())); + auto Sel2 = Builder.CreateSelect( + FCmpOEQ, ConstantFP::getInfinity(Builder.getDoubleTy(), /*Neg*/ true), + Sel1); + return Sel2; + } + case Builtin::BIscalbn: + case Builtin::BI__builtin_scalbn: + return emitBinaryExpMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp); default: return nullptr; } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 6deb467b2cc9f..ae1441dc3d009 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1231,6 +1231,11 @@ class CodeGenModule : public CodeGenTypeCache { llvm::FunctionType *FnType = nullptr, bool DontDefer = false, ForDefinition_t IsForDefinition = NotForDefinition); + /// Given a builtin id for a function, return a Function* for device + /// overload implementation. + llvm::Constant *getDeviceLibFunction(const FunctionDecl *FD, + unsigned BuiltinID); + /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". llvm::Constant *getBuiltinLibFunction(const FunctionDecl *FD, diff --git a/clang/test/CodeGenHIP/logb_scalbn.hip b/clang/test/CodeGenHIP/logb_scalbn.hip new file mode 100644 index 0000000000000..935941bb25939 --- /dev/null +++ b/clang/test/CodeGenHIP/logb_scalbn.hip @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang --cuda-device-only -nogpuinc -nogpulib -emit-llvm -S -o - %s | FileCheck %s +#include <math.h> +#define __device__ __attribute__((device)) + +// CHECK-LABEL: define hidden void @_Z9my_kernelv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D3:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D4:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr +// CHECK-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr +// CHECK-NEXT: [[D3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D3]] to ptr +// CHECK-NEXT: [[D4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D4]] to ptr +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double 1.600000e+01) +// CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1 +// CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK-NEXT: [[TMP3:%.*]] = call contract double @llvm.fabs.f64(double 1.600000e+01) +// CHECK-NEXT: [[TMP4:%.*]] = fcmp contract one double [[TMP3]], 0x7FF0000000000000 +// CHECK-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP4]], double [[TMP2]], double [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = select contract i1 false, double 0xFFF0000000000000, double [[TMP5]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc contract double [[TMP6]] to float +// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call contract noundef float @_ZSt4logbf(float noundef 1.600000e+01) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: store float [[CALL]], ptr [[D2_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = call contract double @llvm.ldexp.f64.i32(double 1.600000e+01, i32 10) +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc contract double [[TMP7]] to float +// CHECK-NEXT: store float [[CONV1]], ptr [[D3_ASCAST]], align 4 +// CHECK-NEXT: [[CALL2:%.*]] = call contract noundef float @_ZSt6scalbnfi(float noundef 9.000000e+00, i32 noundef 4) #[[ATTR3]] +// CHECK-NEXT: store float [[CALL2]], ptr [[D4_ASCAST]], align 4 +// CHECK-NEXT: ret void +// +__device__ void my_kernel(){ + + float D1 = __builtin_logb((float)16); + float D2 = logb((float)16); + float D3 = __builtin_scalbn((float)16, 10); + float D4 = scalbn((float)9.0, 4); +} + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits