Author: Daniele Castagna Date: 2023-01-19T15:02:14-08:00 New Revision: 32c26e27b6fcd12703dcd00adf178330d0ad8449
URL: https://github.com/llvm/llvm-project/commit/32c26e27b6fcd12703dcd00adf178330d0ad8449 DIFF: https://github.com/llvm/llvm-project/commit/32c26e27b6fcd12703dcd00adf178330d0ad8449.diff LOG: CUDA/HIP: Use kernel name to map to symbol Currently CGCUDANV uses an llvm::Function as a key to map kernels to a symbol in host code. HIP adds one level of indirection and uses the llvm::Function to map to a global variable that will be initialized to the kernel stub ptr. Unfortunately there is no garantee that the llvm::Function created by GetOrCreateLLVMFunction will be the same. In fact, the first time we encounter GetOrCrateLLVMFunction for a kernel, the type might not be completed yet, and the type of llvm::Function will be a generic {}, since the complete type is not required to get a symbol to a function. In this case we end up creating two global variables, one for the llvm::Function with the incomplete type and one for the function with the complete type. The first global variable will be declared by not defined, resulting in a linking error. This change uses the mangled name of the llvm::Function as key in the KernelHandles map, in this way the same llvm::Function will be associated to the same kernel handle even if they types are different. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D140663 Added: clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu Modified: clang/lib/CodeGen/CGCUDANV.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index abf320996dc4d..bb887df3e4e04 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -49,10 +49,10 @@ class CGNVCUDARuntime : public CGCUDARuntime { const Decl *D; }; llvm::SmallVector<KernelInfo, 16> EmittedKernels; - // Map a device stub function to a symbol for identifying kernel in host code. + // Map a kernel mangled name to a symbol for identifying kernel in host code // For CUDA, the symbol for identifying the kernel is the same as the device // stub function. For HIP, they are diff erent. - llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles; + llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles; // Map a kernel handle to the kernel stub. llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs; struct VarInfo { @@ -310,7 +310,8 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); - if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) { + if (auto *GV = + dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) { GV->setLinkage(CGF.CurFn->getLinkage()); GV->setInitializer(CGF.CurFn); } @@ -400,8 +401,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch - llvm::Value *Kernel = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy); + llvm::Value *Kernel = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); @@ -456,8 +457,8 @@ void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, // Emit the call to cudaLaunch llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); - llvm::Value *Arg = - CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy); + llvm::Value *Arg = CGF.Builder.CreatePointerCast( + KernelHandles[CGF.CurFn->getName()], CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -551,7 +552,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, - Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy), + Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), @@ -1130,7 +1131,7 @@ void CGNVCUDARuntime::createOffloadingEntries() { StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" : "cuda_offloading_entries"; for (KernelInfo &I : EmittedKernels) - OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel], + OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()], getDeviceSideName(cast<NamedDecl>(I.D)), 0, DeviceVarFlags::OffloadGlobalEntry, Section); @@ -1193,12 +1194,12 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { - auto Loc = KernelHandles.find(F); + auto Loc = KernelHandles.find(F->getName()); if (Loc != KernelHandles.end()) return Loc->second; if (!CGM.getLangOpts().HIP) { - KernelHandles[F] = F; + KernelHandles[F->getName()] = F; KernelStubs[F] = F; return F; } @@ -1212,7 +1213,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, Var->setDSOLocal(F->isDSOLocal()); Var->setVisibility(F->getVisibility()); CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var); - KernelHandles[F] = Var; + KernelHandles[F->getName()] = Var; KernelStubs[Var] = F; return Var; } diff --git a/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu new file mode 100644 index 0000000000000..bd1da1f05c1eb --- /dev/null +++ b/clang/test/CodeGenCUDA/incomplete-func-ptr-type.cu @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - \ +// RUN: | FileCheck %s + +#define __global__ __attribute__((global)) +// CHECK: @_Z4kern7TempValIjE = constant ptr @_Z19__device_stub__kern7TempValIjE, align 8 +// CHECK: @0 = private unnamed_addr constant [19 x i8] c"_Z4kern7TempValIjE\00", align 1 +template <typename type> +struct TempVal { + type value; +}; + +__global__ void kern(TempVal<unsigned int> in_val); + +int main(int argc, char ** argv) { + auto* fptr = &(kern); +// CHECK: store ptr @_Z4kern7TempValIjE, ptr %fptr, align 8 + return 0; +} +// CHECK: define dso_local void @_Z19__device_stub__kern7TempValIjE(i32 %in_val.coerce) #1 { +// CHECK: %2 = call i32 @hipLaunchByPtr(ptr @_Z4kern7TempValIjE) + +// CHECK: define internal void @__hip_register_globals(ptr %0) { +// CHECK: %1 = call i32 @__hipRegisterFunction(ptr %0, ptr @_Z4kern7TempValIjE, ptr @0, ptr @0, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) + +__global__ void kern(TempVal<unsigned int> in_val) { +} + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits