yaxunl updated this revision to Diff 154894.
yaxunl marked an inline comment as done.
yaxunl added a comment.
clean up function prefix.
https://reviews.llvm.org/D49083
Files:
lib/CodeGen/CGCUDANV.cpp
test/CodeGenCUDA/device-stub.cu
Index: test/CodeGenCUDA/device-stub.cu
===================================================================
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -19,7 +19,7 @@
// RUN: | FileCheck %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \
-// RUN: | FileCheck %s --check-prefixes=ALL,RDC,HIP,HIPRDC
+// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,HIP
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
// RUN: | FileCheck %s -check-prefix=NOGPUBIN
@@ -79,11 +79,11 @@
// CUDA-SAME: section ".nvFatBinSegment"
// HIP-SAME: section ".hipFatBinSegment"
// * variable to save GPU binary handle after initialization
-// NORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
+// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
+// HIP: @__[[PREFIX]]_gpubin_handle = linkonce global i8** null
// * constant unnamed string with NVModuleID
// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
-// HIPRDC-SAME: c"[[MODULE_ID:.+]]\00", section "__hip_module_id", align 32
// * Make sure our constructor was added to global ctor list.
// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
// * Alias to global symbol containing the NVModuleID.
@@ -120,10 +120,18 @@
// ALL: define internal void @__[[PREFIX]]_module_ctor
// In separate mode it calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
+// HIP only register fat binary once.
+// HIP: load i8**, i8*** @__hip_gpubin_handle
+// HIP-NEXT: icmp eq i8** {{.*}}, null
+// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
+// HIP: if:
// NORDC: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
// .. stores return value in __[[PREFIX]]_gpubin_handle
// NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
// .. and then calls __[[PREFIX]]_register_globals
+// HIP-NEXT: br label %exit
+// HIP: exit:
+// HIP-NEXT: load i8**, i8*** @__hip_gpubin_handle
// NORDC-NEXT: call void @__[[PREFIX]]_register_globals
// * In separate mode we also register a destructor.
// NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
@@ -136,7 +144,14 @@
// Test that we've created destructor.
// NORDC: define internal void @__[[PREFIX]]_module_dtor
// NORDC: load{{.*}}__[[PREFIX]]_gpubin_handle
-// NORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
+// CUDANORDC-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
+// HIP-NEXT: icmp ne i8** {{.*}}, null
+// HIP-NEXT: br i1 {{.*}}, label %if, label %exit
+// HIP: if:
+// HIP-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
+// HIP-NEXT: store i8** null, i8*** @__hip_gpubin_handle
+// HIP-NEXT: br label %exit
+// HIP: exit:
// There should be no __[[PREFIX]]_register_globals if we have no
// device-side globals, but we still need to register GPU binary.
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -427,22 +427,54 @@
/*constant*/ true);
FatbinWrapper->setSection(FatbinSectionName);
- // Register binary with CUDA/HIP runtime. This is substantially different in
- // default mode vs. separate compilation!
- if (!RelocatableDeviceCode) {
- // GpuBinaryHandle = __{cuda|hip}RegisterFatBinary(&FatbinWrapper);
+ // There is only one HIP fat binary per linked module, however there are
+ // multiple constructor functions. Make sure the fat binary is registered
+ // only once.
+ if (IsHIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
+ GpuBinaryHandle = new llvm::GlobalVariable(
+ TheModule, VoidPtrPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::LinkOnceAnyLinkage,
+ /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+ "__hip_gpubin_handle");
+ auto HandleValue =
+ CtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
+ llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
+ CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
+ CtorBuilder.SetInsertPoint(IfBlock);
+ // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
+ llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+ RegisterFatbinFunc,
+ CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
+ CGM.getPointerAlign());
+ CtorBuilder.CreateBr(ExitBlock);
+ CtorBuilder.SetInsertPoint(ExitBlock);
+ // Call __hip_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc) {
+ auto HandleValue =
+ CtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
+ }
+ } else if (!RelocatableDeviceCode) {
+ // Register binary with CUDA runtime. This is substantially different in
+ // default mode vs. separate compilation!
+ // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
RegisterFatbinFunc,
CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
- llvm::ConstantPointerNull::get(VoidPtrPtrTy),
- addUnderscoredPrefixToName("_gpubin_handle"));
+ llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
- // Call __{cuda|hip}_register_globals(GpuBinaryHandle);
+ // Call __cuda_register_globals(GpuBinaryHandle);
if (RegisterGlobalsFunc)
CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
} else {
@@ -453,15 +485,13 @@
llvm::Constant *ModuleIDConstant =
makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
- // Create an alias for the FatbinWrapper that nvcc or hip backend will
- // look for.
+ // Create an alias for the FatbinWrapper that nvcc will look for.
llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
- // void __{cuda|hip}RegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
+ // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
// void *, void (*)(void **))
- SmallString<128> RegisterLinkedBinaryName(
- addUnderscoredPrefixToName("RegisterLinkedBinary"));
+ SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
RegisterLinkedBinaryName += ModuleID;
llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
@@ -520,8 +550,26 @@
auto HandleValue =
DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
- DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
-
+ // There is only one HIP fat binary per linked module, however there are
+ // multiple destructor functions. Make sure the fat binary is unregistered
+ // only once.
+ if (CGM.getLangOpts().HIP) {
+ llvm::BasicBlock *IfBlock =
+ llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
+ llvm::BasicBlock *ExitBlock =
+ llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
+ llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
+ llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
+ DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
+ DtorBuilder.SetInsertPoint(IfBlock);
+ DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
+ DtorBuilder.CreateAlignedStore(Zero, GpuBinaryHandle,
+ CGM.getPointerAlign());
+ DtorBuilder.CreateBr(ExitBlock);
+ DtorBuilder.SetInsertPoint(ExitBlock);
+ } else {
+ DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
+ }
DtorBuilder.CreateRetVoid();
return ModuleDtorFunc;
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits