tra created this revision. tra added reviewers: jlebar, timshen. Herald added subscribers: bixia, sanjoy.
There's apparently a race between the fatbin destructors registered by us and some internal calls registered by the CUDA runtime from cudaRegisterFatbin. Moving fatbin de-registration to atexit() was not sufficient to avoid a crash in the CUDA runtime on exit when the runtime was linked statically but the CUDA kernel was launched from a shared library. Moving the atexit() call to before we call cudaRegisterFatbin appears to work with both statically and dynamically linked CUDA TUs. https://reviews.llvm.org/D49763 Files: clang/lib/CodeGen/CGCUDANV.cpp Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -375,6 +375,19 @@ CtorBuilder.SetInsertPoint(CtorEntryBB); + // Create destructor and register it with atexit() the way NVCC does it. Doing + // it during regular destructor phase worked in CUDA before 9.2 but results in + // double-free in 9.2. + if (llvm::Function *CleanupFn = makeModuleDtorFunction()) { + // extern "C" int atexit(void (*f)(void)); + llvm::FunctionType *AtExitTy = + llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); + llvm::Constant *AtExitFunc = + CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), + /*Local=*/true); + CtorBuilder.CreateCall(AtExitFunc, CleanupFn); + } + const char *FatbinConstantName; const char *FatbinSectionName; const char *ModuleIDSectionName; @@ -530,19 +543,6 @@ CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args); } - // Create destructor and register it with atexit() the way NVCC does it. Doing - // it during regular destructor phase worked in CUDA before 9.2 but results in - // double-free in 9.2. 
- if (llvm::Function *CleanupFn = makeModuleDtorFunction()) { - // extern "C" int atexit(void (*f)(void)); - llvm::FunctionType *AtExitTy = - llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); - llvm::Constant *AtExitFunc = - CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), - /*Local=*/true); - CtorBuilder.CreateCall(AtExitFunc, CleanupFn); - } - CtorBuilder.CreateRetVoid(); return ModuleCtorFunc; }
Index: clang/lib/CodeGen/CGCUDANV.cpp =================================================================== --- clang/lib/CodeGen/CGCUDANV.cpp +++ clang/lib/CodeGen/CGCUDANV.cpp @@ -375,6 +375,19 @@ CtorBuilder.SetInsertPoint(CtorEntryBB); + // Create destructor and register it with atexit() the way NVCC does it. Doing + // it during regular destructor phase worked in CUDA before 9.2 but results in + // double-free in 9.2. + if (llvm::Function *CleanupFn = makeModuleDtorFunction()) { + // extern "C" int atexit(void (*f)(void)); + llvm::FunctionType *AtExitTy = + llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); + llvm::Constant *AtExitFunc = + CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), + /*Local=*/true); + CtorBuilder.CreateCall(AtExitFunc, CleanupFn); + } + const char *FatbinConstantName; const char *FatbinSectionName; const char *ModuleIDSectionName; @@ -530,19 +543,6 @@ CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args); } - // Create destructor and register it with atexit() the way NVCC does it. Doing - // it during regular destructor phase worked in CUDA before 9.2 but results in - // double-free in 9.2. - if (llvm::Function *CleanupFn = makeModuleDtorFunction()) { - // extern "C" int atexit(void (*f)(void)); - llvm::FunctionType *AtExitTy = - llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); - llvm::Constant *AtExitFunc = - CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), - /*Local=*/true); - CtorBuilder.CreateCall(AtExitFunc, CleanupFn); - } - CtorBuilder.CreateRetVoid(); return ModuleCtorFunc; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits