tra created this revision.
tra added a reviewer: jlebar.
Herald added subscribers: bixia, sanjoy.

This matches the way NVCC does it. Doing module cleanup at global
destructor phase used to work, but is, apparently, too late for
the CUDA runtime in CUDA-9.2, which ends up crashing with double-free.


https://reviews.llvm.org/D48613

Files:
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/CodeGen/CodeGenModule.cpp


Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -404,10 +404,9 @@
       AddGlobalCtor(ObjCInitFunction);
   if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
       CUDARuntime) {
-    if (llvm::Function *CudaCtorFunction = 
CUDARuntime->makeModuleCtorFunction())
+    if (llvm::Function *CudaCtorFunction =
+            CUDARuntime->makeModuleCtorFunction())
       AddGlobalCtor(CudaCtorFunction);
-    if (llvm::Function *CudaDtorFunction = 
CUDARuntime->makeModuleDtorFunction())
-      AddGlobalDtor(CudaDtorFunction);
   }
   if (OpenMPRuntime) {
     if (llvm::Function *OpenMPRegistrationFunction =
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -472,6 +472,18 @@
     CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
   }
 
+  // Create destructor and register it with atexit() the way NVCC does it. 
Doing
+  // it during regular destructor phase worked in CUDA before 9.2 but results 
in
+  // double-free in 9.2.
+  llvm::Function *CleanupFn = makeModuleDtorFunction();
+  // extern "C" int atexit(void (*f)(void));
+  llvm::FunctionType *AtExitTy =
+      llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
+  llvm::Constant *AtExitFunc =
+      CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
+                                /*Local=*/true);
+  CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
+
   CtorBuilder.CreateRetVoid();
   return ModuleCtorFunc;
 }


Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -404,10 +404,9 @@
       AddGlobalCtor(ObjCInitFunction);
   if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
       CUDARuntime) {
-    if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction())
+    if (llvm::Function *CudaCtorFunction =
+            CUDARuntime->makeModuleCtorFunction())
       AddGlobalCtor(CudaCtorFunction);
-    if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction())
-      AddGlobalDtor(CudaDtorFunction);
   }
   if (OpenMPRuntime) {
     if (llvm::Function *OpenMPRegistrationFunction =
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -472,6 +472,18 @@
     CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
   }
 
+  // Create destructor and register it with atexit() the way NVCC does it. Doing
+  // it during regular destructor phase worked in CUDA before 9.2 but results in
+  // double-free in 9.2.
+  llvm::Function *CleanupFn = makeModuleDtorFunction();
+  // extern "C" int atexit(void (*f)(void));
+  llvm::FunctionType *AtExitTy =
+      llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
+  llvm::Constant *AtExitFunc =
+      CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
+                                /*Local=*/true);
+  CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
+
   CtorBuilder.CreateRetVoid();
   return ModuleCtorFunc;
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to