r335763 - [CUDA] Use atexit() to call module destructor.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 27 11:32:51 PDT 2018


Author: tra
Date: Wed Jun 27 11:32:51 2018
New Revision: 335763

URL: http://llvm.org/viewvc/llvm-project?rev=335763&view=rev
Log:
[CUDA] Use atexit() to call module destructor.

This matches the way NVCC does it. Doing module cleanup at global
destructor phase used to work, but is, apparently, too late for
the CUDA runtime in CUDA-9.2, which ends up crashing with double-free.

Differential Revision: https://reviews.llvm.org/D48613

Modified:
    cfe/trunk/lib/CodeGen/CGCUDANV.cpp
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=335763&r1=335762&r2=335763&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Wed Jun 27 11:32:51 2018
@@ -472,6 +472,19 @@ llvm::Function *CGNVCUDARuntime::makeMod
     CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
   }
 
+  // Create destructor and register it with atexit() the way NVCC does it. Doing
+  // it during regular destructor phase worked in CUDA before 9.2 but results in
+  // double-free in 9.2.
+  if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
+    // extern "C" int atexit(void (*f)(void));
+    llvm::FunctionType *AtExitTy =
+        llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
+    llvm::Constant *AtExitFunc =
+        CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
+                                  /*Local=*/true);
+    CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
+  }
+
   CtorBuilder.CreateRetVoid();
   return ModuleCtorFunc;
 }

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=335763&r1=335762&r2=335763&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Wed Jun 27 11:32:51 2018
@@ -404,10 +404,9 @@ void CodeGenModule::Release() {
       AddGlobalCtor(ObjCInitFunction);
   if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
       CUDARuntime) {
-    if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction())
+    if (llvm::Function *CudaCtorFunction =
+            CUDARuntime->makeModuleCtorFunction())
       AddGlobalCtor(CudaCtorFunction);
-    if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction())
-      AddGlobalDtor(CudaDtorFunction);
   }
   if (OpenMPRuntime) {
     if (llvm::Function *OpenMPRegistrationFunction =

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=335763&r1=335762&r2=335763&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Wed Jun 27 11:32:51 2018
@@ -86,8 +86,6 @@ void use_pointers() {
 // HIPRDC-SAME: c"[[MODULE_ID:.+]]\00", section "__hip_module_id", align 32
 // * Make sure our constructor was added to global ctor list.
 // ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
-// * In separate mode we also register a destructor.
-// NORDC: @llvm.global_dtors = appending global {{.*}}@__[[PREFIX]]_module_dtor
 // * Alias to global symbol containing the NVModuleID.
 // RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8*, i8* }
 // RDC-SAME: { i32, i32, i8*, i8* }* @__[[PREFIX]]_fatbin_wrapper
@@ -127,6 +125,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>
 // NORDC-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
 //   .. and then calls __[[PREFIX]]_register_globals
 // NORDC-NEXT: call void @__[[PREFIX]]_register_globals
+// * In separate mode we also register a destructor.
+// NORDC-NEXT: call i32 @atexit(void (i8*)* @__[[PREFIX]]_module_dtor)
 
 // With relocatable device code we call __[[PREFIX]]RegisterLinkedBinary%NVModuleID%
 // RDC: call{{.*}}__[[PREFIX]]RegisterLinkedBinary[[MODULE_ID]](




More information about the cfe-commits mailing list