r262499 - [CUDA] Do not generate unnecessary runtime init code.

Artem Belevich via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 2 10:28:53 PST 2016


Author: tra
Date: Wed Mar  2 12:28:53 2016
New Revision: 262499

URL: http://llvm.org/viewvc/llvm-project?rev=262499&view=rev
Log:
[CUDA] Do not generate unnecessary runtime init code.

Differential Revision: http://reviews.llvm.org/D17780

Modified:
    cfe/trunk/lib/CodeGen/CGCUDANV.cpp
    cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=262499&r1=262498&r2=262499&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Wed Mar  2 12:28:53 2016
@@ -178,6 +178,10 @@ void CGNVCUDARuntime::emitDeviceStubBody
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
+  // No need to register anything if there are no kernels or device variables.
+  if (EmittedKernels.empty() && DeviceVars.empty())
+    return nullptr;
+
   llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
       llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
@@ -251,6 +255,10 @@ llvm::Function *CGNVCUDARuntime::makeReg
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+  // No need to generate ctors/dtors if there are no GPU binaries.
+  if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+    return nullptr;
+
   // void __cuda_register_globals(void* handle);
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
   // void ** __cudaRegisterFatBinary(void *);
@@ -309,7 +317,8 @@ llvm::Function *CGNVCUDARuntime::makeMod
                                    CGM.getPointerAlign());
 
     // Call __cuda_register_globals(GpuBinaryHandle);
-    CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+    if (RegisterGlobalsFunc)
+      CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
 
     // Save GpuBinaryHandle so we can unregister it in destructor.
     GpuBinaryHandles.push_back(GpuBinaryHandle);
@@ -329,6 +338,10 @@ llvm::Function *CGNVCUDARuntime::makeMod
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+  // No need for destructor if we don't have handles to unregister.
+  if (GpuBinaryHandles.empty())
+    return nullptr;
+
   // void __cudaUnregisterFatBinary(void ** handle);
   llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=262499&r1=262498&r2=262499&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Wed Mar  2 12:28:53 2016
@@ -1,7 +1,11 @@
 // RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o -  -DNOGLOBALS \
+// RUN:   | FileCheck %s -check-prefix=NOGLOBALS
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
 
 #include "Inputs/cuda.h"
 
+#ifndef NOGLOBALS
 // CHECK-DAG: @device_var = internal global i32
 __device__ int device_var;
 
@@ -65,6 +69,7 @@ __global__ void kernelfunc(int i, int j,
 // CHECK: call{{.*}}cudaConfigureCall
 // CHECK: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
+#endif
 
 // Test that we've built a function to register kernels and global vars.
 // CHECK: define internal void @__cuda_register_globals
@@ -89,3 +94,18 @@ void hostfunc(void) { kernelfunc<<<1, 1>
 // CHECK: load{{.*}}__cuda_gpubin_handle
 // CHECK-NEXT: call void @__cudaUnregisterFatBinary
 
+// There should be no __cuda_register_globals if we have no
+// device-side globals, but we still need to register the GPU binary.
+// Skip the GPU binary string first.
+// NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
+// NOGLOBALS-NOT: define internal void @__cuda_register_globals
+// NOGLOBALS: define internal void @__cuda_module_ctor
+// NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
+// NOGLOBALS-NOT: call void @__cuda_register_globals
+// NOGLOBALS: define internal void @__cuda_module_dtor
+// NOGLOBALS: call void @__cudaUnregisterFatBinary
+
+// There should be no constructors/destructors if we have no GPU binary.
+// NOGPUBIN-NOT: define internal void @__cuda_register_globals
+// NOGPUBIN-NOT: define internal void @__cuda_module_ctor
+// NOGPUBIN-NOT: define internal void @__cuda_module_dtor




More information about the cfe-commits mailing list