[PATCH] D17780: [CUDA] Do not generate unnecessary runtime init code.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 1 13:28:40 PST 2016
tra created this revision.
tra added reviewers: jlebar, jingyue.
tra added a subscriber: cfe-commits.
Do not generate runtime init code if we don't have anything to init.
http://reviews.llvm.org/D17780
Files:
lib/CodeGen/CGCUDANV.cpp
test/CodeGenCUDA/device-stub.cu
Index: test/CodeGenCUDA/device-stub.cu
===================================================================
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -1,7 +1,11 @@
// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - -DNOGLOBALS \
+// RUN: | FileCheck %s -check-prefix=NOGLOBALS
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
#include "Inputs/cuda.h"
+#ifndef NOGLOBALS
// CHECK-DAG: @device_var = internal global i32
__device__ int device_var;
@@ -65,6 +69,7 @@
// CHECK: call{{.*}}cudaConfigureCall
// CHECK: call{{.*}}kernelfunc
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
+#endif
// Test that we've built a function to register kernels and global vars.
// CHECK: define internal void @__cuda_register_globals
@@ -89,3 +94,18 @@
// CHECK: load{{.*}}__cuda_gpubin_handle
// CHECK-NEXT: call void @__cudaUnregisterFatBinary
+// There should be no __cuda_register_globals if we have no
+// device-side globals, but we still need to register GPU binary.
+// Skip GPU binary string first.
+// NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
+// NOGLOBALS-NOT: define internal void @__cuda_register_globals
+// NOGLOBALS: define internal void @__cuda_module_ctor
+// NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
+// NOGLOBALS-NOT: call void @__cuda_register_globals
+// NOGLOBALS: define internal void @__cuda_module_dtor
+// NOGLOBALS: call void @__cudaUnregisterFatBinary
+
+// There should be no constructors/destructors if we have no GPU binary.
+// NOGPUBIN-NOT: define internal void @__cuda_register_globals
+// NOGPUBIN-NOT: define internal void @__cuda_module_ctor
+// NOGPUBIN-NOT: define internal void @__cuda_module_dtor
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -177,6 +177,10 @@
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
+ // No need to register anything
+ if (EmittedKernels.empty() && DeviceVars.empty())
+ return nullptr;
+
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
@@ -246,6 +250,10 @@
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+ // No need to generate ctors/dtors if there are no GPU binaries.
+ if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ return nullptr;
+
// void __cuda_register_globals(void* handle);
llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
// void ** __cudaRegisterFatBinary(void *);
@@ -303,8 +311,9 @@
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
- // Call __cuda_register_globalss(GpuBinaryHandle);
- CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
+ // Call __cuda_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc)
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
// Save GpuBinaryHandle so we can unregister it in destructor.
GpuBinaryHandles.push_back(GpuBinaryHandle);
@@ -324,6 +333,10 @@
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+ // No need for destructor if we don't have handles to unregister.
+ if (GpuBinaryHandles.empty())
+ return nullptr;
+
// void __cudaUnregisterFatBinary(void ** handle);
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17780.49539.patch
Type: text/x-patch
Size: 3817 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160301/32e2a342/attachment.bin>
More information about the cfe-commits
mailing list