r357526 - [HIP-Clang] Fat binary should not be produced for non GPU code 2
Aaron Enye Shi via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 2 13:49:41 PDT 2019
Author: aaronenyeshi
Date: Tue Apr 2 13:49:41 2019
New Revision: 357526
URL: http://llvm.org/viewvc/llvm-project?rev=357526&view=rev
Log:
[HIP-Clang] Fat binary should not be produced for non GPU code 2
We also need to disable generation of these fat binary functions for CUDA when there is no GPU code.
Reviewers: yaxunl, tra
Differential Revision: https://reviews.llvm.org/D60141
Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu
Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=357526&r1=357525&r2=357526&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Apr 2 13:49:41 2019
@@ -468,11 +468,13 @@ llvm::Function *CGNVCUDARuntime::makeReg
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
bool IsHIP = CGM.getLangOpts().HIP;
+ bool IsCUDA = CGM.getLangOpts().CUDA;
// No need to generate ctors/dtors if there is no GPU binary.
StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
if (CudaGpuBinaryFileName.empty() && !IsHIP)
return nullptr;
- if (IsHIP && EmittedKernels.empty() && DeviceVars.empty())
+ if ( (IsHIP || (IsCUDA && !RelocatableDeviceCode) )
+ && EmittedKernels.empty() && DeviceVars.empty())
return nullptr;
// void __{cuda|hip}_register_globals(void* handle);
Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=357526&r1=357525&r2=357526&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Tue Apr 2 13:49:41 2019
@@ -227,20 +227,14 @@ void hostfunc(void) { kernelfunc<<<1, 1>
// There should be no __[[PREFIX]]_register_globals if we have no
// device-side globals, but we still need to register GPU binary.
// Skip GPU binary string first.
-// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
+// CUDANOGLOBALS-NOT: @{{.*}} = private constant{{.*}}
// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}}
// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
-// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
-// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// NOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// NOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
-// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
-// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
-
-// There should be no fat binary functions when no device-code is found for HIP.
-// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
-// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
-// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
-// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
+// NOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
+// NOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
// There should be no constructors/destructors if we have no GPU binary.
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
More information about the cfe-commits
mailing list