[PATCH] D11666: [CUDA] Make sure we emit all templated __global__ functions on device side.
Artem Belevich
tra at google.com
Thu Jul 30 15:48:03 PDT 2015
tra created this revision.
tra added reviewers: echristo, eliben.
tra added a subscriber: cfe-commits.
Templated kernels that were instantiated from the host code would normally be eliminated because they were never referenced on device side.
The patch adds __global__ functions to @llvm.used which prevents their elimination.
http://reviews.llvm.org/D11666
Files:
lib/AST/ASTContext.cpp
lib/CodeGen/CodeGenModule.cpp
test/CodeGenCUDA/ptx-kernels.cu
Index: test/CodeGenCUDA/ptx-kernels.cu
===================================================================
--- test/CodeGenCUDA/ptx-kernels.cu
+++ test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "Inputs/cuda.h"
+// Make sure that all __global__ functiona are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
// CHECK-LABEL: define void @device_function
extern "C"
__device__ void device_function() {}
@@ -13,4 +22,10 @@
device_function();
}
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -813,6 +813,13 @@
if (D->hasAttr<UsedAttr>())
addUsedGlobal(GV);
+
+ // Treat CUDA kernels as if they have attribute((used)) applied so we don't
+ // eliminate them (which would have happened otherwise because the code that
+ // call them is on the host side of the compilation and nothing else
+ // references the kernels).
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())
+ addUsedGlobal(GV);
}
void CodeGenModule::setAliasAttributes(const Decl *D,
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -8328,6 +8328,9 @@
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
return true;
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())
+ return true;
+
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
// Forward declarations aren't required.
if (!FD->doesThisDeclarationHaveABody())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D11666.31082.patch
Type: text/x-patch
Size: 2450 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150730/cbae16d7/attachment.bin>
More information about the cfe-commits
mailing list