<div dir="ltr">Couldn't you just add an implicit UsedAttr when processing the <span style="font-size:12.8000001907349px">CUDAGlobalAttr and LangOpts.CUDAIsDevice was set to true?</span></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jul 30, 2015 at 3:48 PM, Artem Belevich <span dir="ltr"><<a href="mailto:tra@google.com" target="_blank">tra@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">tra created this revision.<br>
tra added reviewers: echristo, eliben.<br>
tra added a subscriber: cfe-commits.<br>
<br>
Templated kernels that were instantiated from the host code would normally be eliminated because they were never referenced on device side.<br>
The patch adds __global__ functions to @llvm.used which prevents their elimination.<br>
<br>
<br>
<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_D11666&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=0yIBlttu3CqqW7_TWKCAe7kXV9Rk0gzNoOx8D98VBXc&s=jOJiEh68JuOcMNv3-mVTO6VeGrjEZ_OCaJMpJ4lCYgA&e=" rel="noreferrer" target="_blank">http://reviews.llvm.org/D11666</a><br>
<br>
Files:<br>
lib/AST/ASTContext.cpp<br>
lib/CodeGen/CodeGenModule.cpp<br>
test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=0yIBlttu3CqqW7_TWKCAe7kXV9Rk0gzNoOx8D98VBXc&s=55y3cA4s4Zy58TSbNURl_Mmqa6p7jdRbyU08G5fRyps&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
<br>
Index: test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=0yIBlttu3CqqW7_TWKCAe7kXV9Rk0gzNoOx8D98VBXc&s=55y3cA4s4Zy58TSbNURl_Mmqa6p7jdRbyU08G5fRyps&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
===================================================================<br>
--- test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=0yIBlttu3CqqW7_TWKCAe7kXV9Rk0gzNoOx8D98VBXc&s=55y3cA4s4Zy58TSbNURl_Mmqa6p7jdRbyU08G5fRyps&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
+++ test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=0yIBlttu3CqqW7_TWKCAe7kXV9Rk0gzNoOx8D98VBXc&s=55y3cA4s4Zy58TSbNURl_Mmqa6p7jdRbyU08G5fRyps&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
@@ -1,7 +1,16 @@<br>
+// Make sure that __global__ functions are emitted along with correct<br>
+// annotations and are added to @llvm.used to prevent their elimination.<br>
+// REQUIRES: nvptx-registered-target<br>
+//<br>
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s<br>
<br>
#include "Inputs/cuda.h"<br>
<br>
+// Make sure that all __global__ functiona are added to @llvm.used<br>
+// CHECK: @llvm.used = appending global<br>
+// CHECK-SAME: @global_function<br>
+// CHECK-SAME: @_Z16templated_kernelIiEvT_<br>
+<br>
// CHECK-LABEL: define void @device_function<br>
extern "C"<br>
__device__ void device_function() {}<br>
@@ -13,4 +22,10 @@<br>
device_function();<br>
}<br>
<br>
+// Make sure host-instantiated kernels are preserved on device side.<br>
+template <typename T> __global__ void templated_kernel(T param) {}<br>
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_<br>
+void host_function() { templated_kernel<<<0,0>>>(0); }<br>
+<br>
// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}<br>
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}<br>
Index: lib/CodeGen/CodeGenModule.cpp<br>
===================================================================<br>
--- lib/CodeGen/CodeGenModule.cpp<br>
+++ lib/CodeGen/CodeGenModule.cpp<br>
@@ -813,6 +813,13 @@<br>
<br>
if (D->hasAttr<UsedAttr>())<br>
addUsedGlobal(GV);<br>
+<br>
+ // Treat CUDA kernels as if they have attribute((used)) applied so we don't<br>
+ // eliminate them (which would have happened otherwise because the code that<br>
+ // call them is on the host side of the compilation and nothing else<br>
+ // references the kernels).<br>
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())<br>
+ addUsedGlobal(GV);<br>
}<br>
<br>
void CodeGenModule::setAliasAttributes(const Decl *D,<br>
Index: lib/AST/ASTContext.cpp<br>
===================================================================<br>
--- lib/AST/ASTContext.cpp<br>
+++ lib/AST/ASTContext.cpp<br>
@@ -8328,6 +8328,9 @@<br>
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())<br>
return true;<br>
<br>
+ if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())<br>
+ return true;<br>
+<br>
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {<br>
// Forward declarations aren't required.<br>
if (!FD->doesThisDeclarationHaveABody())<br>
<br>
<br>
<br>_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
<br></blockquote></div><br></div>