<div dir="ltr">Thanks for the suggestion. That's indeed a better way to do it. I've updated the patch.<div><br></div><div>--Artem</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jul 30, 2015 at 5:26 PM, David Majnemer <span dir="ltr"><<a href="mailto:david.majnemer@gmail.com" target="_blank">david.majnemer@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class=""><br><br>On Thursday, July 30, 2015, Artem Belevich <<a href="mailto:tra@google.com" target="_blank">tra@google.com</a>> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">I could, and it would do the job, but I thought that that would be polluting AST with something that didn't originate from source.</div></blockquote><div><br></div></span><div>It's ok to synthesize attributes so long as yo mark them as implicit.</div><div class="HOEnZb"><div class="h5"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div><br></div><div>--Artem<br><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jul 30, 2015 at 3:55 PM, David Majnemer <span dir="ltr"><<a>david.majnemer@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Couldn't you just add an implicit UsedAttr when processing the <span style="font-size:12.8000001907349px">CUDAGlobalAttr and LangOpts.CUDAIsDevice was set to true?</span></div><div class="gmail_extra"><br><div class="gmail_quote"><div><div>On Thu, Jul 30, 2015 at 3:48 PM, Artem Belevich <span dir="ltr"><<a>tra@google.com</a>></span> wrote:<br></div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div>tra created this revision.<br>
tra added reviewers: echristo, eliben.<br>
tra added a subscriber: cfe-commits.<br>
<br>
Templated kernels that were instantiated from the host code would normally be eliminated because they were never referenced on device side.<br>
The patch adds __global__ functions to @llvm.used which prevents their elimination.<br>
<br>
<br>
<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__reviews.llvm.org_D11666&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=Z25A8IWglmGnKQ43fYEm_8_fxaEFf6nlBvSwqfk-G-M&s=ls7VpRerwNEKI6CW5Cam_okBiXsooKPpHqp3h6J7RFM&e=" rel="noreferrer" target="_blank">http://reviews.llvm.org/D11666</a><br>
<br>
Files:<br>
  lib/AST/ASTContext.cpp<br>
  lib/CodeGen/CodeGenModule.cpp<br>
  test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=Z25A8IWglmGnKQ43fYEm_8_fxaEFf6nlBvSwqfk-G-M&s=pvKoiG2lcksCHxbTvn4UjCpmUQrKeY-DpVCYejuod7Y&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
<br>
Index: test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=Z25A8IWglmGnKQ43fYEm_8_fxaEFf6nlBvSwqfk-G-M&s=pvKoiG2lcksCHxbTvn4UjCpmUQrKeY-DpVCYejuod7Y&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
===================================================================<br>
--- test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=Z25A8IWglmGnKQ43fYEm_8_fxaEFf6nlBvSwqfk-G-M&s=pvKoiG2lcksCHxbTvn4UjCpmUQrKeY-DpVCYejuod7Y&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
+++ test/CodeGenCUDA/<a href="https://urldefense.proofpoint.com/v2/url?u=http-3A__ptx-2Dkernels.cu&d=AwMFaQ&c=8hUWFZcy2Z-Za5rBPlktOQ&r=BSqEv9KvKMW_Ob8SyngJ70KdZISM_ASROnREeq0cCxk&m=Z25A8IWglmGnKQ43fYEm_8_fxaEFf6nlBvSwqfk-G-M&s=pvKoiG2lcksCHxbTvn4UjCpmUQrKeY-DpVCYejuod7Y&e=" rel="noreferrer" target="_blank">ptx-kernels.cu</a><br>
@@ -1,7 +1,16 @@<br>
+// Make sure that __global__ functions are emitted along with correct<br>
+// annotations and are added to @llvm.used to prevent their elimination.<br>
+// REQUIRES: nvptx-registered-target<br>
+//<br>
 // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s<br>
<br>
 #include "Inputs/cuda.h"<br>
<br>
+// Make sure that all __global__ functiona are added to @llvm.used<br>
+// CHECK: @llvm.used = appending global<br>
+// CHECK-SAME: @global_function<br>
+// CHECK-SAME: @_Z16templated_kernelIiEvT_<br>
+<br>
 // CHECK-LABEL: define void @device_function<br>
 extern "C"<br>
 __device__ void device_function() {}<br>
@@ -13,4 +22,10 @@<br>
   device_function();<br>
 }<br>
<br>
+// Make sure host-instantiated kernels are preserved on device side.<br>
+template <typename T> __global__ void templated_kernel(T param) {}<br>
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_<br>
+void host_function() { templated_kernel<<<0,0>>>(0); }<br>
+<br>
 // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}<br>
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}<br>
Index: lib/CodeGen/CodeGenModule.cpp<br>
===================================================================<br>
--- lib/CodeGen/CodeGenModule.cpp<br>
+++ lib/CodeGen/CodeGenModule.cpp<br>
@@ -813,6 +813,13 @@<br>
<br>
   if (D->hasAttr<UsedAttr>())<br>
     addUsedGlobal(GV);<br>
+<br>
+  // Treat CUDA kernels as if they have attribute((used)) applied so we don't<br>
+  // eliminate them (which would have happened otherwise because the code that<br>
+  // call them is on the host side of the compilation and nothing else<br>
+  // references the kernels).<br>
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())<br>
+    addUsedGlobal(GV);<br>
 }<br>
<br>
 void CodeGenModule::setAliasAttributes(const Decl *D,<br>
Index: lib/AST/ASTContext.cpp<br>
===================================================================<br>
--- lib/AST/ASTContext.cpp<br>
+++ lib/AST/ASTContext.cpp<br>
@@ -8328,6 +8328,9 @@<br>
   if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())<br>
     return true;<br>
<br>
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())<br>
+    return true;<br>
+<br>
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {<br>
     // Forward declarations aren't required.<br>
     if (!FD->doesThisDeclarationHaveABody())<br>
<br>
<br>
<br></div></div>_______________________________________________<br>
cfe-commits mailing list<br>
<a>cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
<br></blockquote></div><br></div>
</blockquote></div><br><br clear="all"><div><br></div>-- <br><div><div dir="ltr">--Artem Belevich </div></div></div></div></div></blockquote>
</div></div></blockquote></div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature"><div dir="ltr">--Artem Belevich</div></div>
</div></div>