r244501 - [CUDA] Add implicit attribute((used)) to all global functions.

Mon Aug 10 13:57:02 PDT 2015

Author: tra
Date: Mon Aug 10 15:57:02 2015
New Revision: 244501

URL: http://llvm.org/viewvc/llvm-project?rev=244501&view=rev
Log:
[CUDA] Add implicit __attribute__((used)) to all __global__ functions.

This allows emitting kernels that were instantiated from the host code
and which would never be explicitly referenced otherwise.

Differential Revision: http://reviews.llvm.org/D11666

Modified:
    cfe/trunk/lib/Sema/SemaDeclAttr.cpp
    cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu

Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=244501&r1=244500&r2=244501&view=diff
==============================================================================

--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Mon Aug 10 15:57:02 2015
@@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, De
   D->addAttr(::new (S.Context)
               CUDAGlobalAttr(Attr.getRange(), S.Context,
                              Attr.getAttributeSpellingListIndex()));
+
+  // Add implicit attribute((used)) so we don't eliminate kernels
+  // because there is nothing referencing them on device side.
+  D->addAttr(UsedAttr::CreateImplicit(S.Context));
 }
 
 static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {

Modified: cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu?rev=244501&r1=244500&r2=244501&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu Mon Aug 10 15:57:02 2015
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
 // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
 
 #include "Inputs/cuda.h"
 
+// Make sure that all __global__ functiona are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
 // CHECK-LABEL: define void @device_function
 extern "C"
 __device__ void device_function() {}
@@ -13,4 +22,10 @@ __global__ void global_function() {
   device_function();
 }
 
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
 // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}




r244501 - [CUDA] Add implicit __attribute__((used)) to all __global__ functions.

r244501 - [CUDA] Add implicit attribute((used)) to all global functions.