r248293 - [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 22 10:22:51 PDT 2015
Author: tra
Date: Tue Sep 22 12:22:51 2015
New Revision: 248293
URL: http://llvm.org/viewvc/llvm-project?rev=248293&view=rev
Log:
[CUDA] Add implicit __attribute__((used)) to all __global__ functions.
This makes sure that we emit kernels that were instantiated from the
host code and which would never be explicitly referenced by anything
else on device side.
Differential Revision: http://reviews.llvm.org/D11666
Modified:
cfe/trunk/lib/Sema/SemaDeclAttr.cpp
cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=248293&r1=248292&r2=248293&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Sep 22 12:22:51 2015
@@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, De
D->addAttr(::new (S.Context)
CUDAGlobalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
+
+ // Add implicit attribute((used)) so we don't eliminate kernels
+ // because there is nothing referencing them on device side.
+ D->addAttr(UsedAttr::CreateImplicit(S.Context));
}
static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Modified: cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu?rev=248293&r1=248292&r2=248293&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu Tue Sep 22 12:22:51 2015
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
#include "Inputs/cuda.h"
+// Make sure that all __global__ functions are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
// CHECK-LABEL: define void @device_function
extern "C"
__device__ void device_function() {}
@@ -13,4 +22,10 @@ __global__ void global_function() {
device_function();
}
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
More information about the cfe-commits
mailing list