[PATCH] D11666: [CUDA] Make sure we emit all templated __global__ functions on device side.

Artem Belevich tra at google.com
Thu Jul 30 15:48:03 PDT 2015


tra created this revision.
tra added reviewers: echristo, eliben.
tra added a subscriber: cfe-commits.

Templated kernels that were instantiated from the host code would normally be eliminated because they were never referenced on device side.
The patch adds __global__ functions to @llvm.used which prevents their elimination.


http://reviews.llvm.org/D11666

Files:
  lib/AST/ASTContext.cpp
  lib/CodeGen/CodeGenModule.cpp
  test/CodeGenCUDA/ptx-kernels.cu

Index: test/CodeGenCUDA/ptx-kernels.cu
===================================================================
--- test/CodeGenCUDA/ptx-kernels.cu
+++ test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
+// Make sure that __global__ functions are emitted along with correct
+// annotations and are added to @llvm.used to prevent their elimination.
+// REQUIRES: nvptx-registered-target
+//
 // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
 
 #include "Inputs/cuda.h"
 
+// Make sure that all __global__ functiona are added to @llvm.used
+// CHECK: @llvm.used = appending global
+// CHECK-SAME: @global_function
+// CHECK-SAME: @_Z16templated_kernelIiEvT_
+
 // CHECK-LABEL: define void @device_function
 extern "C"
 __device__ void device_function() {}
@@ -13,4 +22,10 @@
   device_function();
 }
 
+// Make sure host-instantiated kernels are preserved on device side.
+template <typename T> __global__ void templated_kernel(T param) {}
+// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
+void host_function() { templated_kernel<<<0,0>>>(0); }
+
 // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
+// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -813,6 +813,13 @@
 
   if (D->hasAttr<UsedAttr>())
     addUsedGlobal(GV);
+
+  // Treat CUDA kernels as if they have attribute((used)) applied so we don't
+  // eliminate them (which would have happened otherwise because the code that
+  // call them is on the host side of the compilation and nothing else
+  // references the kernels).
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())
+    addUsedGlobal(GV);
 }
 
 void CodeGenModule::setAliasAttributes(const Decl *D,
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -8328,6 +8328,9 @@
   if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
     return true;
 
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice && D->hasAttr<CUDAGlobalAttr>())
+    return true;
+
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     // Forward declarations aren't required.
     if (!FD->doesThisDeclarationHaveABody())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D11666.31082.patch
Type: text/x-patch
Size: 2450 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150730/cbae16d7/attachment.bin>


More information about the cfe-commits mailing list