[PATCH] D21337: [CUDA] Give templated device functions internal linkage, templated kernels external linkage.

Justin Lebar via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 30 11:48:46 PDT 2016


This revision was automatically updated to reflect the committed changes.
Closed by commit rL274261: [CUDA] Give templated device functions internal linkage, templated kernels… (authored by jlebar).

Changed prior to commit:
  http://reviews.llvm.org/D21337?vs=60728&id=62391#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D21337

Files:
  cfe/trunk/lib/CodeGen/CodeGenModule.cpp
  cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu

Index: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp
@@ -2671,9 +2671,18 @@
   // explicit instantiations can occur in multiple translation units
   // and must all be equivalent. However, we are not allowed to
   // throw away these explicit instantiations.
-  if (Linkage == GVA_StrongODR)
-    return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
-                                            : llvm::Function::ExternalLinkage;
+  //
+  // We don't currently support CUDA device code spread out across multiple TUs,
+  // so say that CUDA templates are either external (for kernels) or internal.
+  // This lets llvm perform aggressive inter-procedural optimizations.
+  if (Linkage == GVA_StrongODR) {
+    if (Context.getLangOpts().AppleKext)
+      return llvm::Function::ExternalLinkage;
+    if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+      return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
+                                          : llvm::Function::InternalLinkage;
+    return llvm::Function::WeakODRLinkage;
+  }
 
   // C++ doesn't have tentative definitions and thus cannot have common
   // linkage.
Index: cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
===================================================================
--- cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
+++ cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
@@ -19,11 +19,11 @@
 
 // Make sure host-instantiated kernels are preserved on device side.
 template <typename T> __global__ void templated_kernel(T param) {}
-// CHECK-DAG: define weak_odr void @_Z16templated_kernelIiEvT_(
+// CHECK-DAG: define void @_Z16templated_kernelIiEvT_(
 
 namespace {
 __global__ void anonymous_ns_kernel() {}
-// CHECK-DAG: define weak_odr void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
+// CHECK-DAG: define void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
 }
 
 void host_function() {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D21337.62391.patch
Type: text/x-patch
Size: 2058 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160630/7aa44e0e/attachment.bin>


More information about the cfe-commits mailing list