[PATCH] D17313: [CUDA] Annotate all calls in CUDA device mode as convergent.

Tue Feb 16 16:48:14 PST 2016

jlebar created this revision.
jlebar added reviewers: rnk, majnemer.
jlebar added subscribers: tra, cfe-commits.

We need the notion of convergent functions -- which may expose
convergent behavior to callers -- and convergent calls, which are calls
where we would like to preserve convergent behavior in the callee, if
possible.

In CUDA device mode, all calls and functions are convergent.  The
optimizer can then strip this away under some circumstances.

http://reviews.llvm.org/D17313

Files:
  lib/CodeGen/CGCall.cpp
  test/CodeGenCUDA/convergent.cu
  test/CodeGenCUDA/device-var-init.cu

Index: test/CodeGenCUDA/device-var-init.cu
===================================================================

--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -382,7 +382,7 @@
 // CHECK:   call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
 // CHECK:   call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
 // CHECK:   call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
-// CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3
+// CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
 // CHECK:   call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
 // CHECK:   call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
 // CHECK:   call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -22,12 +22,16 @@
 // DEVICE-SAME: convergent
 // DEVICE-NEXT: define void @_Z3barv
 __host__ __device__ void baz();
-__host__ __device__ void bar() { baz(); }
+__host__ __device__ void bar() {
+  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
+  baz();
+}
 
 // DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // DEVICE: attributes [[BAZ_ATTR]] = {
 // DEVICE-SAME: convergent
 // DEVICE-SAME: }
+// DEVICE: attributes [[CALL_ATTR]] = { convergent }
 
 // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3139,7 +3139,15 @@
   if (CGM.getLangOpts().ObjCAutoRefCount)
     AddObjCARCExceptionMetadata(Inst);
 
-  return llvm::CallSite(Inst);
+  llvm::CallSite CS(Inst);
+  // All calls in CUDA device mode must conservatively be marked as convergent,
+  // preventing some optimizations.  The optimizer can remove this if it can
+  // prove the the callee is not convergent.
+  if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) {
+    CS.addAttribute(llvm::AttributeSet::FunctionIndex,
+                    llvm::Attribute::Convergent);
+  }
+  return CS;
 }
 
 /// \brief Store a non-aggregate value to an address to initialize it.  For
@@ -3539,6 +3547,14 @@
         Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
                            llvm::Attribute::NoInline);
 
+  // All calls in CUDA device code are conservatively marked as convergent.  The
+  // optimizer is able to remove this attribute if it can prove that the callee
+  // is not convergent.
+  if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice)
+    Attrs =
+        Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                           llvm::Attribute::Convergent);
+
   CS.setAttributes(Attrs);
   CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17313.48127.patch
Type: text/x-patch
Size: 2949 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160217/c91763b0/attachment.bin>