[llvm] [AMDGPU] Don't DEALLOC_VGPRS from callable functions (PR #72245)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 04:12:31 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Diana (rovka)

<details>
<summary>Changes</summary>

Callable functions should not send the DEALLOC_VGPRS message, because that might release the VGPRs and scratch allocation before the caller is done with them.

---
Full diff: https://github.com/llvm/llvm-project/pull/72245.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+7-2) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+5) 
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs.mir (+26) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ede4841b8a5fd7d..d862b37443aec83 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1027,6 +1027,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
     Wait.VmCnt = 0;
   }
 
+  CallingConv::ID CC = MI.getMF()->getFunction().getCallingConv();
+
   // All waits must be resolved at call return.
   // NOTE: this could be improved with knowledge of all call sites or
   //   with knowledge of the called routines.
@@ -1039,10 +1041,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
   // stores. In this case it can be useful to send a message to explicitly
   // release all VGPRs before the stores have completed, but it is only safe to
-  // do this if there are no outstanding scratch stores.
+  // do this if:
+  // * there are no outstanding scratch stores
+  // * this is not a callable function
   else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
            MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
-    if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !OptNone &&
+    if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
+        !AMDGPU::isCallableCC(CC) && !OptNone &&
         ScoreBrackets.getScoreRange(VS_CNT) != 0 &&
         !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
       ReleaseVGPRInsts.insert(&MI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a09abc639d7590f..5ea135c7b90dd62 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1924,6 +1924,10 @@ bool isChainCC(CallingConv::ID CC) {
   }
 }
 
+bool isCallableCC(CallingConv::ID CC) {
+  return !isEntryFunctionCC(CC) && !isChainCC(CC);
+}
+
 bool isKernelCC(const Function *Func) {
   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1e0994d0862cf5d..965414019263448 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1114,6 +1114,11 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC);
 LLVM_READNONE
 bool isChainCC(CallingConv::ID CC);
 
+// Functions that are called via the 'call' instruction, rather than launched
+// by the hardware or via the 'llvm.amdgcn.cs.chain' intrinsic.
+LLVM_READNONE
+bool isCallableCC(CallingConv::ID CC);
+
 bool isKernelCC(const Function *Func);
 
 // FIXME: Remove this when calling conventions cleaned up
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 3a879e818af797b..39ced04253b571a 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -22,6 +22,8 @@
   define amdgpu_ps void @global_atomic() { ret void }
   define amdgpu_ps void @image_atomic() { ret void }
   define amdgpu_ps void @global_store_optnone() noinline optnone { ret void }
+  define amdgpu_gfx void @gfx_function() { ret void }
+  define void @ccc_function() { ret void }
 ...
 
 ---
@@ -556,3 +558,27 @@ body:             |
     S_WAITCNT_VSCNT undef $sgpr_null, 0
     S_ENDPGM 0
 ...
+
+---
+name:            gfx_function
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: gfx_function
+    ; CHECK-NOT: S_SENDMSG 3
+    ; CHECK: S_ENDPGM 0
+    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    S_ENDPGM 0
+...
+
+---
+name:            ccc_function
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: ccc_function
+    ; CHECK-NOT: S_SENDMSG 3
+    ; CHECK: S_ENDPGM 0
+    GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    S_WAITCNT_VSCNT undef $sgpr_null, 0
+    S_ENDPGM 0
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/72245


More information about the llvm-commits mailing list