[llvm] [AMDGPU] Don't DEALLOC_VGPRS from callable functions (PR #72245)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 14 04:12:31 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Diana (rovka)
<details>
<summary>Changes</summary>
Callable functions should not send the DEALLOC_VGPRS message, because doing so might release the VGPRs and the scratch allocation before the caller is done with them.
---
Full diff: https://github.com/llvm/llvm-project/pull/72245.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+7-2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+5)
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs.mir (+26)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ede4841b8a5fd7d..d862b37443aec83 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1027,6 +1027,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
Wait.VmCnt = 0;
}
+ CallingConv::ID CC = MI.getMF()->getFunction().getCallingConv();
+
// All waits must be resolved at call return.
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
@@ -1039,10 +1041,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
// stores. In this case it can be useful to send a message to explicitly
// release all VGPRs before the stores have completed, but it is only safe to
- // do this if there are no outstanding scratch stores.
+ // do this if:
+ // * there are no outstanding scratch stores
+ // * this is not a callable function
else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
- if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && !OptNone &&
+ if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
+ !AMDGPU::isCallableCC(CC) && !OptNone &&
ScoreBrackets.getScoreRange(VS_CNT) != 0 &&
!ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
ReleaseVGPRInsts.insert(&MI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a09abc639d7590f..5ea135c7b90dd62 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1924,6 +1924,10 @@ bool isChainCC(CallingConv::ID CC) {
}
}
+bool isCallableCC(CallingConv::ID CC) {
+ return !isEntryFunctionCC(CC) && !isChainCC(CC);
+}
+
bool isKernelCC(const Function *Func) {
return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1e0994d0862cf5d..965414019263448 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1114,6 +1114,11 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
bool isChainCC(CallingConv::ID CC);
+// Functions that are called via the 'call' instruction, rather than launched
+// by the hardware or via the 'llvm.amdgcn.cs.chain' intrinsic.
+LLVM_READNONE
+bool isCallableCC(CallingConv::ID CC);
+
bool isKernelCC(const Function *Func);
// FIXME: Remove this when calling conventions cleaned up
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 3a879e818af797b..39ced04253b571a 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -22,6 +22,8 @@
define amdgpu_ps void @global_atomic() { ret void }
define amdgpu_ps void @image_atomic() { ret void }
define amdgpu_ps void @global_store_optnone() noinline optnone { ret void }
+ define amdgpu_gfx void @gfx_function() { ret void }
+ define void @ccc_function() { ret void }
...
---
@@ -556,3 +558,27 @@ body: |
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_ENDPGM 0
...
+
+---
+name: gfx_function
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: gfx_function
+ ; CHECK-NOT: S_SENDMSG 3
+ ; CHECK: S_ENDPGM 0
+ GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ S_WAITCNT_VSCNT undef $sgpr_null, 0
+ S_ENDPGM 0
+...
+
+---
+name: ccc_function
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: ccc_function
+ ; CHECK-NOT: S_SENDMSG 3
+ ; CHECK: S_ENDPGM 0
+ GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ S_WAITCNT_VSCNT undef $sgpr_null, 0
+ S_ENDPGM 0
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/72245
More information about the llvm-commits mailing list