[llvm] 7deea9d - [AMDGPU] Move WaitcntBrackets::simplifyXcnt near other simplify functions. NFC. (#178673)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 29 07:49:16 PST 2026
Author: Jay Foad
Date: 2026-01-29T15:49:11Z
New Revision: 7deea9db700a26c2b15afebe573b914febe030ac
URL: https://github.com/llvm/llvm-project/commit/7deea9db700a26c2b15afebe573b914febe030ac
DIFF: https://github.com/llvm/llvm-project/commit/7deea9db700a26c2b15afebe573b914febe030ac.diff
LOG: [AMDGPU] Move WaitcntBrackets::simplifyXcnt near other simplify functions. NFC. (#178673)
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee59a2e59d4a3..27f4ac389d437 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1325,6 +1325,27 @@ void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
Count = ~0u;
}
+void WaitcntBrackets::simplifyXcnt(const AMDGPU::Waitcnt &CheckWait,
+ AMDGPU::Waitcnt &UpdateWait) const {
+ // Try to simplify xcnt further by checking for joint kmcnt and loadcnt
+ // optimizations. On entry to a block with multiple predecessors, there may
+ // be pending SMEM and VMEM events active at the same time.
+ // In such cases, only clear one active event at a time.
+ // TODO: Revisit xcnt optimizations for gfx1250.
+ // Wait on XCNT is redundant if we are already waiting for a load to complete.
+ // SMEM can return out of order, so only omit XCNT wait if we are waiting till
+ // zero.
+ if (CheckWait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP))
+ UpdateWait.XCnt = ~0u;
+ // If we have pending store we cannot optimize XCnt because we do not wait for
+ // stores. VMEM loads return in order, so if we only have loads XCnt is
+ // decremented to the same number as LOADCnt.
+ if (CheckWait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
+ !hasPendingEvent(STORE_CNT) && CheckWait.XCnt >= CheckWait.LoadCnt)
+ UpdateWait.XCnt = ~0u;
+ simplifyWaitcnt(X_CNT, UpdateWait.XCnt);
+}
+
void WaitcntBrackets::simplifyVmVsrc(const AMDGPU::Waitcnt &CheckWait,
AMDGPU::Waitcnt &UpdateWait) const {
// Waiting for some counters implies waiting for VM_VSRC, since an
@@ -1455,27 +1476,6 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
}
}
-void WaitcntBrackets::simplifyXcnt(const AMDGPU::Waitcnt &CheckWait,
- AMDGPU::Waitcnt &UpdateWait) const {
- // Try to simplify xcnt further by checking for joint kmcnt and loadcnt
- // optimizations. On entry to a block with multiple predescessors, there may
- // be pending SMEM and VMEM events active at the same time.
- // In such cases, only clear one active event at a time.
- // TODO: Revisit xcnt optimizations for gfx1250.
- // Wait on XCNT is redundant if we are already waiting for a load to complete.
- // SMEM can return out of order, so only omit XCNT wait if we are waiting till
- // zero.
- if (CheckWait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP))
- UpdateWait.XCnt = ~0u;
- // If we have pending store we cannot optimize XCnt because we do not wait for
- // stores. VMEM loads retun in order, so if we only have loads XCnt is
- // decremented to the same number as LOADCnt.
- if (CheckWait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
- !hasPendingEvent(STORE_CNT) && CheckWait.XCnt >= CheckWait.LoadCnt)
- UpdateWait.XCnt = ~0u;
- simplifyWaitcnt(X_CNT, UpdateWait.XCnt);
-}
-
// Where there are multiple types of event in the bracket of a counter,
// the decrement may go out of order.
bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
More information about the llvm-commits
mailing list