[llvm] [AMDGPU][SIInsertWaitCnts] Gfx12.5 - Refactor xcnt optimization (PR #164357)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 00:06:20 PST 2025
================
@@ -1287,40 +1289,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
}
}
-void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
- // On entry to a block with multiple predescessors, there may
- // be pending SMEM and VMEM events active at the same time.
- // In such cases, only clear one active event at a time.
- auto applyPendingXcntGroup = [this](unsigned E) {
- unsigned LowerBound = getScoreLB(X_CNT);
- applyWaitcnt(X_CNT, 0);
- PendingEvents |= (1 << E);
- setScoreLB(X_CNT, LowerBound);
- };
-
+bool WaitcntBrackets::hasRedundantXCntWithKmCnt(const AMDGPU::Waitcnt &Wait) {
// Wait on XCNT is redundant if we are already waiting for a load to complete.
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
// zero.
- if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
- if (hasPendingEvent(VMEM_GROUP))
- applyPendingXcntGroup(VMEM_GROUP);
- else
- applyWaitcnt(X_CNT, 0);
- return;
- }
+ return Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP);
+}
+bool WaitcntBrackets::canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait) {
// If we have pending store we cannot optimize XCnt because we do not wait for
// stores. VMEM loads return in order, so if we only have loads XCnt is
// decremented to the same number as LOADCnt.
- if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
- !hasPendingEvent(STORE_CNT)) {
- if (hasPendingEvent(SMEM_GROUP))
- applyPendingXcntGroup(SMEM_GROUP);
- else
- applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ return Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
+ !hasPendingEvent(STORE_CNT) && !hasPendingEvent(SMEM_GROUP);
+}
+
+void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
+ if (hasRedundantXCntWithKmCnt(Wait)) {
+ if (hasPendingEvent(VMEM_GROUP)) {
+ // Only clear the SMEM_GROUP event, but VMEM_GROUP could still require
+ // handling.
+ PendingEvents &= ~(1 << SMEM_GROUP);
+ } else {
+ applyWaitcnt(X_CNT, 0);
+ }
return;
}
-
+ if (canOptimizeXCntWithLoadCnt(Wait)) {
+ // On entry to a block with multiple predecessors, there may
+ // be pending SMEM and VMEM events active at the same time.
+ // In such cases, only clear one active event at a time.
----------------
easyonaadit wrote:
Small nit: this comment seems misplaced now; maybe it could be moved to the top of the function.
https://github.com/llvm/llvm-project/pull/164357
More information about the llvm-commits mailing list