[llvm] [AMDGPU] Fix missing S_WAIT_XCNT with multiple pending VMEMs (PR #166779)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 06:20:32 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/166779.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+6-12)
- (modified) llvm/test/CodeGen/AMDGPU/wait-xcnt.mir (+5-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b7fa899678ec7..306d59d0867cd 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1291,21 +1291,15 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
// On entry to a block with multiple predecessors, there may
// be pending SMEM and VMEM events active at the same time.
// In such cases, only clear one active event at a time.
- auto applyPendingXcntGroup = [this](unsigned E) {
- unsigned LowerBound = getScoreLB(X_CNT);
- applyWaitcnt(X_CNT, 0);
- PendingEvents |= (1 << E);
- setScoreLB(X_CNT, LowerBound);
- };
// Wait on XCNT is redundant if we are already waiting for a load to complete.
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
// zero.
if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
- if (hasPendingEvent(VMEM_GROUP))
- applyPendingXcntGroup(VMEM_GROUP);
- else
+ if (!hasMixedPendingEvents(X_CNT))
applyWaitcnt(X_CNT, 0);
+ else
+ PendingEvents &= ~(1 << SMEM_GROUP);
return;
}
@@ -1314,10 +1308,10 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
// decremented to the same number as LOADCnt.
if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
!hasPendingEvent(STORE_CNT)) {
- if (hasPendingEvent(SMEM_GROUP))
- applyPendingXcntGroup(SMEM_GROUP);
- else
+ if (!hasMixedPendingEvents(X_CNT))
applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
+ else if (Wait.LoadCnt == 0)
+ PendingEvents &= ~(1 << VMEM_GROUP);
return;
}
diff --git a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
index f964480dcc633..fe16f0d44dd1c 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/wait-xcnt.mir
@@ -1069,7 +1069,6 @@ body: |
$sgpr0 = S_MOV_B32 $sgpr0
...
-# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
---
name: mixed_pending_events
tracksRegLiveness: true
@@ -1088,8 +1087,8 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
- ; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: liveins: $sgpr2, $vgpr2
@@ -1098,6 +1097,7 @@ body: |
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
; GCN-NEXT: S_WAIT_KMCNT 0
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
+ ; GCN-NEXT: S_WAIT_XCNT 0
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
@@ -1105,8 +1105,8 @@ body: |
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
liveins: $vgpr0_vgpr1, $sgpr2
- $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
- $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
+ $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
bb.2:
liveins: $sgpr2, $vgpr2
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
``````````
</details>
https://github.com/llvm/llvm-project/pull/166779
More information about the llvm-commits mailing list