[llvm] 2f15e4e - [AMDGPU] Remove redundant S_WAIT_XCNT after inserting S_SET_VGPR_MSB (#188527)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 03:59:30 PDT 2026
Author: Jay Foad
Date: 2026-03-26T10:59:26Z
New Revision: 2f15e4ea33ec22ad159c5c4e3f60fd077195e401
URL: https://github.com/llvm/llvm-project/commit/2f15e4ea33ec22ad159c5c4e3f60fd077195e401
DIFF: https://github.com/llvm/llvm-project/commit/2f15e4ea33ec22ad159c5c4e3f60fd077195e401.diff
LOG: [AMDGPU] Remove redundant S_WAIT_XCNT after inserting S_SET_VGPR_MSB (#188527)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 649a113c2fbe5..f9e21f2a77879 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -155,6 +155,10 @@ class AMDGPULowerVGPREncoding {
/// Last hard clause instruction.
MachineInstr *Clause;
+ // Remember whether XCNT is known to be zero because of an S_SET_VGPR_MSB
+ // instruction that we inserted, which implicitly waits for XCNT==0.
+ bool XCntIsZero;
+
/// S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32 targeting MODE is
/// silently dropped on GFX1250. When set, the next S_SET_VGPR_MSB insertion
/// must be preceded by S_NOP to avoid the hazard.
@@ -254,21 +258,32 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
return true;
}
- I = handleClause(I);
- I = handleCoissue(I);
+ MachineBasicBlock::instr_iterator InsertPt = handleClause(I);
+ InsertPt = handleCoissue(InsertPt);
// Case 2 match in handleSetregMode: the setreg's imm[12:19] matched
// current MSBs, but the next VALU needs different MSBs, so this
// S_SET_VGPR_MSB would land right after the setreg. Insert S_NOP to
// prevent it from being silently dropped.
if (NeedNopBeforeSetVGPRMSB) {
- BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_NOP)).addImm(0);
+ BuildMI(*MBB, InsertPt, {}, TII->get(AMDGPU::S_NOP)).addImm(0);
NeedNopBeforeSetVGPRMSB = false;
}
- MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
- .addImm(NewMode.encode() | OldModeBits);
+ MostRecentModeSet =
+ BuildMI(*MBB, InsertPt, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
+ .addImm(NewMode.encode() | OldModeBits);
LLVM_DEBUG(dbgs() << " -> inserted new S_SET_VGPR_MSB: "
<< *MostRecentModeSet);
+ // If we inserted S_SET_VGPR_MSB early then XCNT should remain zero from the
+ // insertion point to the current instruction. Remove any redundant
+ // S_WAIT_XCNT instructions in that range.
+ for (MachineInstr &MI : make_early_inc_range(make_range(InsertPt, I))) {
+ assert(!SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isSMRD(MI));
+ if (MI.getOpcode() == AMDGPU::S_WAIT_XCNT)
+ MI.eraseFromBundle();
+ }
+ XCntIsZero = true;
+
CurrentMode = NewMode;
return true;
}
@@ -372,7 +387,6 @@ bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
return setMode(NewMode, MI.getIterator());
}
assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
-
return false;
}
@@ -554,6 +568,7 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
CurrentMode = {};
for (auto &MBB : MF) {
MostRecentModeSet = nullptr;
+ XCntIsZero = false;
NeedNopBeforeSetVGPRMSB = false;
this->MBB = &MBB;
@@ -600,9 +615,21 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
continue;
}
+ // If XCNT is known to be zero then any S_WAIT_XCNT instruction is
+ // redundant and can be removed.
+ if (MI.getOpcode() == AMDGPU::S_WAIT_XCNT && XCntIsZero) {
+ MI.eraseFromBundle();
+ Changed = true;
+ continue;
+ }
+
Changed |= runOnMachineInstr(MI);
NeedNopBeforeSetVGPRMSB = false;
+ // Any VMEM or SMEM instruction may increment XCNT.
+ if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSMRD(MI))
+ XCntIsZero = false;
+
if (ClauseRemaining)
--ClauseRemaining;
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 7b9e7e24ce05a..e27d22558b755 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -1010,3 +1010,36 @@ body: |
; ASM: NumVgprs: 514
...
+
+# ASM-LABEL: {{^}}redundant_xcnt:
+# DIS-LABEL: <redundant_xcnt>:
+---
+name: redundant_xcnt
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; ASM: %bb.0:
+
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr256, $vgpr257
+
+ ; GCN-NEXT: global_load_b32 v5, v[0:1], off
+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
+
+ ; GCN-NEXT: global_load_b32 v5, v[2:3], off
+ $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
+
+ S_WAIT_XCNT 1
+
+ ; GCN-NEXT: s_set_vgpr_msb 4
+ ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0
+ ; GCN-NEXT: v_add_f32_e32 v0, 1, v0 /*v256*/
+ $vgpr0 = V_ADD_F32_e32 1, $vgpr256, implicit $mode, implicit $exec
+
+ S_WAIT_XCNT 0
+
+ ; GCN-NEXT: v_add_f32_e32 v2, 1, v1 /*v257*/
+ $vgpr2 = V_ADD_F32_e32 1, $vgpr257, implicit $mode, implicit $exec
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x400
+ ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
index c5b7ebe0c78f3..a6666b308437f 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
@@ -96,7 +96,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
- ; CHECK-NEXT: S_WAIT_XCNT 0
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
More information about the llvm-commits
mailing list