[llvm] [AMDGPU] Refactor GFX11 VALU Mask Hazard Waitcnt Merging (PR #169213)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 20:37:14 PST 2025
================
@@ -437,11 +438,96 @@ class AMDGPUWaitSGPRHazards {
return Changed;
}
+ bool runWaitMerging(MachineFunction &MF) {
+ // Perform per-block merging of existing s_waitcnt_depctr instructions.
+ // Track set of SGPR writes before a given wait instruction, and search
+ // for reads of these SGPRs prior to the next wait.
+ // If no reads occur then the 1st wait can be merged into the 2nd.
+ const unsigned ConstantMaskBits = AMDGPU::DepCtr::encodeFieldSaSdst(
+ AMDGPU::DepCtr::encodeFieldVaSdst(AMDGPU::DepCtr::encodeFieldVaVcc(0),
+ 0),
+ 0);
+ bool Changed = false;
+ for (auto &MBB : MF) {
+ std::bitset<128> WriteSet, NextWriteSet;
+ MachineInstr *PrevWait = nullptr;
+ bool ReadWriteDep = false;
+ for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
+ E = MBB.instr_end();
+ MI != E; ++MI) {
+ if (MI->isMetaInstruction())
+ continue;
+
+ if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && !MI->isBundled() &&
+ (MI->getOperand(0).getImm() & ConstantMaskBits) ==
+ ConstantMaskBits) {
+ if (PrevWait && !ReadWriteDep) {
+ // Merge previous wait into this one and merge write sets.
+ MachineOperand &MaskOp = MI->getOperand(0);
+ MaskOp.setImm(
+ mergeMasks(PrevWait->getOperand(0).getImm(), MaskOp.getImm()));
+ PrevWait->eraseFromParent();
+ WriteSet |= NextWriteSet;
+ } else {
+ // Start a new merging region using fresh write set.
+ WriteSet = NextWriteSet;
+ }
+ NextWriteSet.reset();
+ PrevWait = &*MI;
+ ReadWriteDep = false;
+ Changed = true;
+ continue;
+ }
+
+ const bool IsVALU = SIInstrInfo::isVALU(*MI);
+ const bool IsSALU = SIInstrInfo::isSALU(*MI);
+ if (!IsVALU && !IsSALU)
----------------
perlfu wrote:
All COPY instructions have already been eliminated from the MIR at this point.
https://github.com/llvm/llvm-project/pull/169213
More information about the llvm-commits
mailing list