[llvm] [AMDGPU] Rework GFX11 VALU Mask Write Hazard (PR #138663)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue May 6 02:25:57 PDT 2025
================
@@ -2968,29 +2968,102 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
return false;
assert(!ST.hasExtendedWaitCounts());
- if (!ST.isWave64() || !SIInstrInfo::isSALU(*MI))
+ if (!ST.isWave64())
+ return false;
+
+ const bool IsSALU = SIInstrInfo::isSALU(*MI);
+ const bool IsVALU = SIInstrInfo::isVALU(*MI);
+ if (!IsSALU && !IsVALU)
return false;
// The hazard sequence is three instructions:
// 1. VALU reads SGPR as mask
- // 2. SALU writes SGPR
- // 3. SALU reads SGPR
+ // 2. VALU/SALU writes SGPR
+ // 3. VALU/SALU reads SGPR
// The hazard can expire if the distance between 2 and 3 is sufficient.
// In practice this happens <10% of the time, hence this always assumes
// the hazard exists if 1 and 2 are present to avoid searching.
- const MachineOperand *SDSTOp = TII.getNamedOperand(*MI, AMDGPU::OpName::sdst);
- if (!SDSTOp || !SDSTOp->isReg())
- return false;
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
- const Register HazardReg = SDSTOp->getReg();
- if (HazardReg == AMDGPU::EXEC ||
- HazardReg == AMDGPU::EXEC_LO ||
- HazardReg == AMDGPU::EXEC_HI ||
- HazardReg == AMDGPU::M0)
+ auto IgnoreableSGPR = [](const Register Reg) {
+ switch (Reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::EXEC_LO:
+ case AMDGPU::EXEC_HI:
+ case AMDGPU::M0:
+ case AMDGPU::SGPR_NULL:
+ case AMDGPU::SGPR_NULL64:
+ case AMDGPU::SCC:
+ return true;
+ default:
+ return false;
+ }
+ };
+ auto IsVCC = [](const Register Reg) {
+ return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
+ };
+
+ struct StateType {
+ SmallSet<Register, 2> HazardSGPRs;
+ };
+
+ SmallVector<const MachineInstr *> WaitInstrs;
+ bool HasSGPRRead = false;
+ StateType InitialState;
+
+ // Look for SGPR write.
+ MachineOperand *HazardDef = nullptr;
+ for (MachineOperand &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ if (Op.isDef() && HazardDef)
+ continue;
+
+ Register Reg = Op.getReg();
+ if (IgnoreableSGPR(Reg))
+ continue;
+ if (!IsVCC(Reg)) {
+ if (Op.isImplicit())
+ continue;
+ if (!TRI->isSGPRReg(MRI, Reg))
+ continue;
+ }
+ // Also check for SGPR reads.
+ if (Op.isUse()) {
+ HasSGPRRead = true;
+ continue;
+ }
+
+ assert(!HazardDef);
+ HazardDef = &Op;
+ }
+
+ if (!HazardDef)
return false;
- auto IsHazardFn = [HazardReg, this](const MachineInstr &I) {
+ const Register HazardReg = HazardDef->getReg();
+ auto *HazardRegRC = TRI->getPhysRegBaseClass(HazardReg);
+ bool IsSGPR32 = (HazardRegRC == TRI->getSGPRClassForBitWidth(32)) ||
+ HazardReg == AMDGPU::VCC_LO || HazardReg == AMDGPU::VCC_HI;
----------------
arsenm wrote:
Could this use `AMDGPU::SReg_32RegClass.contains(HazardReg)` instead?
https://github.com/llvm/llvm-project/pull/138663
More information about the llvm-commits mailing list