[llvm] [AMDGPU] Register allocation anti-hints to reduce MFMA hazard NOPs (PR #156943)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 08:00:42 PST 2026
================
@@ -3896,6 +3901,130 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
}
}
+bool SIRegisterInfo::shouldApplyAntiHints(Register VirtReg,
+ const MachineFunction &MF,
+ SmallVector<MCPhysReg, 16> &AntiHints,
+ const VirtRegMap *VRM,
+ unsigned NumAllocatedVGPRs) const {
+ // NOTE(review): VirtReg, AntiHints and VRM are not read in this body.
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
+ unsigned TargetOccupancy = MFI->getOccupancy();
+ unsigned CurrentOccupancy =
+ ST.getOccupancyWithNumVGPRs(NumAllocatedVGPRs, DynamicVGPRBlockSize);
+
+ // Occupancy is already 1 (the lowest), so there is no occupancy regression
+ // left to guard against: always allow anti-hints.
+ if (CurrentOccupancy == 1)
+ return true;
+
+ // Query the max VGPR count for the target and the current occupancy, then
+ // derive cut-offs at 80% and 95% of them respectively (integer division).
+ unsigned MaxVGPRForTargetOccupancy =
+ ST.getMaxNumVGPRs(TargetOccupancy, DynamicVGPRBlockSize);
+ unsigned MaxVgprsForCurrentOccupancy =
+ ST.getMaxNumVGPRs(CurrentOccupancy, DynamicVGPRBlockSize);
+ unsigned MaxVGPRsCutOffForTargetOccupancy =
+ (MaxVGPRForTargetOccupancy * 80) / 100;
+ unsigned MaxVGPRsCutOffForCurrentOccupancy =
+ (MaxVgprsForCurrentOccupancy * 95) / 100;
+
+ // Bail out once allocated VGPRs reach the 80% target-occupancy cut-off.
+ if (NumAllocatedVGPRs >= MaxVGPRsCutOffForTargetOccupancy)
+ return false;
+
+ // Bail out once allocated VGPRs reach the 95% current-occupancy cut-off.
+ if (NumAllocatedVGPRs >= MaxVGPRsCutOffForCurrentOccupancy)
+ return false;
+
+ // Below both cut-offs: safe to apply anti-hints.
+ return true;
+}
+
+void SIRegisterInfo::applyRegAllocationAntiHints(
+ Register VirtReg, ArrayRef<MCPhysReg> &Order,
+ SmallVectorImpl<MCPhysReg> &OrderStorage,
+ SmallVector<MCPhysReg, 16> &AntiHints, const MachineFunction &MF,
+ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
+
+ // Early exit to default order if we have no anti-hints or no VRM.
+ if (AntiHints.empty() || !VRM)
+ return;
+
+ // Get total number of allocated VGPRs to determine the current occupancy.
+ unsigned NumAllocatedVGPRs = 0;
+ unsigned NumVGPRs = 0;
+ unsigned NumAGPRs = 0;
+ if (Matrix) {
+ for (MCPhysReg Reg : AMDGPU::VGPR_32RegClass)
+ if (Matrix->isPhysRegUsed(Reg))
+ NumVGPRs = std::max(NumVGPRs, getHWRegIndex(Reg) + 1);
+ for (MCPhysReg Reg : AMDGPU::AGPR_32RegClass)
+ if (Matrix->isPhysRegUsed(Reg))
+ NumAGPRs = std::max(NumAGPRs, getHWRegIndex(Reg) + 1);
+ }
+ NumAllocatedVGPRs =
+ AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), NumAGPRs, NumVGPRs);
+
+ // Early exit if we should not apply anti-hints.
+ if (!shouldApplyAntiHints(VirtReg, MF, AntiHints, VRM, NumAllocatedVGPRs))
+ return;
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
+ unsigned CurrentOccupancy =
+ ST.getOccupancyWithNumVGPRs(NumAllocatedVGPRs, DynamicVGPRBlockSize);
+ unsigned MaxVGPRsForCurrentOccupancy =
+ ST.getMaxNumVGPRs(CurrentOccupancy, DynamicVGPRBlockSize);
+
+ // Returns true if Reg fits within the current occupancy VGPR budget.
+ auto IsWithinBudget = [&](MCPhysReg Reg) -> bool {
+ unsigned HighestVGPR = 0;
+ bool IsVGPR = false;
+ for (MCPhysReg SubReg : subregs_inclusive(Reg)) {
+ if (AMDGPU::VGPR_32RegClass.contains(SubReg) ||
----------------
arsenm wrote:
Should be able to directly look at the correct subregister class elements
https://github.com/llvm/llvm-project/pull/156943
More information about the llvm-commits
mailing list