[llvm] [AMDGPU] Register allocation anti-hints to reduce MFMA hazard NOPs (PR #156943)

Fri Mar 6 08:00:42 PST 2026

================
@@ -3896,6 +3901,130 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
   }
 }
 
+/// Decide whether register allocation anti-hints may be honoured given the
+/// number of VGPRs allocated so far.
+///
+/// \param MF                 Function being allocated; supplies the target
+///                           occupancy and the dynamic VGPR block size.
+/// \param NumAllocatedVGPRs  VGPR count used to derive the current occupancy
+///                           and compared against the cut-offs below.
+/// \returns true when the derived occupancy is 1, or when NumAllocatedVGPRs
+///          is below both 80% of the VGPR maximum for the target occupancy
+///          and 95% of the VGPR maximum for the current occupancy; false
+///          otherwise.
+///
+/// VirtReg, AntiHints and VRM are accepted but not consulted by this body.
+bool SIRegisterInfo::shouldApplyAntiHints(Register VirtReg,
+                                          const MachineFunction &MF,
+                                          SmallVector<MCPhysReg, 16> &AntiHints,
+                                          const VirtRegMap *VRM,
+                                          unsigned NumAllocatedVGPRs) const {
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  const unsigned VGPRBlockSize = FuncInfo->getDynamicVGPRBlockSize();
+  const unsigned WantedOccupancy = FuncInfo->getOccupancy();
+  const unsigned LiveOccupancy =
+      ST.getOccupancyWithNumVGPRs(NumAllocatedVGPRs, VGPRBlockSize);
+
+  // Occupancy 1 is the floor: there is nothing left to regress, so the
+  // anti-hints can always be applied.
+  if (LiveOccupancy == 1)
+    return true;
+
+  // VGPR ceilings for the target occupancy and for the occupancy we are
+  // currently at.
+  const unsigned TargetLimit =
+      ST.getMaxNumVGPRs(WantedOccupancy, VGPRBlockSize);
+  const unsigned LiveLimit = ST.getMaxNumVGPRs(LiveOccupancy, VGPRBlockSize);
+
+  // Stay clear of each ceiling: 80% of the target-occupancy limit and 95% of
+  // the current-occupancy limit (integer arithmetic, rounded down).
+  const unsigned TargetCutOff = (TargetLimit * 80) / 100;
+  const unsigned LiveCutOff = (LiveLimit * 95) / 100;
+
+  // Anti-hints are only safe while the allocation is below both cut-offs.
+  const bool NearTargetLimit = NumAllocatedVGPRs >= TargetCutOff;
+  const bool NearLiveLimit = NumAllocatedVGPRs >= LiveCutOff;
+  return !(NearTargetLimit || NearLiveLimit);
+}
+
+void SIRegisterInfo::applyRegAllocationAntiHints(
+    Register VirtReg, ArrayRef<MCPhysReg> &Order,
+    SmallVectorImpl<MCPhysReg> &OrderStorage,
+    SmallVector<MCPhysReg, 16> &AntiHints, const MachineFunction &MF,
+    const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
+
+  // Early exit to default order if we have no anti-hints or no VRM.
+  if (AntiHints.empty() || !VRM)
+    return;
+
+  // Get total number of allocated VGPRs to determine the current occupancy.
+  unsigned NumAllocatedVGPRs = 0;
+  unsigned NumVGPRs = 0;
+  unsigned NumAGPRs = 0;
+  if (Matrix) {
+    for (MCPhysReg Reg : AMDGPU::VGPR_32RegClass)
+      if (Matrix->isPhysRegUsed(Reg))
+        NumVGPRs = std::max(NumVGPRs, getHWRegIndex(Reg) + 1);
+    for (MCPhysReg Reg : AMDGPU::AGPR_32RegClass)
+      if (Matrix->isPhysRegUsed(Reg))
+        NumAGPRs = std::max(NumAGPRs, getHWRegIndex(Reg) + 1);
+  }
+  NumAllocatedVGPRs =
+      AMDGPU::getTotalNumVGPRs(ST.hasGFX90AInsts(), NumAGPRs, NumVGPRs);
+
+  // Early exit if we should not apply anti-hints.
+  if (!shouldApplyAntiHints(VirtReg, MF, AntiHints, VRM, NumAllocatedVGPRs))
+    return;
+
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
+  unsigned CurrentOccupancy =
+      ST.getOccupancyWithNumVGPRs(NumAllocatedVGPRs, DynamicVGPRBlockSize);
+  unsigned MaxVGPRsForCurrentOccupancy =
+      ST.getMaxNumVGPRs(CurrentOccupancy, DynamicVGPRBlockSize);
+
+  // Returns true if Reg fits within the current occupancy VGPR budget.
+  auto IsWithinBudget = [&](MCPhysReg Reg) -> bool {
+    unsigned HighestVGPR = 0;
+    bool IsVGPR = false;
+    for (MCPhysReg SubReg : subregs_inclusive(Reg)) {
+      if (AMDGPU::VGPR_32RegClass.contains(SubReg) ||
----------------
arsenm wrote:

Should be able to directly look at the correct subregister class elements 

https://github.com/llvm/llvm-project/pull/156943