[llvm] [AMDGPU] Register allocation anti-hints to reduce MFMA hazard NOPs (PR #156943)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 08:00:03 PST 2026


================
@@ -243,8 +252,136 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
   TII = ST.getInstrInfo();
   MRI = &MF.getRegInfo();
   TRI = ST.getRegisterInfo();
+  SchedModel.init(&ST);
 
   bool Changed = false;
+  // Add RA anti-hints to reduce MFMA hazard NOPs
+  if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts()) {
+    // Max lookback window for RAW or WAW hazard (in instructions)
+    constexpr unsigned MaxLookbackWindow = 19;
+
+    // Per-MFMA tracking to determine anti-hint eligibility for subsequent
+    // instructions within the max lookback window.
+    struct MFMAInfo {
+      SmallVector<Register, 4> Regs;
+      unsigned InstrCount;
+      unsigned MFMALatency;
+      unsigned CumulativeLatencySinceThisMFMA;
+    };
+
+    for (const MachineBasicBlock &MBB : MF) {
+      SmallVector<MFMAInfo, 16> RecentMFMAs;
+      unsigned InstrCount = 0;
+
+      for (const MachineInstr &MI : MBB) {
+        if (MI.isDebugInstr())
+          continue;
+
+        ++InstrCount;
+        const unsigned InstrLatency = SchedModel.computeInstrLatency(&MI);
+
+        // Handle MFMA instructions
+        if (SIInstrInfo::isMFMA(MI)) {
+          SmallVector<Register, 4> MFMARegisters;
+          // Collect a named operand's virtual register if its class has VGPRs
+          auto collectNamedOperand = [&](AMDGPU::OpName OpName,
+                                         const char *OpNameStr) {
+            const MachineOperand *MO = TII->getNamedOperand(MI, OpName);
+            if (!MO) {
+              LLVM_DEBUG(dbgs() << "    Named operand " << OpNameStr
+                                << " not found\n");
+              return;
+            }
+            if (MO->isReg() && MO->getReg().isVirtual()) {
+              Register Reg = MO->getReg();
+              const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+              // Only consider VGPRs
+              if (TRI->hasVGPRs(RC))
+                MFMARegisters.push_back(Reg);
+              LLVM_DEBUG(dbgs() << "    Collected " << OpNameStr << " : "
+                                << printReg(Reg, TRI) << "\n");
+            }
+          };
+
+          // Collect destination and source C (accumulator) registers
+          collectNamedOperand(AMDGPU::OpName::vdst, "vdst");
+          collectNamedOperand(AMDGPU::OpName::src2, "src2");
+          if (!MFMARegisters.empty()) {
+            RecentMFMAs.push_back(
+                {std::move(MFMARegisters), InstrCount, InstrLatency, 0u});
+            // Maintain the lookback window
+            while (!RecentMFMAs.empty() &&
+                   (InstrCount - RecentMFMAs.front().InstrCount) >
+                       MaxLookbackWindow)
+              RecentMFMAs.erase(RecentMFMAs.begin());
+          }
+          continue;
+        }
+
+        bool ShouldCheckReuse = MI.mayLoad() || MI.mayStore() || MI.isCopy() ||
+                                SIInstrInfo::isVALU(MI);
+
+        // Skip instructions that cannot reuse MFMA registers, or when no MFMA
+        // is tracked yet, but still charge their latency to each tracked MFMA.
+        if (!ShouldCheckReuse || RecentMFMAs.empty()) {
+          for (MFMAInfo &M : RecentMFMAs)
+            M.CumulativeLatencySinceThisMFMA += InstrLatency;
+          continue;
+        }
+
+        // Process operands that might reuse MFMA registers
+        const SlotIndex CurrentSlot = LIS->getInstructionIndex(MI).getRegSlot();
+
+        for (const MachineOperand &MO : MI.operands()) {
+          if (!MO.isReg() || !MO.getReg().isVirtual())
+            continue;
+
+          if (!MO.isDef())
+            continue;
+
+          const Register CandidateReg = MO.getReg();
+          const TargetRegisterClass *CandidateRC =
+              MRI->getRegClass(CandidateReg);
+
+          // Only process VGPR registers
+          if (!TRI->isVGPRClass(CandidateRC))
+            continue;
+          LLVM_DEBUG(dbgs() << "\nAdding antihints for instruction: ";
+                     MI.dump(); dbgs() << "\n");
----------------
arsenm wrote:

```suggestion
          LLVM_DEBUG(dbgs() << "\nAdding antihints for instruction: " << MI);
```

https://github.com/llvm/llvm-project/pull/156943


More information about the llvm-commits mailing list