[llvm] [AMDGPU] Register allocation anti-hints to reduce MFMA hazard NOPs (PR #156943)

Wed Sep 24 02:38:40 PDT 2025

================
@@ -248,6 +254,84 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
 
   bool Changed = false;
 
+  // Single pass implementation
+  if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts()) {
+    // Max lookback window for RAW or WAW hazard
+    constexpr unsigned MaxLookbackWindow = 19;
+    for (const MachineBasicBlock &MBB : MF) {
+
+      SmallVector<std::pair<SlotIndex, SmallVector<Register, 4>>, 16>
+          RecentMFMAs;
+      for (const MachineInstr &MI : MBB) {
+        if (MI.isDebugInstr())
+          continue;
+        const SlotIndex CurrentSlot = LIS->getInstructionIndex(MI).getRegSlot();
+        // Handle MFMA instructions
+        if (SIInstrInfo::isMFMA(MI)) {
+          SmallVector<Register, 4> MFMARegisters;
+          auto collectMFMARegister = [&](unsigned OpIdx) {
+            if (OpIdx >= MI.getNumOperands())
+              return;
+
+            const MachineOperand &MO = MI.getOperand(OpIdx);
+            if (MO.isReg() && MO.getReg().isVirtual())
+              MFMARegisters.push_back(MO.getReg());
+          };
+          // Only collect Matrix C (operand 3) and destination (operand 0)
+          // registers
+          collectMFMARegister(0);
+          collectMFMARegister(3);
----------------
rovka wrote:

I haven't looked at all the MFMA instruction definitions, but if they all have consistent names for their operands you could use `getNamedOperand` instead of hardcoding the operand indices.

https://github.com/llvm/llvm-project/pull/156943