[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

Fri Jan 9 04:27:11 PST 2026

================
@@ -121,7 +121,170 @@ bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
     return false;
   }
 
-  return false;
+  bool Changed = false;
+
+  // Can't inline anything if there aren't any calls.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasCalls() && !MFI.hasTailCall())
+    return false;
+
+  // Collect calls to inline.
+  SmallVector<MachineInstr *, 4> CallsToInline;
+  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (!MI.isCall())
+        continue;
+
+      const MachineOperand *CalleeOp =
+          TII->getNamedOperand(MI, AMDGPU::OpName::callee);
+      if (CalleeOp && CalleeOp->isGlobal()) {
+        if (auto *CalledFunc = dyn_cast<Function>(CalleeOp->getGlobal())) {
+          // Partial inlining is not supported yet, because the inlining pass
+          // manager does not run the rest of the pass pipeline on functions
+          // that get inlined (including outputting code for them).
+          if (CalledFunc == &F)
+            report_fatal_error("Recursive calls in whole wave functions are "
+                               "not supported yet");
+
+          if (shouldInlineCallsTo(*CalledFunc)) {
+            CallsToInline.push_back(&MI);
+          }
+        }
+      }
+    }
+  }
+
+  // Perform the actual inlining.
+  for (MachineInstr *CallMI : CallsToInline) {
+    const MachineOperand *CalleeOp =
+        TII->getNamedOperand(*CallMI, AMDGPU::OpName::callee);
+    assert(CalleeOp && CalleeOp->isGlobal() &&
+           isa<Function>(CalleeOp->getGlobal()));
+    auto *Callee = cast<Function>(CalleeOp->getGlobal());
+
+    MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
+    assert(CalleeMF && "Couldn't get MachineFunction for callee");
+    assert(!CalleeMF->empty() && "Machine function body is empty");
+
+    LLVM_DEBUG(dbgs() << "    Inlining machine call to: " << Callee->getName()
+                      << " (" << CalleeMF->size() << " basic blocks)\n");
+
+    inlineMachineFunction(&MF, CallMI, CalleeMF, TII);
+    cleanupAfterInlining(&MF, CallMI, TII);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+void AMDGPUMachineLevelInliner::inlineMachineFunction(MachineFunction *CallerMF,
+                                                      MachineInstr *CallMI,
+                                                      MachineFunction *CalleeMF,
+                                                      const SIInstrInfo *TII) {
+
+  MachineBasicBlock *CallMBB = CallMI->getParent();
+  MachineBasicBlock *ContinuationMBB =
+      CallMBB->splitAt(*CallMI, /*UpdateLiveIns=*/true);
+
+  // Splitting marks the ContinuationMBB as a successor, but we want to
+  // fallthrough to the body of the inlined function instead.
+  CallMBB->removeSuccessor(ContinuationMBB);
+
+  // First we clone all the blocks and build a map, so we can patch up the
+  // control flow while cloning their content in a second pass.
+  DenseMap<const MachineBasicBlock *, MachineBasicBlock *> ClonedBlocks;
+  for (const MachineBasicBlock &OrigMBB : *CalleeMF) {
+    MachineBasicBlock *ClonedMBB =
+        CallerMF->CreateMachineBasicBlock(OrigMBB.getBasicBlock());
+    CallerMF->insert(ContinuationMBB->getIterator(), ClonedMBB);
+    ClonedBlocks[&OrigMBB] = ClonedMBB;
+  }
+
+  MachineBasicBlock *ClonedEntry = ClonedBlocks[&CalleeMF->front()];
+  CallMBB->addSuccessor(ClonedEntry);
+
+  for (const MachineBasicBlock &OrigMBB : *CalleeMF) {
+    MachineBasicBlock *ClonedMBB = ClonedBlocks[&OrigMBB];
+
+    for (MachineBasicBlock *OrigSucc : OrigMBB.successors())
+      ClonedMBB->addSuccessor(ClonedBlocks[OrigSucc]);
+
+    for (auto &LiveIn : OrigMBB.liveins())
+      ClonedMBB->addLiveIn(LiveIn);
----------------
rovka wrote:

Fixed, thanks!

https://github.com/llvm/llvm-project/pull/169476