[llvm] [AMDGPU] Added hot-block-rematerialize pass (PR #136631)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu May 1 05:10:38 PDT 2025


================
@@ -0,0 +1,657 @@
+//===------- AMDGPUMIRUtils.cpp - Helpers for MIR passes ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Helper functions for MIR passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMIRUtils.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "xb-mir-util"
+using namespace llvm;
+
+namespace llvm {
+// Advances BBEnd, a reverse iterator into MBB, past any debug instructions.
+// Returns true if BBEnd stops on a non-debug instruction, or false if it
+// reaches MBB.rend() without finding one.
+bool getNonDebugMBBEnd(MachineBasicBlock::reverse_iterator &BBEnd,
+                       MachineBasicBlock &MBB) {
+  const MachineBasicBlock::reverse_iterator REnd = MBB.rend();
+  for (; BBEnd != REnd; ++BBEnd) {
+    // The first non-debug instruction is where the scan stops.
+    if (!BBEnd->isDebugInstr())
+      return true;
+  }
+  return false;
+}
+} // namespace llvm
+
+namespace {
+// Returns true when both endpoints of Seg map to instructions in the same
+// basic block. In that case the block is also recorded in TouchedMBBSet.
+// TouchedMBBSet is left untouched when the segment is not local.
+bool isLocalSegment(const LiveRange::Segment *Seg, SlotIndexes *Indexes,
+                    SmallDenseSet<MachineBasicBlock *, 2> &TouchedMBBSet) {
+  MachineInstr *const FirstMI = Indexes->getInstructionFromIndex(Seg->start);
+  MachineInstr *const LastMI = Indexes->getInstructionFromIndex(Seg->end);
+  // An endpoint that does not map to an instruction is treated as non-local.
+  if (FirstMI == nullptr || LastMI == nullptr)
+    return false;
+
+  // Local means both endpoint instructions share one parent block.
+  MachineBasicBlock *const Parent = FirstMI->getParent();
+  if (Parent != LastMI->getParent())
+    return false;
+
+  // Remember which block this local segment lives in.
+  TouchedMBBSet.insert(Parent);
+  return true;
+}
+
+// Returns true when every segment of Range is local according to
+// isLocalSegment. Segments are visited in order and the scan stops at the
+// first non-local one; TouchedMBBSet is passed through to each call.
+bool isLocalLiveRange(const LiveRange *Range, SlotIndexes *Indexes,
+                      SmallDenseSet<MachineBasicBlock *, 2> &TouchedMBBSet) {
+  auto SegIt = Range->segments.begin();
+  const auto SegEnd = Range->segments.end();
+  // Walk forward while segments stay local.
+  while (SegIt != SegEnd && isLocalSegment(&*SegIt, Indexes, TouchedMBBSet))
+    ++SegIt;
+  // Reaching the end means no segment was rejected.
+  return SegIt == SegEnd;
+}
+
+// Overload that only answers the locality question: Seg is local when both
+// of its endpoints map to instructions that live in the same basic block.
+bool isLocalSegment(const LiveRange::Segment *Seg, SlotIndexes *Indexes) {
+  const MachineInstr *FirstMI = Indexes->getInstructionFromIndex(Seg->start);
+  const MachineInstr *LastMI = Indexes->getInstructionFromIndex(Seg->end);
+  // An endpoint without an instruction is treated as non-local; otherwise the
+  // segment is local exactly when both instructions share a parent block.
+  return FirstMI && LastMI && FirstMI->getParent() == LastMI->getParent();
+}
+
+// Returns true when every segment of Range is local according to the
+// two-argument isLocalSegment.
+bool isLocalLiveRange(const LiveRange *Range, SlotIndexes *Indexes) {
+  return llvm::all_of(Range->segments, [Indexes](const LiveRange::Segment &S) {
+    return isLocalSegment(&S, Indexes);
+  });
+}
+
+// LoopInfo contains a mapping from basic block to the innermost loop. Find
+// the outermost loop in the loop nest that contains BB. Returns nullptr when
+// LI reports no loop for BB.
+const MachineLoop *getOutermostLoop(const MachineLoopInfo *LI,
+                                    const MachineBasicBlock *BB) {
+  const MachineLoop *Innermost = LI->getLoopFor(BB);
+  // LoopBase already provides the walk up the parent chain, so reuse it
+  // instead of re-implementing the loop here.
+  return Innermost ? Innermost->getOutermostLoop() : nullptr;
+}
+
+// Returns true when BB1 and BB2 have the same outermost loop as reported by
+// getOutermostLoop. If no loop is found for BB1 the result is false.
+bool loopContainsBoth(const MachineLoopInfo *LI, const MachineBasicBlock *BB1,
+                      const MachineBasicBlock *BB2) {
+  const MachineLoop *Outer1 = getOutermostLoop(LI, BB1);
+  // Without a loop for BB1 there is nothing the two blocks could share.
+  if (!Outer1)
+    return false;
+  return Outer1 == getOutermostLoop(LI, BB2);
+}
+
+} // namespace
+
+namespace llvm {
+
+// Returns true if SCC is live at MI within MBB. The scan walks forward from
+// MI to the end of the block: a read of SCC seen first means live, a def of
+// SCC seen first means dead. If neither is found, SCC is live only if some
+// successor of MBB lists it as a live-in.
+bool isSccLiveAt(llvm::MachineBasicBlock *MBB,
+                 llvm::MachineBasicBlock::iterator MI) {
+  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
+  for (const MachineInstr &Inst : llvm::make_range(MI, MBB->end())) {
+    // The read is tested before the def, so an instruction that both reads
+    // and defines SCC counts as a use.
+    if (Inst.readsRegister(AMDGPU::SCC, TRI))
+      return true;
+    if (Inst.definesRegister(AMDGPU::SCC, TRI))
+      return false;
+  }
+
+  // Nothing in the rest of the block settles it; defer to successor live-ins.
+  return llvm::any_of(MBB->successors(), [](const MachineBasicBlock *Succ) {
+    return Succ->isLiveIn(AMDGPU::SCC);
+  });
+}
+
+//
+// This function is useful when we need to insert a new instruction that
+// defines scc in a block and must find a location that will not clobber
+// the existing value.
+//
+// Starting at `MI` it looks backwards to try to find a place in the block
+// where scc is dead so we can insert our new def there. If no location can
+// be found it will save and restore scc around `MI`. This way `MI` can
+// safely be used as the new insert location.
+//
+MachineBasicBlock::iterator findOrCreateInsertionPointForSccDef(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator MI,
+    const TargetRegisterInfo *TRI, const SIInstrInfo *TII,
+    MachineRegisterInfo *MRI, SccDefInsertPointConstraintFlags Constraints) {
+  // If SCC is dead at MI then we can use MI as the insert point.
+  if (!llvm::isSccLiveAt(MBB, MI))
+    return MI;
+
+  const bool CheckForExecWrite =
+      Constraints & SccDefInsertPointConstraintFlags::NoExecWrite;
+
+  // Get the starting reverse iterator taking care to handle the MBB->end()
+  // case.
+  MachineBasicBlock::reverse_iterator Start;
+  if (MI == MBB->end())
+    Start = MBB->rbegin();
+  else
+    Start = MI.getReverse();
+
+  // Otherwise, walk backwards through the block looking for a location where
+  // SCC is dead.
+  for (MachineBasicBlock::reverse_iterator It = Start, End = MBB->rend();
+       It != End; ++It) {
+    // If the instruction modifies exec then we cannot use it as
+    // an insertion point (if that is a constraint from the caller).
+    // The check for EXEC works for both wave64 and wave32 because
+    // it will also catch writes to the subregisters (e.g. exec_lo).
+    if (CheckForExecWrite && It->modifiesRegister(AMDGPU::EXEC, TRI))
+      break;
+
+    if (It->modifiesRegister(AMDGPU::SCC, TRI) &&
+        !It->readsRegister(AMDGPU::SCC, TRI))
+      return It->getIterator();
+  }
+
+  // If no safe location can be found in the block we can save and restore
+  // SCC around MI. There is no way to directly read or write SCC so we use
+  // s_cselect to read the current value of SCC and s_cmp to write the saved
+  // value back to SCC.
----------------
arsenm wrote:

Should just use COPY and let that lower to the s_cselect 

https://github.com/llvm/llvm-project/pull/136631


More information about the llvm-commits mailing list