[llvm] AMDGPU: Add NextUseAnalysis Pass (PR #178873)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 31 04:37:37 PDT 2026


================
@@ -0,0 +1,2344 @@
+//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUNextUseAnalysis.h"
+#include "AMDGPU.h"
+#include "GCNRegPressure.h"
+#include "GCNSubtarget.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <queue>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-next-use-analysis"
+
+//==============================================================================
+// Command-line options
+//==============================================================================
namespace {

// Debug switch: dump the computed next-use distances in text form. The
// consumer of this flag is not visible in this chunk.
cl::opt<bool> DumpNextUseDistance("amdgpu-next-use-analysis-dump-distance",
                                  cl::init(false), cl::Hidden);

// When non-empty, presumably a path to write the distance dump as JSON —
// TODO confirm against the dump code later in this file.
cl::opt<std::string>
    DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
                              cl::Hidden);
// Debug switch: request a more detailed variant of the distance dump.
cl::opt<bool>
    DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
                               cl::init(false), cl::Hidden);

// Selects the instruction-numbering compatibility mode (see computeMode()
// below: in compute mode PHIs share an id with the first non-PHI).
// NOTE(review): the clEnumValN help strings are still "TBD" — fill in real
// descriptions before landing.
cl::opt<AMDGPUNextUseAnalysis::CompatibilityMode> CompatModeOpt(
    "amdgpu-next-use-analysis-compatibility-mode", cl::Hidden,
    cl::init(AMDGPUNextUseAnalysis::CompatibilityMode::Graphics),
    cl::values(clEnumValN(AMDGPUNextUseAnalysis::CompatibilityMode::Graphics,
                          "graphics", "TBD"),
               clEnumValN(AMDGPUNextUseAnalysis::CompatibilityMode::Compute,
                          "compute", "TBD")));
} // namespace
+
+//==============================================================================
+// LiveRegUse - Represents a live register use with its distance. Used for
+// tracking and sorting register uses by distance.
+//==============================================================================
+namespace {
+using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
+struct LiveRegUse : public UseDistancePair {
+  using Base = UseDistancePair;
+
+  // 'nullptr' indicates an unset/invalid state.
+  LiveRegUse() : UseDistancePair(nullptr, 0) {}
+  LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
+      : UseDistancePair(Use, Dist) {}
+  LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
+
+  bool isUnset() const { return Use == nullptr; }
+
+  Register getReg() const { return Use->getReg(); }
+  unsigned getSubReg() const { return Use->getSubReg(); }
+  LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
+    return TRI->getSubRegIndexLaneMask(Use->getSubReg());
+  }
+
+  bool isCloserThan(const LiveRegUse &X) const {
+    if (Dist < X.Dist)
+      return true;
+
+    if (Dist > X.Dist)
+      return false;
+
+    if (Use == X.Use)
+      return false;
+
+    // Ugh. In computeMode PHIs and the first non-PHI instruction have id
+    // 0. In this case, consider PHIs as less than the first non-PHI
+    // instruction.
+    const MachineInstr *ThisMI = Use->getParent();
+    const MachineInstr *XMI = X.Use->getParent();
+    const MachineBasicBlock *ThisMBB = ThisMI->getParent();
+    if (ThisMBB == XMI->getParent()) {
+      bool XIsPhiOp = ThisMI->isPHI();
+      bool YIsPhiOp = XMI->isPHI();
+      if (XIsPhiOp && !YIsPhiOp && XMI == &(*ThisMBB->getFirstNonPHI()))
+        return true;
+    }
+
+    // Ensure deterministic results
+    return X.getReg() < getReg();
+  }
+};
+
+inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
+  if (!Closest.Use || X.isCloserThan(Closest)) {
+    Closest = X;
+    return true;
+  }
+  return false;
+}
+
+inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
+  if (!Furthest.Use || Furthest.isCloserThan(X)) {
+    Furthest = X;
+    return true;
+  }
+  return false;
+}
+} // namespace
+
+//==============================================================================
+// json helpers
+//==============================================================================
+namespace {
+template <typename Lambda>
+void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
+  J.attributeBegin(Name);
+  raw_ostream &OS = J.rawValueBegin();
+  OS << '"';
+  L(OS);
+  OS << '"';
+  J.rawValueEnd();
+  J.attributeEnd();
+}
+void printStringAttr(json::OStream &J, const char *Name, Printable P) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) { OS << P; });
+}
+
+void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
+                     ModuleSlotTracker &MST) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) {
+    MI.print(OS, MST,
+             /* IsStandalone    */ false,
+             /* SkipOpers       */ false,
+             /* SkipDebugLoc    */ false,
+             /* AddNewLine ---> */ false,
+             /* TargetInstrInfo */ nullptr);
+  });
+}
+
+void printMBBNameAttr(json::OStream &J, const char *Name,
+                      const MachineBasicBlock &MBB, ModuleSlotTracker &MST) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) {
+    MBB.printName(OS, MachineBasicBlock::PrintNameIr, &MST);
+  });
+}
+
+template <typename NameLambda, typename ValueT>
+void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
+  std::string Name;
+  raw_string_ostream NameOS(Name);
+  NL(NameOS);
+  J.attribute(NameOS.str(), V);
+}
+
+template <typename ValueT>
+void printAttr(json::OStream &J, const Printable &P, ValueT V) {
+  printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
+}
+
+} // namespace
+
+//==============================================================================
+// AMDGPUNextUseAnalysisImpl
+//==============================================================================
+class llvm::AMDGPUNextUseAnalysisImpl {
+  using CompatibilityMode = AMDGPUNextUseAnalysis::CompatibilityMode;
+  const MachineFunction *MF = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  const MachineLoopInfo *MLI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+
+  using InstrIdTy = unsigned;
+  using InstrToIdMap = DenseMap<const MachineInstr *, InstrIdTy>;
+  InstrToIdMap InstrToId;
+  CompatibilityMode CompatMode;
+
  // Build all per-function lookup tables: per-block instruction ids, the
  // DFS-classified CFG edge table, and inter-block distances.
  void initializeTables() {
    for (const MachineBasicBlock &BB : *MF)
      calcInstrIds(&BB, InstrToId);
    initializeCfgPaths();
    initializeInterBlockDistances();
  }

  // Drop all cached state so the analysis can be re-run on another function.
  void clearTables() {
    InstrToId.clear();
    RegUseMap.clear();
    Paths.clear();

    LastMI = nullptr;
    LastDistances.clear();
  }

  // In compute mode, PHIs do not advance the instruction-id counter (see
  // calcInstrIds), so they share an id with the first non-PHI instruction.
  bool computeMode() const { return CompatMode == CompatibilityMode::Compute; }

  bool graphicsMode() const {
    return CompatMode == CompatibilityMode::Graphics;
  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Instruction Ids
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
  // Assign each instruction in \p BB a 0-based id in block order. In compute
  // mode, PHI instructions do not advance the counter, so every PHI and the
  // first non-PHI instruction of a block share id 0.
  void calcInstrIds(const MachineBasicBlock *BB,
                    InstrToIdMap &MutableInstrToId) const {
    InstrIdTy Id = 0;
    for (auto &MI : BB->instrs()) {
      MutableInstrToId[&MI] = Id;
      if (!computeMode() || !MI.isPHI())
        ++Id;
    }
  }

  /// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
  /// found in the map.
  InstrIdTy getInstrId(const MachineInstr *MI) const {
    auto It = InstrToId.find(MI);
    if (It != InstrToId.end())
      return It->second;

    // Renumber the MBB.
    // TODO: Renumber from MI onwards.
    // Logically-const lazy cache refresh: InstrToId is mutated through a
    // const_cast because this accessor is reached from const query paths.
    auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
    calcInstrIds(MI->getParent(), MutableInstrToId);
    return InstrToId.find(MI)->second;
  }
+
  // Length of the segment from MI (inclusive) to the first instruction of the
  // basic block.
  InstrIdTy getHeadLen(const MachineInstr *MI) const {
    const MachineBasicBlock *MBB = MI->getParent();
    // NOTE(review): calcInstrIds numbers the first instruction of every block
    // 0 (even in compute mode, where the leading PHIs also get 0), so the
    // middle term is always zero and '+' happens to equal the expected '-'.
    // '-' would read more naturally and not depend on that invariant.
    return getInstrId(MI) + getInstrId(&MBB->instr_front()) + 1;
  }

  // Length of the segment from MI (exclusive) to the last instruction of the
  // basic block.
  InstrIdTy getTailLen(const MachineInstr *MI) const {
    const MachineBasicBlock *MBB = MI->getParent();
    return getInstrId(&MBB->instr_back()) - getInstrId(MI);
  }

  // Length of the segment from 'From' to 'To' (exclusive). Both instructions
  // must be in the same basic block.
  InstrIdTy getDistance(const MachineInstr *From,
                        const MachineInstr *To) const {
    assert(From->getParent() == To->getParent());
    return getInstrId(To) - getInstrId(From);
  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // RegUses - cache of uses by register
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
+  DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;
+
+  const SmallVector<const MachineOperand *> &getRegisterUses(Register Reg) {
+    auto I = RegUseMap.find(Reg);
+    if (I != RegUseMap.end())
+      return I->second;
+
+    SmallVector<const MachineOperand *> &Uses = RegUseMap[Reg];
+    for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
+      if (!UseMO.isUndef())
+        Uses.push_back(&UseMO);
+    }
+    return Uses;
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Paths
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
  // A directed (source, destination) basic-block pair; key type for the
  // Paths cache below.
  class Path
      : public std::pair<const MachineBasicBlock *, const MachineBasicBlock *> {
  public:
    using Base =
        std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
    using Base::pair;
    Path(const Base &Pair) : Base(Pair) {};

    const MachineBasicBlock *src() const { return first; }
    const MachineBasicBlock *dst() const { return second; }

    // Reuse the underlying pair's DenseMapInfo so Path can key a DenseMap.
    using DenseMapInfo = llvm::DenseMapInfo<Base>;
  };

  // CFG-edge classification produced by initializeCfgPaths. 'None' marks
  // entries that describe a (possibly multi-block) path rather than a single
  // edge. The numeric values matter: initializePathInfo derives
  // forward-reachability from '0 < EK'.
  enum EdgeKind { Back = -1, None = 0, Forward = 1 };
  // Lazily-populated facts about a path between two blocks. The optional and
  // tri-state members are filled in on demand through the
  // initializePathInfo* helpers.
  struct PathInfo {
    EdgeKind EK;
    bool Reachable;
    // Tri-state: -1 unset, 0 not reachable via forward edges, 1 reachable.
    int ForwardReachable;
    unsigned LoopExits;
    std::optional<NextUseDistance> ShortestDistance;
    std::optional<NextUseDistance> ShortestUnweightedDistance;
    InstrIdTy Size;

    PathInfo()
        : EK(None), Reachable(false), ForwardReachable(-1), LoopExits(0),
          Size(0) {}

    bool isBackedge() const { return EK == EdgeKind::Back; }

    bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
    bool isForwardReachableUnset() const { return ForwardReachable < 0; }
    bool isForwardReachable() const { return ForwardReachable == 1; }
    bool isNotForwardReachable() const { return ForwardReachable == 0; }
  };
+
+  //----------------------------------------------------------------------------
+  // Path Storage - 'Paths' is lazily populated and some members are lazily
+  // computed. All mutations should go through one of the 'initializePathInfo*'
+  // flavors below.
+  //----------------------------------------------------------------------------
  DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;

  // Pure lookup: returns the cached PathInfo or nullptr; never populates.
  const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
                                   const MachineBasicBlock *To) const {
    auto I = Paths.find({From, To});
    return I == Paths.end() ? nullptr : &I->second;
  }

  // Returns the PathInfo for (From, To), creating and initializing the entry
  // on first use. Logically const: the cache is mutated via const_cast.
  PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;

    Path P(From, To);
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (!Inserted)
      return I->second;

    // Note: calcIsReachable may itself insert entries into Paths.
    bool Reachable = calcIsReachable(P.src(), P.dst());

    // Iterator may have been invalidated by calcIsReachable, so get a fresh
    // reference to the slot.
    return NonConstThis->initializePathInfo(MutablePaths.at(P), P,
                                            EdgeKind::None, Reachable);
  }

  // Const-reference alias for getOrInitPathInfo.
  const PathInfo &pathInfoFor(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    return getOrInitPathInfo(From, To);
  }
+
+  //----------------------------------------------------------------------------
+  // initializePathInfo* - various flavors of PathInfo initialization. They
+  // (should) always funnel to the first flavor below.
+  //----------------------------------------------------------------------------
  // Core initializer: fills every eagerly-computed member of \p Slot for path
  // \p P. All other initializePathInfo* flavors funnel here.
  PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
                               bool Reachable) {
    Slot.EK = EK;
    Slot.Reachable = Reachable;
    // For a real edge, forward-reachability is known immediately (Forward=1
    // maps to true, Back=-1 to false); for a generic path it stays unset (-1).
    Slot.ForwardReachable = EK != EdgeKind::None ? (0 < EK) : -1;
    Slot.LoopExits = Slot.Reachable ? calcLoopExits(P.src(), P.dst()) : 0;
    // Size is only meaningful for the trivial single-block path.
    Slot.Size = P.src() == P.dst() ? calcSize(P.src()) : 0;
    if (EK != EdgeKind::None)
      Slot.ShortestUnweightedDistance = 0;
    return Slot;
  }

  // Const wrapper: (re)initializes the slot for \p P, creating it if needed.
  PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    return NonConstThis->initializePathInfo(MutablePaths[P], P, EK, Reachable);
  }

  // Initializes the slot for \p P only if it does not already exist. Returns
  // the slot and whether it was newly inserted.
  std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
                                                      bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (Inserted)
      NonConstThis->initializePathInfo(I->second, P, EK, Reachable);
    return {&I->second, Inserted};
  }

  // One-shot setter for the lazily-computed ForwardReachable member; asserts
  // it has not been set before. Returns \p Value for caller convenience.
  bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
                                          const MachineBasicBlock *To,
                                          bool Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(Slot.isForwardReachableUnset());
    Slot.ForwardReachable = Value;
    return Value;
  }

  // One-shot setter for the lazily-computed ShortestDistance member.
  NextUseDistance
  initializePathInfoShortestDistance(const MachineBasicBlock *From,
                                     const MachineBasicBlock *To,
                                     NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestDistance.has_value());
    Slot.ShortestDistance = Value;
    return Value;
  }

  // One-shot setter for the lazily-computed ShortestUnweightedDistance member.
  NextUseDistance
  initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
                                               const MachineBasicBlock *To,
                                               NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestUnweightedDistance.has_value());
    Slot.ShortestUnweightedDistance = Value;
    return Value;
  }
+
+  //----------------------------------------------------------------------------
+  // initialize*Paths
+  //----------------------------------------------------------------------------
private:
  // Bulk-initialize path slots with already-known reachability. Each path is
  // fully (re)initialized as a non-edge (EdgeKind::None) entry.
  void initializePaths(const SmallVector<Path> &ReachablePaths,
                       const SmallVector<Path> &UnreachablePaths) const {
    for (bool R : {true, false}) {
      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
      for (const Path &P : ToInit)
        initializePathInfo(P, EdgeKind::None, R);
    }
  }

  // Bulk-set only the ForwardReachable member of each path's slot, creating
  // slots as needed; asserts against contradicting a previously-set value.
  void
  initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
                             const SmallVector<Path> &UnreachablePaths) const {
    for (bool R : {true, false}) {
      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
      for (const Path &P : ToInit) {
        PathInfo &Slot = getOrInitPathInfo(P.src(), P.dst());
        assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
        Slot.ForwardReachable = R;
      }
    }
  }
+
+  // Follow the control flow graph starting at the entry block until all blocks
+  // have been visited. Along the way, initialize the PathInfo for each edge
+  // traversed.
+  void initializeCfgPaths() {
+    Paths.clear();
+
+    enum VisitState { Undiscovered, Visiting, Finished };
+    DenseMap<const MachineBasicBlock *, VisitState> State;
+
+    SmallVector<const MachineBasicBlock *> Work{&MF->front()};
+    State[&MF->front()] = Undiscovered;
+
+    while (!Work.empty()) {
+      const MachineBasicBlock *Src = Work.back();
+      VisitState &SrcState = State[Src];
+
+      if (SrcState == Visiting) {
+        Work.pop_back();
+        SrcState = Finished;
+        continue;
+      }
+
+      SrcState = Visiting;
+      for (const MachineBasicBlock *Dst : Src->successors()) {
+        const VisitState DstState = State.lookup(Dst);
+
+        EdgeKind EK;
+        if (DstState == Undiscovered) {
+          EK = EdgeKind::Forward;
+          Work.push_back(Dst);
+        } else if (DstState == Visiting) {
+          EK = EdgeKind::Back;
+        } else {
+          EK = EdgeKind::Forward;
+        }
+
+        Path P(Src, Dst);
+        assert(!Paths.contains(P));
+        initializePathInfo(P, EK, /*Reachable*/ true);
+      }
+    }
+  }
+
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  // Calculate features
+  //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
  // Number of id-advancing instructions in \p BB: in compute mode PHIs are
  // excluded, mirroring calcInstrIds.
  InstrIdTy calcSize(const MachineBasicBlock *BB) const {
    InstrIdTy Size = BB->size();
    if (computeMode())
      Size -= std::distance(BB->begin(), BB->getFirstNonPHI());
    return Size;
  }

  // Size of 'From' weighted by the number of loop levels exited when control
  // transfers from 'From' to 'To' (getSize/applyLoopWeight are defined
  // elsewhere in this file).
  NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
                                   const MachineBasicBlock *To) const {
    NextUseDistance Size{getSize(From)};
    return Size.applyLoopWeight(getNumLoopExits(From, To));
  }

  // Count the loops in Loop's nest (up to and including its outermost loop)
  // that do not contain 'To' — i.e. how many loop levels a path reaching
  // 'To' must exit.
  static unsigned calcEffectiveLoopDepth(MachineLoop *Loop,
                                         const MachineBasicBlock *To) {
    unsigned LoopDepth = 0;
    MachineLoop *const End = Loop->getOutermostLoop()->getParentLoop();
    for (MachineLoop *L = Loop; L != End; L = L->getParentLoop()) {
      if (!L->contains(To))
        LoopDepth++;
    }
    return LoopDepth;
  }

  // Number of loop levels exited on a path from 'From' to 'To'. Zero when
  // 'From' is not in a loop, or when 'To''s loop nest contains 'From''s.
  unsigned calcLoopExits(const MachineBasicBlock *From,
                         const MachineBasicBlock *To) const {
    MachineLoop *LoopFrom = MLI->getLoopFor(From);
    MachineLoop *LoopTo = MLI->getLoopFor(To);

    if (!LoopFrom)
      return 0;

    if (LoopTo && LoopFrom->contains(LoopTo)) // covers LoopFrom == LoopTo
      return 0;

    // 'To' is in an enclosing loop: exit the depth difference.
    if (LoopTo && LoopTo->contains(LoopFrom))
      return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();

    // Disjoint nests: count levels of From's nest not containing 'To'.
    return calcEffectiveLoopDepth(LoopFrom, To);
  }
+
+  // Attempt to find a path from 'From' to 'To' using a depth first search. If
+  // 'ForwardOnly' is true, do not follow backedges. As a performance
+  // improvement, this may initialize reachable intermediate paths or paths we
+  // determine are unreachable.
+  bool calcIsReachable(const MachineBasicBlock *From,
+                       const MachineBasicBlock *To,
+                       bool ForwardOnly = false) const {
+    if (!ForwardOnly && interBlockDistanceFor(From, To))
+      return true;
+
+    if (From == To && !MLI->getLoopFor(From))
----------------
macurtis-amd wrote:

Re-ordered `if`s as suggested.

https://github.com/llvm/llvm-project/pull/178873


More information about the llvm-commits mailing list