[llvm] AMDGPU: Add NextUseAnalysis Pass (PR #178873)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 31 04:37:37 PDT 2026
================
@@ -0,0 +1,2344 @@
+//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUNextUseAnalysis.h"
+#include "AMDGPU.h"
+#include "GCNRegPressure.h"
+#include "GCNSubtarget.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <queue>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-next-use-analysis"
+
+//==============================================================================
+// Options etc
+//==============================================================================
+namespace {
+
+// Debug aid: print the computed next-use distances.
+cl::opt<bool> DumpNextUseDistance("amdgpu-next-use-analysis-dump-distance",
+                                  cl::init(false), cl::Hidden);
+
+// When non-empty, path of a file to which next-use distances are written as
+// JSON (see the JSON helpers below).
+cl::opt<std::string>
+    DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
+                              cl::Hidden);
+
+// Include extra per-instruction detail in the distance dumps.
+cl::opt<bool>
+    DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
+                               cl::init(false), cl::Hidden);
+
+// Selects the instruction-numbering compatibility mode (see
+// AMDGPUNextUseAnalysis::CompatibilityMode). In Compute mode, PHIs share an
+// instruction id with the first non-PHI instruction of their block.
+cl::opt<AMDGPUNextUseAnalysis::CompatibilityMode> CompatModeOpt(
+    "amdgpu-next-use-analysis-compatibility-mode", cl::Hidden,
+    cl::init(AMDGPUNextUseAnalysis::CompatibilityMode::Graphics),
+    cl::values(clEnumValN(AMDGPUNextUseAnalysis::CompatibilityMode::Graphics,
+                          "graphics", "TBD"),
+               clEnumValN(AMDGPUNextUseAnalysis::CompatibilityMode::Compute,
+                          "compute", "TBD")));
+} // namespace
+
+//==============================================================================
+// LiveRegUse - Represents a live register use with its distance. Used for
+// tracking and sorting register uses by distance.
+//==============================================================================
+namespace {
+using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
+
+// A (use operand, next-use distance) pair with ordering helpers used to pick
+// the closest / furthest of a set of register uses. A default-constructed
+// instance (Use == nullptr) represents the unset state.
+struct LiveRegUse : public UseDistancePair {
+  using Base = UseDistancePair;
+
+  // 'nullptr' indicates an unset/invalid state.
+  LiveRegUse() : UseDistancePair(nullptr, 0) {}
+  LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
+      : UseDistancePair(Use, Dist) {}
+  LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
+
+  bool isUnset() const { return Use == nullptr; }
+
+  Register getReg() const { return Use->getReg(); }
+  unsigned getSubReg() const { return Use->getSubReg(); }
+  LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
+    return TRI->getSubRegIndexLaneMask(Use->getSubReg());
+  }
+
+  // Strict "is closer" comparison by distance with deterministic
+  // tie-breaking. Must behave as a strict weak ordering so the winner chosen
+  // by updateClosest/updateFurthest does not depend on visitation order.
+  bool isCloserThan(const LiveRegUse &X) const {
+    if (Dist < X.Dist)
+      return true;
+
+    if (Dist > X.Dist)
+      return false;
+
+    if (Use == X.Use)
+      return false;
+
+    // Ugh. In computeMode PHIs and the first non-PHI instruction have id
+    // 0. In this case, consider PHIs as less than the first non-PHI
+    // instruction. Check BOTH directions: the previous one-sided check only
+    // handled "this is the PHI", so a.isCloserThan(b) and b.isCloserThan(a)
+    // could both be true once the register tie-break below was reached,
+    // breaking antisymmetry (and hence determinism of the selection).
+    const MachineInstr *ThisMI = Use->getParent();
+    const MachineInstr *XMI = X.Use->getParent();
+    const MachineBasicBlock *ThisMBB = ThisMI->getParent();
+    if (ThisMBB == XMI->getParent()) {
+      bool ThisIsPhi = ThisMI->isPHI();
+      bool XIsPhi = XMI->isPHI();
+      if (ThisIsPhi && !XIsPhi && XMI == &(*ThisMBB->getFirstNonPHI()))
+        return true;
+      if (XIsPhi && !ThisIsPhi && ThisMI == &(*ThisMBB->getFirstNonPHI()))
+        return false; // symmetric case: X is the PHI, so X is closer.
+    }
+
+    // Ensure deterministic results
+    return X.getReg() < getReg();
+  }
+};
+
+// Replace Closest with X when X is strictly closer (or Closest is unset).
+// Returns true iff Closest was updated.
+inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
+  if (!Closest.Use || X.isCloserThan(Closest)) {
+    Closest = X;
+    return true;
+  }
+  return false;
+}
+
+// Replace Furthest with X when X is strictly further (or Furthest is unset).
+// Returns true iff Furthest was updated.
+inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
+  if (!Furthest.Use || Furthest.isCloserThan(X)) {
+    Furthest = X;
+    return true;
+  }
+  return false;
+}
+} // namespace
+
+//==============================================================================
+// json helpers
+//==============================================================================
+namespace {
+// Emit a string-valued JSON attribute whose contents are produced by the
+// callback L. The text is buffered and emitted through
+// json::OStream::attribute so that quotes, backslashes, and control
+// characters in the printed text are properly escaped. (Emitting the raw
+// callback output between hand-written '"' characters, as before, produced
+// malformed JSON whenever the printed MI/block text contained a '"' or '\'.)
+template <typename Lambda>
+void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
+  std::string Buf;
+  raw_string_ostream OS(Buf);
+  L(OS);
+  J.attribute(Name, OS.str());
+}
+// Print a Printable as a quoted string attribute.
+void printStringAttr(json::OStream &J, const char *Name, Printable P) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) { OS << P; });
+}
+
+// Print a MachineInstr as a string attribute, on a single line (no trailing
+// newline; operands and debug locations are included).
+void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
+                     ModuleSlotTracker &MST) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) {
+    MI.print(OS, MST,
+             /* IsStandalone */ false,
+             /* SkipOpers */ false,
+             /* SkipDebugLoc */ false,
+             /* AddNewLine ---> */ false,
+             /* TargetInstrInfo */ nullptr);
+  });
+}
+
+// Print a basic block's (IR-derived) name as a string attribute.
+void printMBBNameAttr(json::OStream &J, const char *Name,
+                      const MachineBasicBlock &MBB, ModuleSlotTracker &MST) {
+  printStringAttr(J, Name, [&](raw_ostream &OS) {
+    MBB.printName(OS, MachineBasicBlock::PrintNameIr, &MST);
+  });
+}
+
+// Print an attribute whose *name* (not value) is produced by a callback.
+template <typename NameLambda, typename ValueT>
+void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
+  std::string Name;
+  raw_string_ostream NameOS(Name);
+  NL(NameOS);
+  J.attribute(NameOS.str(), V);
+}
+
+// Print an attribute whose name is a Printable.
+template <typename ValueT>
+void printAttr(json::OStream &J, const Printable &P, ValueT V) {
+  printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
+}
+
+} // namespace
+
+//==============================================================================
+// AMDGPUNextUseAnalysisImpl
+//==============================================================================
+class llvm::AMDGPUNextUseAnalysisImpl {
+  using CompatibilityMode = AMDGPUNextUseAnalysis::CompatibilityMode;
+
+  // Analysis context, set when the analysis runs on a function.
+  const MachineFunction *MF = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  const MachineLoopInfo *MLI = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+
+  // Per-block instruction numbering; see calcInstrIds. In compute mode, PHIs
+  // share id 0 with the first non-PHI instruction of their block.
+  using InstrIdTy = unsigned;
+  using InstrToIdMap = DenseMap<const MachineInstr *, InstrIdTy>;
+  InstrToIdMap InstrToId;
+  CompatibilityMode CompatMode;
+
+  // Build the tables queried during the analysis: instruction ids, CFG path
+  // info, and inter-block distances.
+  void initializeTables() {
+    for (const MachineBasicBlock &BB : *MF)
+      calcInstrIds(&BB, InstrToId);
+    initializeCfgPaths();
+    initializeInterBlockDistances();
+  }
+
+  // Drop all cached state (ids, cached register uses, path info, and the
+  // last-query memoization).
+  void clearTables() {
+    InstrToId.clear();
+    RegUseMap.clear();
+    Paths.clear();
+
+    LastMI = nullptr;
+    LastDistances.clear();
+  }
+
+  bool computeMode() const { return CompatMode == CompatibilityMode::Compute; }
+
+  bool graphicsMode() const {
+    return CompatMode == CompatibilityMode::Graphics;
+  }
+
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ // Instruction Ids
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
+  // Number every instruction of BB in program order starting at 0. In compute
+  // mode PHIs do not advance the counter, so all PHIs and the first non-PHI
+  // instruction of the block share the same id.
+  void calcInstrIds(const MachineBasicBlock *BB,
+                    InstrToIdMap &MutableInstrToId) const {
+    InstrIdTy Id = 0;
+    for (auto &MI : BB->instrs()) {
+      MutableInstrToId[&MI] = Id;
+      if (!computeMode() || !MI.isPHI())
+        ++Id;
+    }
+  }
+
+  /// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
+  /// found in the map.
+  InstrIdTy getInstrId(const MachineInstr *MI) const {
+    auto It = InstrToId.find(MI);
+    if (It != InstrToId.end())
+      return It->second;
+
+    // Renumber the MBB.
+    // TODO: Renumber from MI onwards.
+    // Logically-const cache refresh: ids are derived state, so casting away
+    // const here does not change observable analysis results.
+    auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
+    calcInstrIds(MI->getParent(), MutableInstrToId);
+    return InstrToId.find(MI)->second;
+  }
+
+  // Length of the segment from MI (inclusive) to the first instruction of the
+  // basic block.
+  InstrIdTy getHeadLen(const MachineInstr *MI) const {
+    const MachineBasicBlock *MBB = MI->getParent();
+    // Number of ids in [front, MI]. Note: calcInstrIds always numbers a block
+    // starting at 0, so the front id is 0 and the previous '+' here happened
+    // to give the same result; subtracting expresses the intended distance
+    // and stays correct if the numbering ever starts at a nonzero base.
+    return getInstrId(MI) - getInstrId(&MBB->instr_front()) + 1;
+  }
+
+  // Length of the segment from MI (exclusive) to the last instruction of the
+  // basic block.
+  InstrIdTy getTailLen(const MachineInstr *MI) const {
+    const MachineBasicBlock *MBB = MI->getParent();
+    return getInstrId(&MBB->instr_back()) - getInstrId(MI);
+  }
+
+  // Length of the segment from 'From' to 'To' (exclusive). Both instructions
+  // must be in the same basic block.
+  InstrIdTy getDistance(const MachineInstr *From,
+                        const MachineInstr *To) const {
+    assert(From->getParent() == To->getParent());
+    // NOTE(review): InstrIdTy is unsigned, so this wraps if 'To' precedes
+    // 'From' — callers presumably guarantee the ordering; confirm.
+    return getInstrId(To) - getInstrId(From);
+  }
+
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ // RegUses - cache of uses by register
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
+  // Cache: register -> all of its non-debug, non-undef use operands.
+  DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;
+
+  // Return (lazily populating on first query) the cached use list for Reg.
+  const SmallVector<const MachineOperand *> &getRegisterUses(Register Reg) {
+    auto I = RegUseMap.find(Reg);
+    if (I != RegUseMap.end())
+      return I->second;
+
+    SmallVector<const MachineOperand *> &Uses = RegUseMap[Reg];
+    for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
+      if (!UseMO.isUndef())
+        Uses.push_back(&UseMO);
+    }
+    return Uses;
+  }
+
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ // Paths
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
+  // A (source block, destination block) pair identifying a CFG path query.
+  class Path
+      : public std::pair<const MachineBasicBlock *, const MachineBasicBlock *> {
+  public:
+    using Base =
+        std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
+    using Base::pair;
+    Path(const Base &Pair) : Base(Pair) {};
+
+    const MachineBasicBlock *src() const { return first; }
+    const MachineBasicBlock *dst() const { return second; }
+
+    using DenseMapInfo = llvm::DenseMapInfo<Base>;
+  };
+
+  // Direct-edge classification. Values are chosen so the sign of a non-None
+  // kind encodes forward-reachability (see initializePathInfo).
+  enum EdgeKind { Back = -1, None = 0, Forward = 1 };
+
+  // Facts about a path between two blocks; some fields are lazily computed.
+  struct PathInfo {
+    EdgeKind EK;          // Edge kind when this is a direct edge, else None.
+    bool Reachable;       // dst reachable from src (any edges).
+    int ForwardReachable; // Tri-state: -1 unset, 0 no, 1 yes (forward only).
+    unsigned LoopExits;   // Loop exits crossed going src -> dst.
+    std::optional<NextUseDistance> ShortestDistance;
+    std::optional<NextUseDistance> ShortestUnweightedDistance;
+    InstrIdTy Size;       // Block size when src == dst, else 0.
+
+    PathInfo()
+        : EK(None), Reachable(false), ForwardReachable(-1), LoopExits(0),
+          Size(0) {}
+
+    bool isBackedge() const { return EK == EdgeKind::Back; }
+
+    bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
+    bool isForwardReachableUnset() const { return ForwardReachable < 0; }
+    bool isForwardReachable() const { return ForwardReachable == 1; }
+    bool isNotForwardReachable() const { return ForwardReachable == 0; }
+  };
+
+  //----------------------------------------------------------------------------
+  // Path Storage - 'Paths' is lazily populated and some members are lazily
+  // computed. All mutations should go through one of the 'initializePathInfo*'
+  // flavors below.
+  //----------------------------------------------------------------------------
+  DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;
+
+  // Return the cached PathInfo for (From, To), or nullptr if none exists yet.
+  const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
+                                   const MachineBasicBlock *To) const {
+    auto I = Paths.find({From, To});
+    return I == Paths.end() ? nullptr : &I->second;
+  }
+
+  // Return the PathInfo for (From, To), creating and initializing it (which
+  // requires a reachability query) on first use. Logically const: 'Paths' is
+  // a cache of derived facts.
+  PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
+                              const MachineBasicBlock *To) const {
+    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
+    auto &MutablePaths = NonConstThis->Paths;
+
+    Path P(From, To);
+    auto [I, Inserted] = MutablePaths.try_emplace(P);
+    if (!Inserted)
+      return I->second;
+
+    bool Reachable = calcIsReachable(P.src(), P.dst());
+
+    // Iterator may have been invalidated by calcIsReachable, so get a fresh
+    // reference to the slot.
+    return NonConstThis->initializePathInfo(MutablePaths.at(P), P,
+                                            EdgeKind::None, Reachable);
+  }
+
+  // Read accessor; may initialize the entry on first access.
+  const PathInfo &pathInfoFor(const MachineBasicBlock *From,
+                              const MachineBasicBlock *To) const {
+    return getOrInitPathInfo(From, To);
+  }
+
+  //----------------------------------------------------------------------------
+  // initializePathInfo* - various flavors of PathInfo initialization. They
+  // (should) always funnel to the first flavor below.
+  //----------------------------------------------------------------------------
+
+  // Fill 'Slot' for path P. Loop exits and same-block size are computed
+  // eagerly; the shortest-distance fields stay unset unless P is a direct
+  // edge (for which the unweighted distance is trivially 0).
+  PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
+                               bool Reachable) {
+    Slot.EK = EK;
+    Slot.Reachable = Reachable;
+    // For a direct edge, forward-reachability is the sign of EK (Forward=1,
+    // Back=0); otherwise it is left unset (-1) and resolved lazily.
+    Slot.ForwardReachable = EK != EdgeKind::None ? (0 < EK) : -1;
+    Slot.LoopExits = Slot.Reachable ? calcLoopExits(P.src(), P.dst()) : 0;
+    Slot.Size = P.src() == P.dst() ? calcSize(P.src()) : 0;
+    if (EK != EdgeKind::None)
+      Slot.ShortestUnweightedDistance = 0;
+    return Slot;
+  }
+
+  // As above, but (re)creates the map slot for P. Logically const.
+  PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
+    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
+    auto &MutablePaths = NonConstThis->Paths;
+    return NonConstThis->initializePathInfo(MutablePaths[P], P, EK, Reachable);
+  }
+
+  // Initialize P only if not already present. Returns the slot and whether a
+  // new entry was created.
+  std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
+                                                      bool Reachable) const {
+    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
+    auto &MutablePaths = NonConstThis->Paths;
+    auto [I, Inserted] = MutablePaths.try_emplace(P);
+    if (Inserted)
+      NonConstThis->initializePathInfo(I->second, P, EK, Reachable);
+    return {&I->second, Inserted};
+  }
+
+  // Record the (previously unset) forward-reachability of (From, To).
+  bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
+                                          const MachineBasicBlock *To,
+                                          bool Value) const {
+    PathInfo &Slot = getOrInitPathInfo(From, To);
+    assert(Slot.isForwardReachableUnset());
+    Slot.ForwardReachable = Value;
+    return Value;
+  }
+
+  // Record the (previously unset) shortest distance of (From, To).
+  NextUseDistance
+  initializePathInfoShortestDistance(const MachineBasicBlock *From,
+                                     const MachineBasicBlock *To,
+                                     NextUseDistance Value) const {
+    PathInfo &Slot = getOrInitPathInfo(From, To);
+    assert(!Slot.ShortestDistance.has_value());
+    Slot.ShortestDistance = Value;
+    return Value;
+  }
+
+  // Record the (previously unset) shortest unweighted distance of (From, To).
+  NextUseDistance
+  initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
+                                               const MachineBasicBlock *To,
+                                               NextUseDistance Value) const {
+    PathInfo &Slot = getOrInitPathInfo(From, To);
+    assert(!Slot.ShortestUnweightedDistance.has_value());
+    Slot.ShortestUnweightedDistance = Value;
+    return Value;
+  }
+
+ //----------------------------------------------------------------------------
+ // initialize*Paths
+ //----------------------------------------------------------------------------
+private:
+  // Bulk-initialize PathInfo entries: reachable paths first, then
+  // unreachable ones, all with EdgeKind::None.
+  void initializePaths(const SmallVector<Path> &ReachablePaths,
+                       const SmallVector<Path> &UnreachablePaths) const {
+    for (bool R : {true, false}) {
+      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
+      for (const Path &P : ToInit)
+        initializePathInfo(P, EdgeKind::None, R);
+    }
+  }
+
+  // Record forward-reachability for the given paths, creating entries as
+  // needed. Asserts that we never contradict an already-recorded value.
+  void
+  initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
+                             const SmallVector<Path> &UnreachablePaths) const {
+    for (bool R : {true, false}) {
+      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
+      for (const Path &P : ToInit) {
+        PathInfo &Slot = getOrInitPathInfo(P.src(), P.dst());
+        assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
+        Slot.ForwardReachable = R;
+      }
+    }
+  }
+
+  // Follow the control flow graph starting at the entry block until all blocks
+  // have been visited. Along the way, initialize the PathInfo for each edge
+  // traversed, classifying it as a forward edge or a backedge.
+  void initializeCfgPaths() {
+    Paths.clear();
+
+    enum VisitState { Undiscovered, Visiting, Finished };
+    DenseMap<const MachineBasicBlock *, VisitState> State;
+
+    // Iterative DFS. Note a block may be pushed more than once: every
+    // predecessor that processes it while it is still Undiscovered pushes it
+    // again. Such stale duplicates are skipped below.
+    SmallVector<const MachineBasicBlock *> Work{&MF->front()};
+    State[&MF->front()] = Undiscovered;
+
+    while (!Work.empty()) {
+      const MachineBasicBlock *Src = Work.back();
+      VisitState &SrcState = State[Src];
+
+      if (SrcState == Finished) {
+        // Stale duplicate entry: Src was pushed again by another predecessor
+        // before its first stack entry was processed. Without this guard we
+        // would re-mark Src as Visiting and re-initialize its out-edges,
+        // tripping the !Paths.contains(P) assertion below (and corrupting
+        // edge kinds in release builds).
+        Work.pop_back();
+        continue;
+      }
+
+      if (SrcState == Visiting) {
+        // All successors have been processed; retire Src.
+        Work.pop_back();
+        SrcState = Finished;
+        continue;
+      }
+
+      // First visit: classify each out-edge and queue undiscovered successors.
+      SrcState = Visiting;
+      for (const MachineBasicBlock *Dst : Src->successors()) {
+        const VisitState DstState = State.lookup(Dst);
+
+        EdgeKind EK;
+        if (DstState == Undiscovered) {
+          EK = EdgeKind::Forward;
+          Work.push_back(Dst);
+        } else if (DstState == Visiting) {
+          // Dst is on the current DFS path: this edge is a backedge.
+          EK = EdgeKind::Back;
+        } else {
+          EK = EdgeKind::Forward;
+        }
+
+        Path P(Src, Dst);
+        assert(!Paths.contains(P));
+        initializePathInfo(P, EK, /*Reachable*/ true);
+      }
+    }
+  }
+
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ // Calculate features
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+private:
+  // Number of distance-relevant instructions in BB. In compute mode PHIs do
+  // not count (they share an id with the first non-PHI instruction).
+  InstrIdTy calcSize(const MachineBasicBlock *BB) const {
+    InstrIdTy Size = BB->size();
+    if (computeMode())
+      Size -= std::distance(BB->begin(), BB->getFirstNonPHI());
+    return Size;
+  }
+
+  // Size of 'From', weighted by the number of loop exits crossed on the way
+  // to 'To'.
+  NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
+                                   const MachineBasicBlock *To) const {
+    NextUseDistance Size{getSize(From)};
+    return Size.applyLoopWeight(getNumLoopExits(From, To));
+  }
+
+  // Count how many loops in Loop's nest (from Loop outward, inclusive) do
+  // not contain 'To' -- i.e. how many loop levels must be exited to reach it.
+  static unsigned calcEffectiveLoopDepth(MachineLoop *Loop,
+                                         const MachineBasicBlock *To) {
+    unsigned LoopDepth = 0;
+    MachineLoop *const End = Loop->getOutermostLoop()->getParentLoop();
+    for (MachineLoop *L = Loop; L != End; L = L->getParentLoop()) {
+      if (!L->contains(To))
+        LoopDepth++;
+    }
+    return LoopDepth;
+  }
+
+  // Number of loop exits crossed going from 'From' to 'To'. Zero when 'From'
+  // is not in a loop, or when control stays within (or descends further into)
+  // 'From's loop.
+  unsigned calcLoopExits(const MachineBasicBlock *From,
+                         const MachineBasicBlock *To) const {
+    MachineLoop *LoopFrom = MLI->getLoopFor(From);
+    MachineLoop *LoopTo = MLI->getLoopFor(To);
+
+    if (!LoopFrom)
+      return 0;
+
+    if (LoopTo && LoopFrom->contains(LoopTo)) // covers LoopFrom == LoopTo
+      return 0;
+
+    // 'To' is in an enclosing loop: exit the intervening levels.
+    if (LoopTo && LoopTo->contains(LoopFrom))
+      return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();
+
+    // Disjoint loop nests: count the levels of 'From's nest not containing
+    // 'To'.
+    return calcEffectiveLoopDepth(LoopFrom, To);
+  }
+
+ // Attempt to find a path from 'From' to 'To' using a depth first search. If
+ // 'ForwardOnly' is true, do not follow backedges. As a performance
+ // improvement, this may initialize reachable intermediate paths or paths we
+ // determine are unreachable.
+ bool calcIsReachable(const MachineBasicBlock *From,
+ const MachineBasicBlock *To,
+ bool ForwardOnly = false) const {
+ if (!ForwardOnly && interBlockDistanceFor(From, To))
+ return true;
+
+ if (From == To && !MLI->getLoopFor(From))
----------------
macurtis-amd wrote:
Re-ordered `if`s as suggested.
https://github.com/llvm/llvm-project/pull/178873
More information about the llvm-commits
mailing list