[llvm] [AMDGPU] Introduce Next-Use Analysis for SSA-based Register Allocation (PR #156079)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 29 12:16:32 PDT 2025
https://github.com/alex-t updated https://github.com/llvm/llvm-project/pull/156079
>From dc33189687267b37b16238c356acaf2d58db6eb3 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Fri, 29 Aug 2025 16:08:40 +0000
Subject: [PATCH 1/4] AMDGPU SSA RA: Next Use Analysis pass added
---
.../Target/AMDGPU/AMDGPUNextUseAnalysis.cpp | 436 ++++++++++++++++++
.../lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h | 316 +++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h | 69 +++
llvm/lib/Target/AMDGPU/VRegMaskPair.h | 403 ++++++++++++++++
.../CodeGen/AMDGPU/NextUseAnalysis/README.md | 33 ++
.../NextUseAnalysis/basic-distances.mir | 58 +++
.../AMDGPU/NextUseAnalysis/dead-registers.mir | 28 ++
.../NextUseAnalysis/multiblock-distances.mir | 37 ++
.../NextUseAnalysis/subreg-distances.mir | 29 ++
.../NextUseAnalysis/subreg-interference.mir | 39 ++
10 files changed, 1448 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h
create mode 100644 llvm/lib/Target/AMDGPU/VRegMaskPair.h
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/README.md
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
new file mode 100644
index 0000000000000..0c2feca1e7d8f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
@@ -0,0 +1,436 @@
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Support/Timer.h"
+
+#include "AMDGPU.h"
+
+#include "AMDGPUNextUseAnalysis.h"
+
+#define DEBUG_TYPE "amdgpu-next-use"
+
+using namespace llvm;
+
+//namespace {
+
+
+void NextUseResult::init(const MachineFunction &MF) {
+ TG = new TimerGroup("Next Use Analysis",
+ "Compilation Timers for Next Use Analysis");
+ T1 = new Timer("Next Use Analysis", "Time spent in analyse()", *TG);
+ T2 = new Timer("Next Use Analysis", "Time spent in computeNextUseDistance()",
+ *TG);
+ for (auto L : LI->getLoopsInPreorder()) {
+ SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Exiting;
+ L->getExitEdges(Exiting);
+ for (auto P : Exiting) {
+ LoopExits[P.first->getNumber()] = P.second->getNumber();
+ }
+ }
+}
+
+void NextUseResult::analyze(const MachineFunction &MF) {
+ // Upward-exposed distances are only necessary to convey the data flow from
+ // the block to its predecessors. No need to store it beyond the analyze
+ // function as the analysis users are only interested in the use distances
+ // relatively to the given MI or the given block end.
+ DenseMap<unsigned, VRegDistances> UpwardNextUses;
+ T1->startTimer();
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (auto MBB : post_order(&MF)) {
+ unsigned MBBNum = MBB->getNumber();
+ VRegDistances Curr, Prev;
+ if (UpwardNextUses.contains(MBBNum)) {
+ Prev = UpwardNextUses[MBBNum];
+ }
+
+ LLVM_DEBUG(dbgs() << "\nMerging successors for " << "MBB_"
+ << MBB->getNumber() << "." << MBB->getName() << "\n";);
+
+ for (auto Succ : successors(MBB)) {
+ unsigned SuccNum = Succ->getNumber();
+
+ if (!UpwardNextUses.contains(SuccNum))
+ continue;
+
+ VRegDistances SuccDist = UpwardNextUses[SuccNum];
+ LLVM_DEBUG(dbgs() << "\nMerging " << "MBB_" << Succ->getNumber() << "."
+ << Succ->getName() << "\n");
+
+ // Check if the edge from MBB to Succ goes out of the Loop
+ unsigned Weight = 0;
+ if (LoopExits.contains(MBB->getNumber())) {
+ int SuccNum = LoopExits[MBB->getNumber()];
+ if (Succ->getNumber() == SuccNum)
+ Weight = Infinity;
+ }
+
+ if (LI->getLoopDepth(MBB) < LI->getLoopDepth(Succ)) {
+ // MBB->Succ is entering the Succ's loop
+ // Clear out the Loop-Exiting wights.
+ for (auto &P : SuccDist) {
+ auto &Dists = P.second;
+ for (auto R : Dists) {
+ if (R.second >= Infinity) {
+ std::pair<LaneBitmask, unsigned> New = R;
+ New.second -= Infinity;
+ Dists.erase(R);
+ Dists.insert(New);
+ }
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "\nCurr: "; printVregDistances(Curr);
+ dbgs() << "\nSucc: "; printVregDistances(SuccDist));
+
+ Curr.merge(SuccDist, Weight);
+ LLVM_DEBUG(dbgs() << "\nCurr after merge: "; printVregDistances(Curr));
+ // Now take care of the PHIs operands in the Succ
+ for (auto &PHI : Succ->phis()) {
+ for (auto &U : PHI.uses()) {
+ if (U.isReg()) {
+ auto OpNo = U.getOperandNo();
+ auto B = PHI.getOperand(++OpNo);
+ assert(B.isMBB());
+ MachineBasicBlock *ValueSrc = B.getMBB();
+ if (ValueSrc->getNumber() == MBB->getNumber()) {
+ // We assume that all the PHIs have zero distance from the
+ // succ end!
+ Curr.insert(VRegMaskPair(U, TRI, MRI), 0);
+ }
+ }
+ }
+ for (auto &U : PHI.defs()) {
+ Curr.clear(VRegMaskPair(U, TRI, MRI));
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "\nCurr after succsessors processing: ";
+ printVregDistances(Curr));
+ NextUseMap[MBBNum].Bottom = Curr;
+
+ for (auto &MI : make_range(MBB->rbegin(), MBB->rend())) {
+
+ if (MI.isPHI())
+ // We'll take care of PHIs when merging this block to it's
+ // predecessor.
+ continue;
+
+ // TODO: Compute distances in some modifiable container and copy to
+ // the std::set once when ready in one loop!
+ for (auto &P : Curr) {
+ VRegDistances::SortedRecords Tmp;
+ for (auto D : P.second)
+ Tmp.insert({D.first, ++D.second});
+ P.second = Tmp;
+ }
+
+ for (auto &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ VRegMaskPair P(MO, TRI, MRI);
+ if (MO.isUse()) {
+ Curr.insert(P, 0);
+ UsedInBlock[MBB->getNumber()].insert(P);
+ } else if (MO.isDef()) {
+ Curr.clear(P);
+ UsedInBlock[MBB->getNumber()].remove(P);
+ }
+ }
+ }
+ NextUseMap[MBBNum].InstrDist[&MI] = Curr;
+ }
+
+ LLVM_DEBUG(dbgs() << "\nFinal distances for MBB_" << MBB->getNumber()
+ << "." << MBB->getName() << "\n";
+ printVregDistances(Curr));
+ LLVM_DEBUG(dbgs() << "\nPrevious distances for MBB_" << MBB->getNumber()
+ << "." << MBB->getName() << "\n";
+ printVregDistances(Prev));
+ UpwardNextUses[MBBNum] = std::move(Curr);
+
+ bool Changed4MBB = (Prev != UpwardNextUses[MBBNum]);
+
+ Changed |= Changed4MBB;
+ }
+ }
+ dumpUsedInBlock();
+ // Dump complete analysis results for testing
+ LLVM_DEBUG(dumpAllNextUseDistances(MF));
+ T1->stopTimer();
+ LLVM_DEBUG(TG->print(llvm::errs()));
+ }
+
+void NextUseResult::getFromSortedRecords(
+ const VRegDistances::SortedRecords Dists, LaneBitmask Mask, unsigned &D) {
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) <<"]\n");
+ for (auto P : Dists) {
+ // Records are sorted in distance increasing order. So, the first record
+ // is for the closest use.
+ LaneBitmask UseMask = P.first;
+ LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n");
+ if ((UseMask & Mask) == UseMask) {
+ D = P.second;
+ break;
+ }
+ }
+}
+
+SmallVector<VRegMaskPair>
+NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
+ const VRegMaskPair VMP) {
+ SmallVector<VRegMaskPair> Result;
+ const MachineBasicBlock *MBB = I->getParent();
+ unsigned MBBNum = MBB->getNumber();
+ if (NextUseMap.contains(MBBNum) &&
+ NextUseMap[MBBNum].InstrDist.contains(&*I)) {
+ // VRegDistances Dists = NextUseMap[MBBNum].InstrDist[&*I];
+ if (NextUseMap[MBBNum].InstrDist[&*I].contains(VMP.getVReg())) {
+ VRegDistances::SortedRecords Dists =
+ NextUseMap[MBBNum].InstrDist[&*I][VMP.getVReg()];
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ for (auto P : reverse(Dists)) {
+ LaneBitmask UseMask = P.first;
+ LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask)
+ << "]\n");
+ if ((UseMask & VMP.getLaneMask()) == UseMask) {
+ Result.push_back({VMP.getVReg(), UseMask});
+ }
+ }
+ }
+ }
+ return Result;
+}
+
+SmallVector<VRegMaskPair>
+NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP) {
+ SmallVector<VRegMaskPair> Result;
+ unsigned MBBNum = MBB.getNumber();
+ if (NextUseMap.contains(MBBNum) &&
+ NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
+ VRegDistances::SortedRecords Dists = NextUseMap[MBBNum].Bottom[VMP.getVReg()];
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ for (auto P : reverse(Dists)) {
+ LaneBitmask UseMask = P.first;
+ LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n");
+ if ((UseMask & VMP.getLaneMask()) == UseMask) {
+ Result.push_back({VMP.getVReg(), UseMask});
+ }
+ }
+ }
+ return Result;
+}
+
+void NextUseResult::dumpUsedInBlock() {
+ LLVM_DEBUG(for (auto P
+ : UsedInBlock) {
+ dbgs() << "MBB_" << P.first << ":\n";
+ for (auto VMP : P.second) {
+ dbgs() << "[ " << printReg(VMP.getVReg()) << " : <"
+ << PrintLaneMask(VMP.getLaneMask()) << "> ]\n";
+ }
+ });
+}
+
+unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
+ const VRegMaskPair VMP) {
+ unsigned Dist = Infinity;
+ const MachineBasicBlock *MBB = I->getParent();
+ unsigned MBBNum = MBB->getNumber();
+ if (NextUseMap.contains(MBBNum) &&
+ NextUseMap[MBBNum].InstrDist.contains(&*I)) {
+ VRegDistances Dists = NextUseMap[MBBNum].InstrDist[&*I];
+ if (NextUseMap[MBBNum].InstrDist[&*I].contains(VMP.getVReg())) {
+ // printSortedRecords(Dists[VMP.VReg], VMP.VReg);
+ getFromSortedRecords(Dists[VMP.getVReg()], VMP.getLaneMask(), Dist);
+ }
+ }
+
+ return Dist;
+}
+
+unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP) {
+ unsigned Dist = Infinity;
+ unsigned MBBNum = MBB.getNumber();
+ if (NextUseMap.contains(MBBNum)) {
+ if (NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
+ getFromSortedRecords(NextUseMap[MBBNum].Bottom[VMP.getVReg()], VMP.getLaneMask(),
+ Dist);
+ }
+ }
+ return Dist;
+}
+
+AMDGPUNextUseAnalysis::Result
+AMDGPUNextUseAnalysis::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ return AMDGPUNextUseAnalysis::Result(MF,
+ MFAM.getResult<SlotIndexesAnalysis>(MF),
+ MFAM.getResult<MachineLoopAnalysis>(MF));
+}
+
+AnalysisKey AMDGPUNextUseAnalysis::Key;
+
+//} // namespace
+
+extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
+llvmGetPassPluginInfo() {
+ return {LLVM_PLUGIN_API_VERSION, "AMDGPUNextUseAnalysisPass",
+ LLVM_VERSION_STRING, [](PassBuilder &PB) {
+ PB.registerAnalysisRegistrationCallback(
+ [](MachineFunctionAnalysisManager &MFAM) {
+ MFAM.registerPass([] { return AMDGPUNextUseAnalysis(); });
+ });
+ }};
+}
+
+char AMDGPUNextUseAnalysisWrapper::ID = 0;
+char &llvm::AMDGPUNextUseAnalysisID = AMDGPUNextUseAnalysisWrapper::ID;
+INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisWrapper, "amdgpu-next-use",
+ "AMDGPU Next Use Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUNextUseAnalysisWrapper, "amdgpu-next-use",
+ "AMDGPU Next Use Analysis", false, false)
+
+bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(
+ MachineFunction &MF) {
+ NU.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
+ NU.LI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ NU.MRI = &MF.getRegInfo();
+ NU.TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ assert(NU.MRI->isSSA());
+ NU.init(MF);
+ NU.analyze(MF);
+// LLVM_DEBUG(NU.dump());
+ return false;
+}
+
+void AMDGPUNextUseAnalysisWrapper::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineLoopInfoWrapperPass>();
+ AU.addRequired<SlotIndexesWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AMDGPUNextUseAnalysisWrapper::AMDGPUNextUseAnalysisWrapper()
+ : MachineFunctionPass(ID) {
+ initializeAMDGPUNextUseAnalysisWrapperPass(*PassRegistry::getPassRegistry());
+}
+void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "=== NextUseAnalysis Results for " << MF.getName() << " ===\n");
+
+ for (const auto &MBB : MF) {
+ unsigned MBBNum = MBB.getNumber();
+ LLVM_DEBUG(dbgs() << "\n--- MBB_" << MBBNum << " ---\n");
+
+ if (!NextUseMap.contains(MBBNum)) {
+ LLVM_DEBUG(dbgs() << " No analysis data for this block\n");
+ continue;
+ }
+
+ const NextUseInfo &Info = NextUseMap.at(MBBNum);
+
+ // Process each instruction in the block
+ for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
+ const MachineInstr &MI = *II;
+
+ // Print instruction
+ LLVM_DEBUG(dbgs() << " Instr: ");
+ LLVM_DEBUG(MI.print(dbgs(), /*IsStandalone=*/false, /*SkipOpers=*/false,
+ /*SkipDebugLoc=*/true, /*AddNewLine=*/false));
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Print distances at this instruction
+ if (Info.InstrDist.contains(&MI)) {
+ const VRegDistances &Dists = Info.InstrDist.at(&MI);
+ LLVM_DEBUG(dbgs() << " Next-use distances:\n");
+
+ for (const auto &VRegEntry : Dists) {
+ unsigned VReg = VRegEntry.getFirst();
+ const auto &Records = VRegEntry.getSecond();
+
+ for (const auto &Record : Records) {
+ LaneBitmask LaneMask = Record.first;
+ unsigned Distance = Record.second;
+
+ LLVM_DEBUG(dbgs() << " ");
+
+ // Print register with sub-register if applicable
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask != FullMask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
+ LLVM_DEBUG(dbgs() << printReg(VReg, TRI, SubRegIdx, MRI));
+ } else {
+ LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
+ }
+
+ if (Distance == Infinity) {
+ LLVM_DEBUG(dbgs() << " -> DEAD (infinite distance)\n");
+ } else {
+ LLVM_DEBUG(dbgs() << " -> " << Distance << " instructions\n");
+ }
+ }
+ }
+
+ if (Dists.size() == 0) {
+ LLVM_DEBUG(dbgs() << " (no register uses)\n");
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << " (no distance data)\n");
+ }
+ }
+
+ // Print distances at end of block
+ LLVM_DEBUG(dbgs() << " Block End Distances:\n");
+ for (const auto &VRegEntry : Info.Bottom) {
+ unsigned VReg = VRegEntry.getFirst();
+ const auto &Records = VRegEntry.getSecond();
+
+ for (const auto &Record : Records) {
+ LaneBitmask LaneMask = Record.first;
+ unsigned Distance = Record.second;
+
+ LLVM_DEBUG(dbgs() << " ");
+
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask != FullMask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
+ LLVM_DEBUG(dbgs() << printReg(VReg, TRI, SubRegIdx, MRI));
+ } else {
+ LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
+ }
+
+ if (Distance == Infinity) {
+ LLVM_DEBUG(dbgs() << " -> DEAD\n");
+ } else {
+ LLVM_DEBUG(dbgs() << " -> " << Distance << "\n");
+ }
+ }
+ }
+
+ if (Info.Bottom.size() == 0) {
+ LLVM_DEBUG(dbgs() << " (no registers live at block end)\n");
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "\n=== End NextUseAnalysis Results ===\n");
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
new file mode 100644
index 0000000000000..101ee7640a0bd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
@@ -0,0 +1,316 @@
+//===- AMDGPUNextUseAnalysis.h ----------------------------------------*- C++-
+//*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_NEXT_USE_ANALYSIS_H
+#define LLVM_LIB_TARGET_AMDGPU_NEXT_USE_ANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include "SIRegisterInfo.h"
+#include "GCNSubtarget.h"
+#include "AMDGPUSSARAUtils.h"
+#include "VRegMaskPair.h"
+
+#include <algorithm>
+#include <limits>
+#include <set>
+
+using namespace llvm;
+
+// namespace {
+
+
+class NextUseResult {
+ friend class AMDGPUNextUseAnalysisWrapper;
+ SlotIndexes *Indexes;
+ const MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI;
+ MachineLoopInfo *LI;
+
+ TimerGroup *TG;
+ Timer *T1;
+ Timer *T2;
+
+ class VRegDistances {
+
+ using Record = std::pair<LaneBitmask, unsigned>;
+ struct CompareByDist {
+ bool operator()(const Record &LHS, const Record &RHS) const {
+ if (LHS.first ==
+ RHS.first) // Same LaneBitmask → prefer furthest distance
+ return LHS.second > RHS.second;
+ return LHS.first.getAsInteger() <
+ RHS.first.getAsInteger(); // Otherwise sort by LaneBitmask so
+ // that smaller Mask first
+ }
+ };
+
+public:
+ using SortedRecords = std::set<Record, CompareByDist>;
+ private:
+ DenseMap<unsigned, SortedRecords> NextUseMap;
+
+ public:
+ auto begin() { return NextUseMap.begin(); }
+ auto end() { return NextUseMap.end(); }
+
+ auto begin() const { return NextUseMap.begin(); }
+ auto end() const { return NextUseMap.end(); }
+
+ size_t size() const { return NextUseMap.size(); }
+ std::pair<bool, SortedRecords> get(unsigned Key) const {
+ if (NextUseMap.contains(Key))
+ return {true, NextUseMap.find(Key)->second};
+ return {false, SortedRecords()};
+ }
+
+ SortedRecords &operator[](unsigned Key) { return NextUseMap[Key]; }
+
+ SmallVector<unsigned> keys() {
+ SmallVector<unsigned> Keys;
+ for (auto P : NextUseMap)
+ Keys.push_back(P.first);
+ return Keys;
+ }
+
+ bool contains(unsigned Key) {
+ return NextUseMap.contains(Key);
+ }
+
+ bool insert(VRegMaskPair VMP, unsigned Dist) {
+ Record R(VMP.getLaneMask(), Dist);
+ if (NextUseMap.contains(VMP.getVReg())) {
+ SortedRecords &Dists = NextUseMap[VMP.getVReg()];
+
+ if (!Dists.contains(R)) {
+ for (auto D : Dists) {
+ if (D.first == R.first) {
+ if (D.second > R.second) {
+ // Change to record with less distance
+ Dists.erase(D);
+ return Dists.insert(R).second;
+ } else {
+ return false;
+ }
+ }
+ }
+ // add new record
+ return Dists.insert(R).second;
+ } else {
+ // record already exists!
+ return false;
+ }
+ } else
+ return NextUseMap[VMP.getVReg()].insert(R).second;
+ }
+
+ void clear(VRegMaskPair VMP) {
+ if (NextUseMap.contains(VMP.getVReg())) {
+ auto &Dists = NextUseMap[VMP.getVReg()];
+ std::erase_if(Dists,
+ [&](Record R) { return (R.first &= ~VMP.getLaneMask()).none(); });
+ if (Dists.empty())
+ NextUseMap.erase(VMP.getVReg());
+ }
+ }
+
+ bool operator == (const VRegDistances Other) const {
+
+ if (Other.size() != size())
+ return false;
+
+ for (auto P : NextUseMap) {
+
+ std::pair<bool, SortedRecords> OtherDists = Other.get(P.getFirst());
+ if (!OtherDists.first)
+ return false;
+ SortedRecords &Dists = P.getSecond();
+
+ if (Dists.size() != OtherDists.second.size())
+ return false;
+
+ for (auto R : OtherDists.second) {
+ SortedRecords::iterator I = Dists.find(R);
+ if (I == Dists.end())
+ return false;
+ if (R.second != I->second)
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool operator!=(const VRegDistances &Other) const {
+ return !operator==(Other);
+ }
+
+ void merge(const VRegDistances &Other, unsigned Weight = 0) {
+ for (const auto &P : Other) {
+ unsigned Key = P.getFirst();
+ const auto &OtherDists = P.getSecond();
+ auto &MineDists = NextUseMap[Key]; // creates empty if not present
+
+ for (const auto &D : OtherDists) {
+ Record Adjusted = {D.first, D.second + Weight};
+
+ // Try to find existing record with the same LaneBitmask
+ auto It =
+ std::find_if(MineDists.begin(), MineDists.end(),
+ [&](const Record &R) { return R.first == D.first; });
+
+ if (It == MineDists.end()) {
+ // No record → insert
+ MineDists.insert(Adjusted);
+ } else if (It->second > Adjusted.second) {
+ // Furthest wins (adjusted is more distant) → replace
+ MineDists.erase(It);
+ MineDists.insert(Adjusted);
+ }
+ }
+ }
+ }
+ };
+ class NextUseInfo {
+ // FIXME: need to elaborate proper class interface!
+ public:
+ VRegDistances Bottom;
+ DenseMap<const MachineInstr *, VRegDistances> InstrDist;
+ };
+
+ DenseMap<unsigned, NextUseInfo> NextUseMap;
+
+public:
+
+
+private:
+ DenseMap<unsigned, SetVector<VRegMaskPair>> UsedInBlock;
+ DenseMap<int, int> LoopExits;
+ const uint16_t Infinity = std::numeric_limits<unsigned short>::max();
+ void init(const MachineFunction &MF);
+ void analyze(const MachineFunction &MF);
+ LLVM_ATTRIBUTE_NOINLINE void
+ printSortedRecords(VRegDistances::SortedRecords Records, unsigned VReg,
+ raw_ostream &O = dbgs()) const {
+ for (auto X : Records) {
+ O << "Vreg: ";
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (X.first != FullMask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(X.first, TRI);
+ O << printReg(VReg, TRI, SubRegIdx, MRI) << "[ " << X.second << "]\n";
+ } else
+ O << printReg(VReg) << "[ " << X.second << "]\n";
+ }
+ }
+
+ LLVM_ATTRIBUTE_NOINLINE
+ void printVregDistances(const VRegDistances &D,
+ raw_ostream &O = dbgs()) const {
+ O << "\n";
+ for (auto P : D) {
+ printSortedRecords(P.second, P.first);
+ }
+ }
+
+ void clear() {
+ NextUseMap.clear();
+ LoopExits.clear();
+ }
+
+public:
+ NextUseResult() = default;
+ NextUseResult(const MachineFunction &MF, SlotIndexes &SI, MachineLoopInfo &LI)
+ : Indexes(&SI), MRI(&MF.getRegInfo()), LI(&LI) {
+ init(MF);
+ analyze(MF);
+ }
+ ~NextUseResult() { clear(); }
+
+ // void print(raw_ostream &O) const { dump(O); }
+
+ unsigned getNextUseDistance(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP);
+ unsigned getNextUseDistance(const MachineBasicBlock::iterator I,
+ const VRegMaskPair VMP);
+ void getFromSortedRecords(const VRegDistances::SortedRecords Dists,
+ LaneBitmask Mask, unsigned &D);
+
+ SmallVector<VRegMaskPair>
+ getSortedSubregUses(const MachineBasicBlock::iterator I,
+ const VRegMaskPair VMP);
+
+ SmallVector<VRegMaskPair>
+ getSortedSubregUses(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP);
+
+ bool isDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ const VRegMaskPair VMP) {
+ if (!VMP.getVReg().isVirtual())
+ report_fatal_error("Only virtual registers allowed!\n", true);
+ // FIXME: We use the same Infinity value to indicate both invalid distance
+ // and too long for out of block values. It is okay if the use out of block
+ // is at least one instruction further then the end of loop exit. In this
+ // case we have a distance Infinity + 1 and hence register is not considered
+ // dead. What if the register is defined by the last instruction in the loop
+ // exit block and out of loop use is in PHI? By design the dist of all PHIs
+ // from the beginning of block are ZERO and hence the distance of
+ // out-of-the-loop use will be exactly Infinity So, the register will be
+ // mistakenly considered DEAD! On another hand, any predecessor of the block
+ // containing PHI must have a branch as the last instruction. In this case
+ // the current design works.
+ return I == MBB.end() ? getNextUseDistance(MBB, VMP) == Infinity
+ : getNextUseDistance(I, VMP) == Infinity;
+ }
+
+ SetVector<VRegMaskPair>& usedInBlock(MachineBasicBlock &MBB) {
+ return UsedInBlock[MBB.getNumber()];
+ }
+
+ void dumpUsedInBlock();
+
+ /// Dump complete next-use analysis results for testing
+ void dumpAllNextUseDistances(const MachineFunction &MF);
+};
+
+class AMDGPUNextUseAnalysis : public AnalysisInfoMixin<AMDGPUNextUseAnalysis> {
+ friend AnalysisInfoMixin<AMDGPUNextUseAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = NextUseResult;
+ Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
+class AMDGPUNextUseAnalysisWrapper : public MachineFunctionPass {
+ NextUseResult NU;
+
+public:
+ static char ID;
+
+ AMDGPUNextUseAnalysisWrapper();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Pass entry point; Calculates LiveIntervals.
+ bool runOnMachineFunction(MachineFunction &) override;
+ void releaseMemory() override { NU.clear(); }
+
+ // /// Implement the dump method.
+ // void print(raw_ostream &O, const Module * = nullptr) const override {
+ // NU.print(O);
+ // }
+
+ NextUseResult &getNU() { return NU; }
+};
+
+//}
+
+#endif // LLVM_LIB_TARGET_AMDGPU_NEXT_USE_ANALYSIS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h
new file mode 100644
index 0000000000000..0bb163eed59a9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h
@@ -0,0 +1,69 @@
+//===------- AMDGPUSSARAUtils.h ----------------------------------------*- C++-
+//*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SSA_RA_UTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_SSA_RA_UTILS_H
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+inline LaneBitmask getOperandLaneMask(const MachineOperand &MO,
+ const SIRegisterInfo *TRI,
+ MachineRegisterInfo *MRI) {
+ assert(MO.isReg() && MO.getReg().isVirtual() &&
+ "Error: Only virtual register allowed!\n");
+ if (MO.getSubReg())
+ return TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ return MRI->getMaxLaneMaskForVReg(MO.getReg());
+}
+
+inline unsigned getSubRegIndexForLaneMask(LaneBitmask Mask,
+ const SIRegisterInfo *TRI) {
+ for (unsigned Idx = 1; Idx < TRI->getNumSubRegIndices(); ++Idx) {
+ if (TRI->getSubRegIndexLaneMask(Idx) == Mask)
+ return Idx;
+ }
+ return AMDGPU::NoRegister;
+}
+
+inline SmallVector<unsigned>
+getCoveringSubRegsForLaneMask(LaneBitmask Mask, const TargetRegisterClass *RC,
+ const SIRegisterInfo *TRI) {
+ SmallVector<unsigned> Candidates;
+ for (unsigned SubIdx = 1; SubIdx < TRI->getNumSubRegIndices(); ++SubIdx) {
+ if (!TRI->getSubClassWithSubReg(RC, SubIdx))
+ continue;
+
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubIdx);
+ if ((SubMask & Mask).any()) {
+ Candidates.push_back(SubIdx);
+ }
+ }
+
+ SmallVector<unsigned> OptimalSubIndices;
+ llvm::stable_sort(Candidates, [&](unsigned A, unsigned B) {
+ return TRI->getSubRegIndexLaneMask(A).getNumLanes() >
+ TRI->getSubRegIndexLaneMask(B).getNumLanes();
+ });
+ for (unsigned SubIdx : Candidates) {
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubIdx);
+ if ((Mask & SubMask) == SubMask) {
+ OptimalSubIndices.push_back(SubIdx);
+ Mask &= ~SubMask; // remove covered bits
+ if (Mask.none())
+ break;
+ }
+ }
+ return OptimalSubIndices;
+}
+#endif // LLVM_LIB_TARGET_AMDGPU_SSA_RA_UTILS_H
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/VRegMaskPair.h b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
new file mode 100644
index 0000000000000..de4e8b818e28d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
@@ -0,0 +1,403 @@
+//===------- VRegMaskPair.h ----------------------------------------*-
+//C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// ile
+/// rief Defines VRegMaskPair and VRegMaskPairSet for managing sets of
+/// virtual registers and their lane masks.
+///
+/// Set operations (union, intersection, subtraction) are implemented based on
+/// *subregister coverage logic* rather than exact equality. This means:
+/// - Two VRegMaskPairs are considered overlapping if their LaneMasks overlap.
+/// - Intersection and subtraction operate on *overlapping masks*, not exact
+/// matches.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_VREGMASKPAIR_H
+#define LLVM_LIB_TARGET_VREGMASKPAIR_H
+
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+
+class VRegMaskPairSet;
+
+class VRegMaskPair {
+ friend class VRegMaskPairSet;
+
+ Register VReg;
+ LaneBitmask LaneMask;
+
+public:
+ VRegMaskPair(Register VReg, LaneBitmask LaneMask)
+ : VReg(VReg), LaneMask(LaneMask) {}
+
+ VRegMaskPair() : VReg(AMDGPU::NoRegister), LaneMask(LaneBitmask::getNone()) {}
+ VRegMaskPair(const VRegMaskPair &Other) = default;
+ VRegMaskPair(VRegMaskPair &&Other) = default;
+ VRegMaskPair &operator=(const VRegMaskPair &Other) = default;
+ VRegMaskPair &operator=(VRegMaskPair &&Other) = default;
+
+ VRegMaskPair(const MachineOperand MO, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ assert(MO.isReg() && "Not a register operand!");
+ assert(MO.getReg().isVirtual() && "Not a virtual register!");
+ VReg = MO.getReg();
+ LaneMask = MO.getSubReg() ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
+ : MRI->getMaxLaneMaskForVReg(VReg);
+ }
+
+ const Register getVReg() const { return VReg; }
+ const LaneBitmask getLaneMask() const { return LaneMask; }
+
+ unsigned getSubReg(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask == Mask)
+ return AMDGPU::NoRegister;
+ return getSubRegIndexForLaneMask(LaneMask, TRI);
+ }
+
+ const TargetRegisterClass *getRegClass(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, VReg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask != Mask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
+ return TRI->getSubRegisterClass(RC, SubRegIdx);
+ }
+ return RC;
+ }
+
+ unsigned getSizeInRegs(const SIRegisterInfo *TRI) const {
+ return TRI->getNumCoveredRegs(LaneMask);
+ }
+
+ bool operator==(const VRegMaskPair &other) const {
+ return VReg == other.VReg && LaneMask == other.LaneMask;
+ }
+ };
+
+ class LaneCoverageResult {
+ friend class VRegMaskPairSet;
+ LaneBitmask Data;
+ LaneBitmask Covered;
+ LaneBitmask NotCovered;
+
+ public:
+ LaneCoverageResult() = default;
+ LaneCoverageResult(const LaneBitmask Mask)
+ : Data(Mask), NotCovered(Mask){};
+ bool isFullyCovered() { return Data == Covered; }
+ bool isFullyUncovered() { return Data == NotCovered; }
+ LaneBitmask getCovered() { return Covered; }
+ LaneBitmask getNotCovered() { return NotCovered; }
+ };
+
+ class VRegMaskPairSet {
+
+ using MaskSet = std::set<LaneBitmask>;
+ using SetStorageT = DenseMap<Register, MaskSet>;
+ using LinearStorageT = std::vector<VRegMaskPair>;
+
+ SetStorageT SetStorage;
+ LinearStorageT LinearStorage;
+
+ public:
+
+ VRegMaskPairSet() = default;
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(const ContainerT &Vec) {
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ }
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(ContainerT &&Vec) {
+ for (auto &&VMP : Vec)
+ insert(std::move(VMP));
+ }
+
+ bool insert(const VRegMaskPair &VMP) {
+ auto &MaskSet = SetStorage[VMP.VReg];
+ auto Inserted = MaskSet.insert(VMP.LaneMask);
+ if (!Inserted.second)
+ return false;
+ LinearStorage.push_back(VMP);
+ return true;
+ }
+
+ template <typename InputIt> void insert(InputIt First, InputIt Last) {
+ for (auto It = First; It != Last; ++It)
+ insert(*It);
+ }
+
+ void remove(const VRegMaskPair &VMP) {
+ auto MapIt = SetStorage.find(VMP.VReg);
+ if (MapIt == SetStorage.end())
+ return;
+
+ size_t Erased = MapIt->second.erase(VMP.LaneMask);
+ if (!Erased)
+ return;
+
+ if (MapIt->second.empty())
+ SetStorage.erase(MapIt);
+
+ auto VecIt = std::find(LinearStorage.begin(), LinearStorage.end(), VMP);
+ if (VecIt != LinearStorage.end()) {
+ LinearStorage.erase(VecIt);
+ } else {
+ llvm_unreachable("Inconsistent LinearStorage: VMP missing on remove");
+ }
+ }
+
+ template <typename Predicate> void remove_if(Predicate Pred) {
+ for (auto It = LinearStorage.begin(); It != LinearStorage.end();) {
+ const VRegMaskPair VMP = *It;
+ if (Pred(VMP)) {
+ It = LinearStorage.erase(It);
+ SetStorage[VMP.VReg].erase(VMP.LaneMask);
+ if (SetStorage[VMP.VReg].empty())
+ SetStorage.erase(VMP.VReg);
+ } else {
+ ++It;
+ }
+ }
+ }
+
+ bool count(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ if (It == SetStorage.end())
+ return false;
+
+ return It->second.count(VMP.LaneMask) > 0;
+ }
+
+ bool contains(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ return It != SetStorage.end() && It->second.contains(VMP.LaneMask);
+ }
+
+ void clear() {
+ SetStorage.clear();
+ LinearStorage.clear();
+ }
+
+ size_t size() const { return LinearStorage.size(); }
+ bool empty() const { return LinearStorage.empty(); }
+
+ void
+ sort(llvm::function_ref<bool(const VRegMaskPair &, const VRegMaskPair &)>
+ Cmp) {
+ std::sort(LinearStorage.begin(), LinearStorage.end(), Cmp);
+ }
+
+ VRegMaskPair pop_back_val() {
+ assert(!LinearStorage.empty() && "Pop from empty set");
+ VRegMaskPair VMP = LinearStorage.back();
+ LinearStorage.pop_back();
+
+ auto It = SetStorage.find(VMP.VReg);
+ assert(It != SetStorage.end() && "Inconsistent SetStorage");
+ It->second.erase(VMP.LaneMask);
+ if (It->second.empty())
+ SetStorage.erase(It);
+
+ return VMP;
+ }
+
+ LaneCoverageResult getCoverage(const VRegMaskPair &VMP) const {
+ LaneCoverageResult Result(VMP.LaneMask);
+ auto It = SetStorage.find(VMP.VReg);
+ if (It != SetStorage.end()) {
+ MaskSet Masks = It->second;
+ for (auto Mask : Masks) {
+ Result.Covered |= (Mask & VMP.LaneMask);
+ }
+ Result.NotCovered = (VMP.LaneMask & ~Result.Covered);
+ }
+ return Result;
+ }
+
+ bool operator==(const VRegMaskPairSet &Other) const {
+ if (SetStorage.size() != Other.SetStorage.size())
+ return false;
+
+ for (const auto &Entry : SetStorage) {
+ auto It = Other.SetStorage.find(Entry.first);
+ if (It == Other.SetStorage.end())
+ return false;
+
+ if (Entry.second != It->second)
+ return false;
+ }
+
+ return true;
+ }
+
+ template <typename ContainerT>
+ VRegMaskPairSet &operator=(const ContainerT &Vec) {
+ static_assert(
+ std::is_same<typename ContainerT::value_type, VRegMaskPair>::value,
+ "Container must hold VRegMaskPair elements");
+
+ clear();
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ return *this;
+ }
+
+ // Set operations based on subregister coverage logic
+
+ /// Adds all elements from Other whose (VReg, LaneMask) overlap with none
+ /// in *this.
+ void set_union(const VRegMaskPairSet &Other) {
+ for (const auto &VMP : Other)
+ insert(VMP);
+ }
+
+ /// Keeps only those elements in *this that are at least partially covered
+ /// by Other.
+ void set_intersect(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyUncovered())
+ return true;
+
+ if (!Cov.isFullyCovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getCovered()});
+ return true; // remove current, will reinsert trimmed version
+ }
+
+ return false; // keep as-is
+ });
+
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+
+ /// Removes elements from *this that are at least partially covered by
+ /// Other.
+ void set_subtract(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyCovered())
+ return true;
+
+ if (!Cov.isFullyUncovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getNotCovered()});
+ return true; // remove and reinsert uncovered part
+ }
+
+ return false;
+ });
+
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+
+ /// Returns the union (join) of this set and Other under coverage logic.
+ VRegMaskPairSet set_join(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result = *this;
+ Result.set_union(Other);
+ return Result;
+ }
+
+ /// Returns the intersection of this set and Other based on partial
+ /// overlap.
+ VRegMaskPairSet set_intersection(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyUncovered()) {
+ Result.insert({VMP.VReg, Cov.getCovered()});
+ }
+ }
+ return Result;
+ }
+
+ /// Returns all elements of *this that do not overlap with anything in
+ /// Other.
+ VRegMaskPairSet set_difference(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyCovered()) {
+ Result.insert({VMP.VReg, Cov.getNotCovered()});
+ }
+ }
+ return Result;
+ }
+
+ // Debug
+ void dump() const {
+ dbgs() << "=== VRegMaskPairSet Dump ===\n";
+
+ dbgs() << "SetStorage:\n";
+ for (const auto &Entry : SetStorage) {
+ dbgs() << " VReg: " << printReg(Entry.first) << " => { ";
+ for (const auto &Mask : Entry.second) {
+ dbgs() << PrintLaneMask(Mask) << " ";
+ }
+ dbgs() << "}\n";
+ }
+
+ dbgs() << "LinearStorage (insertion order):\n";
+ for (const auto &VMP : LinearStorage) {
+ dbgs() << " (" << printReg(VMP.getVReg()) << ", "
+ << PrintLaneMask(VMP.getLaneMask()) << ")\n";
+ }
+
+ dbgs() << "=============================\n";
+ }
+
+ // Iterators
+ using iterator = LinearStorageT::const_iterator;
+ iterator begin() const { return LinearStorage.begin(); }
+ iterator end() const { return LinearStorage.end(); }
+ };
+
+ namespace llvm {
+ template <> struct DenseMapInfo<VRegMaskPair> {
+ static inline VRegMaskPair getEmptyKey() {
+ return {Register(DenseMapInfo<unsigned>::getEmptyKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFFULL)};
+ }
+
+ static inline VRegMaskPair getTombstoneKey() {
+ return {Register(DenseMapInfo<unsigned>::getTombstoneKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFEULL)};
+ }
+
+ static unsigned getHashValue(const VRegMaskPair &P) {
+ return DenseMapInfo<unsigned>::getHashValue(P.getVReg().id()) ^
+ DenseMapInfo<uint64_t>::getHashValue(
+ P.getLaneMask().getAsInteger());
+ }
+
+ static bool isEqual(const VRegMaskPair &LHS, const VRegMaskPair &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.getVReg().id(),
+ RHS.getVReg().id()) &&
+ DenseMapInfo<uint64_t>::isEqual(
+ LHS.getLaneMask().getAsInteger(),
+ RHS.getLaneMask().getAsInteger());
+ }
+ };
+
+ } // namespace llvm
+#endif // LLVM_LIB_TARGET_VREGMASKPAIR_H
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/README.md b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/README.md
new file mode 100644
index 0000000000000..ce4dd224853ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/README.md
@@ -0,0 +1,33 @@
+# AMDGPU NextUseAnalysis Tests
+
+This directory contains comprehensive tests for the AMDGPU NextUseAnalysis V2 implementation.
+
+## Running Tests
+
+### Individual Test
+```bash
+cd build/Debug
+./bin/llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use \
+ ../../llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir -o /dev/null 2>&1 | \
+ ./bin/FileCheck ../../llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir
+```
+
+### All Tests
+```bash
+cd build/Debug
+for test in ../../llvm/test/CodeGen/AMDGPU/NextUseAnalysis/*.mir; do
+ echo "Testing: $test"
+ ./bin/llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use \
+ "$test" -o /dev/null 2>&1 | ./bin/FileCheck "$test" && echo "PASS" || echo "FAIL"
+done
+```
+
+## Test Categories
+
+1. **basic-distances.mir** - Fundamental distance calculations
+2. **subreg-distances.mir** - Sub-register handling
+3. **multiblock-distances.mir** - Control flow analysis
+4. **dead-registers.mir** - Dead register detection
+5. **subreg-interference.mir** - Advanced sub-register interference
+
+All tests validate the V2 implementation's sub-register aware analysis capabilities.
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir
new file mode 100644
index 0000000000000..c706ca44ead8d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir
@@ -0,0 +1,58 @@
+# NOTE: Basic next-use distance calculation test
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: basic_distances
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+body: |
+ bb.0:
+ ; Test basic distance calculation
+ ; %0 is used 2 instructions later, then %1 is used immediately, etc.
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ %2:vgpr_32 = V_ADD_F32_e32 %1, %1, implicit $exec, implicit $mode
+ %3:vgpr_32 = V_ADD_F32_e32 %0, %2, implicit $exec, implicit $mode
+ S_ENDPGM 0
+
+# CHECK: === NextUseAnalysis Results for basic_distances ===
+# CHECK: --- MBB_0 ---
+
+# First instruction: %0 definition - no incoming register uses
+# CHECK: Instr: %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+# CHECK-NEXT: Next-use distances:
+# CHECK-NEXT: (no register uses)
+
+# Second instruction: %1 definition - %0 will be used in 2 instructions
+# CHECK: Instr: %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+# CHECK-NEXT: Next-use distances:
+# CHECK-NEXT: %0 -> 2 instructions
+
+# Third instruction: %2 definition using %1 twice - %0 in 1 instruction, %1 immediate use
+# CHECK: Instr: %2:vgpr_32 = V_ADD_F32_e32 %1, %1, implicit $exec, implicit $mode
+# CHECK-NEXT: Next-use distances:
+# CHECK-NEXT: %0 -> 1 instructions
+# CHECK-NEXT: %1 -> 0 instructions
+
+# Fourth instruction: %3 definition using %0 and %2 - both immediate use
+# CHECK: Instr: %3:vgpr_32 = V_ADD_F32_e32 %0, %2, implicit $exec, implicit $mode
+# CHECK-NEXT: Next-use distances:
+# CHECK-NEXT: %0 -> 0 instructions
+# CHECK-NEXT: %2 -> 0 instructions
+
+# Final instruction: no register uses
+# CHECK: Instr: S_ENDPGM 0
+# CHECK-NEXT: Next-use distances:
+# CHECK-NEXT: (no register uses)
+
+# Block end: no live registers
+# CHECK: Block End Distances:
+# CHECK-NEXT: (no registers live at block end)
+
+# CHECK: === End NextUseAnalysis Results ===
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir
new file mode 100644
index 0000000000000..c3db7bd9a7d00
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir
@@ -0,0 +1,28 @@
+# NOTE: Dead register detection test
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: dead_registers
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ ; %0 is defined but never used - should be DEAD
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ %2:vgpr_32 = V_ADD_F32_e32 %1, %1, implicit $exec, implicit $mode
+ S_ENDPGM 0
+
+# CHECK: === NextUseAnalysis Results for dead_registers ===
+# CHECK: --- MBB_0 ---
+
+# %0 should be considered dead since it's never used
+# CHECK: Block End Distances:
+# Look for either DEAD or very high distance for %0
+
+# CHECK: === End NextUseAnalysis Results ===
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir
new file mode 100644
index 0000000000000..c82d0e8265b70
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir
@@ -0,0 +1,37 @@
+# NOTE: Multi-block next-use distance calculation test
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: multiblock_distances
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: sreg_32 }
+body: |
+ bb.0:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %3:sreg_32 = S_MOV_B32 1
+ S_CMP_EQ_U32 %3, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ %1:vgpr_32 = V_ADD_F32_e32 %0, %0, implicit $exec, implicit $mode
+ S_BRANCH %bb.2
+
+ bb.2:
+ %2:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+ S_ENDPGM 0
+
+# CHECK: === NextUseAnalysis Results for multiblock_distances ===
+
+# Check that we get analysis for all blocks
+# CHECK: --- MBB_0 ---
+# CHECK: --- MBB_1 ---
+# CHECK: --- MBB_2 ---
+
+# CHECK: === End NextUseAnalysis Results ===
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir
new file mode 100644
index 0000000000000..2ddcbea5d3f1c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir
@@ -0,0 +1,29 @@
+# NOTE: Sub-register next-use distance calculation test
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: subreg_distances
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ ; Test sub-register usage patterns
+ ; %0 is a 64-bit register, we use different sub-registers
+ %0:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+ %1:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %2:vgpr_32 = COPY %0.sub0
+ S_ENDPGM 0
+
+# CHECK: === NextUseAnalysis Results for subreg_distances ===
+# CHECK: --- MBB_0 ---
+
+# The test checks that sub-register analysis works correctly
+# CHECK: Instr: %0:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+# CHECK: Instr: %1:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+# CHECK: Instr: %2:vgpr_32 = COPY %0.sub0
+# CHECK: === End NextUseAnalysis Results ===
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir
new file mode 100644
index 0000000000000..a1e9a8ca8b48e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir
@@ -0,0 +1,39 @@
+# NOTE: Sub-register interference resolution test
+# This tests the V2 capability to handle disjoint sub-register usage without false interference
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-next-use -debug-only=amdgpu-next-use %s -o /dev/null 2>&1 | FileCheck %s
+
+---
+name: subreg_interference
+alignment: 1
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_128 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+body: |
+ bb.0:
+ ; Create a 128-bit register with sub-register usage
+ ; Lower 64 bits (sub0_sub1) and upper 64 bits (sub2_sub3) should not interfere
+ %0:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1, %3, %subreg.sub2, %4, %subreg.sub3
+
+ ; Use only lower 64 bits - should not interfere with upper bits
+ %1:vgpr_32 = COPY %0.sub0
+ %2:vgpr_32 = COPY %0.sub1
+
+ ; Later use upper 64 bits - should show separate distance tracking
+ %3:vgpr_32 = COPY %0.sub2
+ %4:vgpr_32 = COPY %0.sub3
+
+ S_ENDPGM 0
+
+# CHECK: === NextUseAnalysis Results for subreg_interference ===
+# CHECK: --- MBB_0 ---
+
+# The V2 implementation should track sub-register lanes separately
+# Look for lane mask information in the output
+# CHECK: Next-use distances:
+
+# CHECK: === End NextUseAnalysis Results ===
+...
>From c21291caa4d6cafa514950a0f17b01d46436ee90 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Fri, 29 Aug 2025 18:32:48 +0000
Subject: [PATCH 2/4] Build errors fixed
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +++
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
llvm/lib/Target/X86/X86WinEHUnwindV2.cpp | 2 +-
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index ebe38de1636be..9c2980ef735bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID;
void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &);
extern char &AMDGPUPreloadKernelArgumentsLegacyID;
+void initializeAMDGPUNextUseAnalysisWrapperPass(PassRegistry &);
+extern char &AMDGPUNextUseAnalysisID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 619ff4e5c73c4..7e9f1d27bd3e4 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -181,6 +181,7 @@ add_llvm_target(AMDGPUCodeGen
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
+ AMDGPUNextUseAnalysis.cpp
LINK_COMPONENTS
AMDGPUDesc
diff --git a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
index ea8b88f41bb87..d4c2510446242 100644
--- a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
+++ b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
@@ -219,7 +219,7 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) {
if (State == FunctionState::InEpilog) {
Register Reg = MI.getOperand(0).getReg();
if (HasStackAlloc && (PoppedRegCount == 0) &&
- !llvm::is_contained(PushedRegs, Reg)) {
+ !llvm::is_contained(PushedRegs, Reg.id())) {
// If this is a pop that doesn't correspond to the set of pushed
// registers, then assume it was used to adjust the stack pointer.
HasStackDealloc = true;
>From a5df373c17d715563cc43e830655f44714d3ac4f Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Fri, 29 Aug 2025 19:04:53 +0000
Subject: [PATCH 3/4] Pass registeration fixed. LIT tests fixed.
---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
.../AMDGPU/NextUseAnalysis/basic-distances.s | 128 ++++++++++++++++
.../AMDGPU/NextUseAnalysis/dead-registers.s | 126 ++++++++++++++++
.../NextUseAnalysis/multiblock-distances.s | 140 ++++++++++++++++++
.../AMDGPU/NextUseAnalysis/subreg-distances.s | 126 ++++++++++++++++
.../NextUseAnalysis/subreg-interference.s | 130 ++++++++++++++++
6 files changed, 651 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.s
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.s
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.s
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.s
create mode 100644 llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.s
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4a2f0a13b1325..92f4a7f414f93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -581,6 +581,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUReserveWWMRegsLegacyPass(*PR);
initializeAMDGPURewriteAGPRCopyMFMALegacyPass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
+ initializeAMDGPUNextUseAnalysisWrapperPass(*PR);
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeSIAnnotateControlFlowLegacyPass(*PR);
initializeAMDGPUInsertDelayAluLegacyPass(*PR);
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.s b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.s
new file mode 100644
index 0000000000000..9cead858931f8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.s
@@ -0,0 +1,128 @@
+--- |
+ ; ModuleID = '/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir'
+ source_filename = "/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/basic-distances.mir"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define void @basic_distances() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+...
+---
+name: basic_distances
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: vgpr_32, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sp_reg'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ %2:vgpr_32 = V_ADD_F32_e32 %1, %1, implicit $exec, implicit $mode
+ %3:vgpr_32 = V_ADD_F32_e32 %0, %2, implicit $exec, implicit $mode
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.s b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.s
new file mode 100644
index 0000000000000..c451255d9dcdc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.s
@@ -0,0 +1,126 @@
+--- |
+ ; ModuleID = '/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir'
+ source_filename = "/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/dead-registers.mir"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define void @dead_registers() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+...
+---
+name: dead_registers
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vgpr_32, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sp_reg'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 100, implicit $exec
+ %2:vgpr_32 = V_ADD_F32_e32 %1, %1, implicit $exec, implicit $mode
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.s b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.s
new file mode 100644
index 0000000000000..b7d50d4916d0e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.s
@@ -0,0 +1,140 @@
+--- |
+ ; ModuleID = '/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir'
+ source_filename = "/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/multiblock-distances.mir"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define void @multiblock_distances() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+...
+---
+name: multiblock_distances
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sreg_32, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sp_reg'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %3:sreg_32 = S_MOV_B32 1
+ S_CMP_EQ_U32 %3, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %1:vgpr_32 = V_ADD_F32_e32 %0, %0, implicit $exec, implicit $mode
+ S_BRANCH %bb.2
+
+ bb.2:
+ %2:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.s b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.s
new file mode 100644
index 0000000000000..f23c7146be2ff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.s
@@ -0,0 +1,126 @@
+--- |
+ ; ModuleID = '/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir'
+ source_filename = "/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-distances.mir"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define void @subreg_distances() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+...
+---
+name: subreg_distances
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vreg_64, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vgpr_32, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sp_reg'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ %0:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1
+ %1:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %2:vgpr_32 = COPY %0.sub0
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.s b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.s
new file mode 100644
index 0000000000000..8c12e07783ac4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.s
@@ -0,0 +1,130 @@
+--- |
+ ; ModuleID = '/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir'
+ source_filename = "/work/atimofee/sandbox/github/llvm-project/llvm/test/CodeGen/AMDGPU/NextUseAnalysis/subreg-interference.mir"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define void @subreg_interference() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+...
+---
+name: subreg_interference
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vreg_128, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: vgpr_32, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 0
+ numWaveDispatchVGPRs: 0
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sp_reg'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0:
+ %0:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %2, %subreg.sub1, %3, %subreg.sub2, %4, %subreg.sub3
+ %1:vgpr_32 = COPY %0.sub0
+ %2:vgpr_32 = COPY %0.sub1
+ %3:vgpr_32 = COPY %0.sub2
+ %4:vgpr_32 = COPY %0.sub3
+ S_ENDPGM 0
+...
>From 8a157c158b43f293240598579183057558694b90 Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev at amd.com>
Date: Fri, 29 Aug 2025 19:16:00 +0000
Subject: [PATCH 4/4] Formatting fixed
---
.../Target/AMDGPU/AMDGPUNextUseAnalysis.cpp | 89 +--
.../lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h | 32 +-
llvm/lib/Target/AMDGPU/VRegMaskPair.h | 653 +++++++++---------
3 files changed, 384 insertions(+), 390 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
index 0c2feca1e7d8f..c0775d5897da8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
@@ -24,8 +24,7 @@
using namespace llvm;
-//namespace {
-
+// namespace {
void NextUseResult::init(const MachineFunction &MF) {
TG = new TimerGroup("Next Use Analysis",
@@ -59,8 +58,9 @@ void NextUseResult::analyze(const MachineFunction &MF) {
Prev = UpwardNextUses[MBBNum];
}
- LLVM_DEBUG(dbgs() << "\nMerging successors for " << "MBB_"
- << MBB->getNumber() << "." << MBB->getName() << "\n";);
+ LLVM_DEBUG(dbgs() << "\nMerging successors for "
+ << "MBB_" << MBB->getNumber() << "." << MBB->getName()
+ << "\n";);
for (auto Succ : successors(MBB)) {
unsigned SuccNum = Succ->getNumber();
@@ -69,7 +69,8 @@ void NextUseResult::analyze(const MachineFunction &MF) {
continue;
VRegDistances SuccDist = UpwardNextUses[SuccNum];
- LLVM_DEBUG(dbgs() << "\nMerging " << "MBB_" << Succ->getNumber() << "."
+ LLVM_DEBUG(dbgs() << "\nMerging "
+ << "MBB_" << Succ->getNumber() << "."
<< Succ->getName() << "\n");
// Check if the edge from MBB to Succ goes out of the Loop
@@ -168,17 +169,17 @@ void NextUseResult::analyze(const MachineFunction &MF) {
Changed |= Changed4MBB;
}
- }
- dumpUsedInBlock();
+ }
+ dumpUsedInBlock();
// Dump complete analysis results for testing
LLVM_DEBUG(dumpAllNextUseDistances(MF));
- T1->stopTimer();
- LLVM_DEBUG(TG->print(llvm::errs()));
- }
+ T1->stopTimer();
+ LLVM_DEBUG(TG->print(llvm::errs()));
+}
void NextUseResult::getFromSortedRecords(
const VRegDistances::SortedRecords Dists, LaneBitmask Mask, unsigned &D) {
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) <<"]\n");
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) << "]\n");
for (auto P : Dists) {
// Records are sorted in distance increasing order. So, the first record
// is for the closest use.
@@ -203,7 +204,8 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
if (NextUseMap[MBBNum].InstrDist[&*I].contains(VMP.getVReg())) {
VRegDistances::SortedRecords Dists =
NextUseMap[MBBNum].InstrDist[&*I][VMP.getVReg()];
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
+ << "]\n");
for (auto P : reverse(Dists)) {
LaneBitmask UseMask = P.first;
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask)
@@ -224,8 +226,10 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
unsigned MBBNum = MBB.getNumber();
if (NextUseMap.contains(MBBNum) &&
NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
- VRegDistances::SortedRecords Dists = NextUseMap[MBBNum].Bottom[VMP.getVReg()];
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ VRegDistances::SortedRecords Dists =
+ NextUseMap[MBBNum].Bottom[VMP.getVReg()];
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
+ << "]\n");
for (auto P : reverse(Dists)) {
LaneBitmask UseMask = P.first;
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n");
@@ -271,8 +275,8 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
unsigned MBBNum = MBB.getNumber();
if (NextUseMap.contains(MBBNum)) {
if (NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
- getFromSortedRecords(NextUseMap[MBBNum].Bottom[VMP.getVReg()], VMP.getLaneMask(),
- Dist);
+ getFromSortedRecords(NextUseMap[MBBNum].Bottom[VMP.getVReg()],
+ VMP.getLaneMask(), Dist);
}
}
return Dist;
@@ -310,8 +314,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUNextUseAnalysisWrapper, "amdgpu-next-use",
"AMDGPU Next Use Analysis", false, false)
-bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(
- MachineFunction &MF) {
+bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(MachineFunction &MF) {
NU.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
NU.LI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
NU.MRI = &MF.getRegInfo();
@@ -319,12 +322,11 @@ bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(
assert(NU.MRI->isSSA());
NU.init(MF);
NU.analyze(MF);
-// LLVM_DEBUG(NU.dump());
+ // LLVM_DEBUG(NU.dump());
return false;
}
-void AMDGPUNextUseAnalysisWrapper::getAnalysisUsage(
- AnalysisUsage &AU) const {
+void AMDGPUNextUseAnalysisWrapper::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<SlotIndexesWrapperPass>();
@@ -336,44 +338,45 @@ AMDGPUNextUseAnalysisWrapper::AMDGPUNextUseAnalysisWrapper()
initializeAMDGPUNextUseAnalysisWrapperPass(*PassRegistry::getPassRegistry());
}
void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "=== NextUseAnalysis Results for " << MF.getName() << " ===\n");
-
+ LLVM_DEBUG(dbgs() << "=== NextUseAnalysis Results for " << MF.getName()
+ << " ===\n");
+
for (const auto &MBB : MF) {
unsigned MBBNum = MBB.getNumber();
LLVM_DEBUG(dbgs() << "\n--- MBB_" << MBBNum << " ---\n");
-
+
if (!NextUseMap.contains(MBBNum)) {
LLVM_DEBUG(dbgs() << " No analysis data for this block\n");
continue;
}
-
+
const NextUseInfo &Info = NextUseMap.at(MBBNum);
-
+
// Process each instruction in the block
for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
const MachineInstr &MI = *II;
-
+
// Print instruction
LLVM_DEBUG(dbgs() << " Instr: ");
- LLVM_DEBUG(MI.print(dbgs(), /*IsStandalone=*/false, /*SkipOpers=*/false,
- /*SkipDebugLoc=*/true, /*AddNewLine=*/false));
+ LLVM_DEBUG(MI.print(dbgs(), /*IsStandalone=*/false, /*SkipOpers=*/false,
+ /*SkipDebugLoc=*/true, /*AddNewLine=*/false));
LLVM_DEBUG(dbgs() << "\n");
-
+
// Print distances at this instruction
if (Info.InstrDist.contains(&MI)) {
const VRegDistances &Dists = Info.InstrDist.at(&MI);
LLVM_DEBUG(dbgs() << " Next-use distances:\n");
-
+
for (const auto &VRegEntry : Dists) {
unsigned VReg = VRegEntry.getFirst();
const auto &Records = VRegEntry.getSecond();
-
+
for (const auto &Record : Records) {
LaneBitmask LaneMask = Record.first;
unsigned Distance = Record.second;
-
+
LLVM_DEBUG(dbgs() << " ");
-
+
// Print register with sub-register if applicable
LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
if (LaneMask != FullMask) {
@@ -382,7 +385,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
} else {
LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
}
-
+
if (Distance == Infinity) {
LLVM_DEBUG(dbgs() << " -> DEAD (infinite distance)\n");
} else {
@@ -390,7 +393,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
}
}
}
-
+
if (Dists.size() == 0) {
LLVM_DEBUG(dbgs() << " (no register uses)\n");
}
@@ -398,19 +401,19 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
LLVM_DEBUG(dbgs() << " (no distance data)\n");
}
}
-
+
// Print distances at end of block
LLVM_DEBUG(dbgs() << " Block End Distances:\n");
for (const auto &VRegEntry : Info.Bottom) {
unsigned VReg = VRegEntry.getFirst();
const auto &Records = VRegEntry.getSecond();
-
+
for (const auto &Record : Records) {
LaneBitmask LaneMask = Record.first;
unsigned Distance = Record.second;
-
+
LLVM_DEBUG(dbgs() << " ");
-
+
LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
if (LaneMask != FullMask) {
unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
@@ -418,7 +421,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
} else {
LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
}
-
+
if (Distance == Infinity) {
LLVM_DEBUG(dbgs() << " -> DEAD\n");
} else {
@@ -426,11 +429,11 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
}
}
}
-
+
if (Info.Bottom.size() == 0) {
LLVM_DEBUG(dbgs() << " (no registers live at block end)\n");
}
}
-
+
LLVM_DEBUG(dbgs() << "\n=== End NextUseAnalysis Results ===\n");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
index 101ee7640a0bd..bd774b290d7b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
@@ -14,9 +14,9 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "SIRegisterInfo.h"
-#include "GCNSubtarget.h"
#include "AMDGPUSSARAUtils.h"
+#include "GCNSubtarget.h"
+#include "SIRegisterInfo.h"
#include "VRegMaskPair.h"
#include <algorithm>
@@ -27,7 +27,6 @@ using namespace llvm;
// namespace {
-
class NextUseResult {
friend class AMDGPUNextUseAnalysisWrapper;
SlotIndexes *Indexes;
@@ -53,8 +52,9 @@ class NextUseResult {
}
};
-public:
+ public:
using SortedRecords = std::set<Record, CompareByDist>;
+
private:
DenseMap<unsigned, SortedRecords> NextUseMap;
@@ -81,9 +81,7 @@ class NextUseResult {
return Keys;
}
- bool contains(unsigned Key) {
- return NextUseMap.contains(Key);
- }
+ bool contains(unsigned Key) { return NextUseMap.contains(Key); }
bool insert(VRegMaskPair VMP, unsigned Dist) {
Record R(VMP.getLaneMask(), Dist);
@@ -115,15 +113,16 @@ class NextUseResult {
void clear(VRegMaskPair VMP) {
if (NextUseMap.contains(VMP.getVReg())) {
auto &Dists = NextUseMap[VMP.getVReg()];
- std::erase_if(Dists,
- [&](Record R) { return (R.first &= ~VMP.getLaneMask()).none(); });
+ std::erase_if(Dists, [&](Record R) {
+ return (R.first &= ~VMP.getLaneMask()).none();
+ });
if (Dists.empty())
NextUseMap.erase(VMP.getVReg());
}
}
- bool operator == (const VRegDistances Other) const {
-
+ bool operator==(const VRegDistances Other) const {
+
if (Other.size() != size())
return false;
@@ -181,7 +180,7 @@ class NextUseResult {
};
class NextUseInfo {
// FIXME: need to elaborate proper class interface!
- public:
+ public:
VRegDistances Bottom;
DenseMap<const MachineInstr *, VRegDistances> InstrDist;
};
@@ -189,8 +188,6 @@ class NextUseResult {
DenseMap<unsigned, NextUseInfo> NextUseMap;
public:
-
-
private:
DenseMap<unsigned, SetVector<VRegMaskPair>> UsedInBlock;
DenseMap<int, int> LoopExits;
@@ -247,9 +244,8 @@ class NextUseResult {
getSortedSubregUses(const MachineBasicBlock::iterator I,
const VRegMaskPair VMP);
- SmallVector<VRegMaskPair>
- getSortedSubregUses(const MachineBasicBlock &MBB,
- const VRegMaskPair VMP);
+ SmallVector<VRegMaskPair> getSortedSubregUses(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP);
bool isDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const VRegMaskPair VMP) {
@@ -270,7 +266,7 @@ class NextUseResult {
: getNextUseDistance(I, VMP) == Infinity;
}
- SetVector<VRegMaskPair>& usedInBlock(MachineBasicBlock &MBB) {
+ SetVector<VRegMaskPair> &usedInBlock(MachineBasicBlock &MBB) {
return UsedInBlock[MBB.getNumber()];
}
diff --git a/llvm/lib/Target/AMDGPU/VRegMaskPair.h b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
index de4e8b818e28d..b08d082a0c66a 100644
--- a/llvm/lib/Target/AMDGPU/VRegMaskPair.h
+++ b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
@@ -1,5 +1,5 @@
//===------- VRegMaskPair.h ----------------------------------------*-
-//C++-*-===//
+// C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -21,11 +21,11 @@
#ifndef LLVM_LIB_TARGET_VREGMASKPAIR_H
#define LLVM_LIB_TARGET_VREGMASKPAIR_H
-#include "llvm/CodeGen/Register.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -54,350 +54,345 @@ class VRegMaskPair {
VReg = MO.getReg();
LaneMask = MO.getSubReg() ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
: MRI->getMaxLaneMaskForVReg(VReg);
- }
+ }
+
+ const Register getVReg() const { return VReg; }
+ const LaneBitmask getLaneMask() const { return LaneMask; }
+
+ unsigned getSubReg(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask == Mask)
+ return AMDGPU::NoRegister;
+ return getSubRegIndexForLaneMask(LaneMask, TRI);
+ }
+
+ const TargetRegisterClass *getRegClass(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, VReg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask != Mask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
+ return TRI->getSubRegisterClass(RC, SubRegIdx);
+ }
+ return RC;
+ }
+
+ unsigned getSizeInRegs(const SIRegisterInfo *TRI) const {
+ return TRI->getNumCoveredRegs(LaneMask);
+ }
+
+ bool operator==(const VRegMaskPair &other) const {
+ return VReg == other.VReg && LaneMask == other.LaneMask;
+ }
+};
+
+class LaneCoverageResult {
+ friend class VRegMaskPairSet;
+ LaneBitmask Data;
+ LaneBitmask Covered;
+ LaneBitmask NotCovered;
- const Register getVReg() const { return VReg; }
- const LaneBitmask getLaneMask() const { return LaneMask; }
+public:
+ LaneCoverageResult() = default;
+ LaneCoverageResult(const LaneBitmask Mask) : Data(Mask), NotCovered(Mask){};
+ bool isFullyCovered() { return Data == Covered; }
+ bool isFullyUncovered() { return Data == NotCovered; }
+ LaneBitmask getCovered() { return Covered; }
+ LaneBitmask getNotCovered() { return NotCovered; }
+};
- unsigned getSubReg(const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI) const {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
- if (LaneMask == Mask)
- return AMDGPU::NoRegister;
- return getSubRegIndexForLaneMask(LaneMask, TRI);
- }
+class VRegMaskPairSet {
- const TargetRegisterClass *getRegClass(const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI) const {
- const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, VReg);
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
- if (LaneMask != Mask) {
- unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
- return TRI->getSubRegisterClass(RC, SubRegIdx);
- }
- return RC;
- }
+ using MaskSet = std::set<LaneBitmask>;
+ using SetStorageT = DenseMap<Register, MaskSet>;
+ using LinearStorageT = std::vector<VRegMaskPair>;
- unsigned getSizeInRegs(const SIRegisterInfo *TRI) const {
- return TRI->getNumCoveredRegs(LaneMask);
- }
+ SetStorageT SetStorage;
+ LinearStorageT LinearStorage;
- bool operator==(const VRegMaskPair &other) const {
- return VReg == other.VReg && LaneMask == other.LaneMask;
- }
- };
-
- class LaneCoverageResult {
- friend class VRegMaskPairSet;
- LaneBitmask Data;
- LaneBitmask Covered;
- LaneBitmask NotCovered;
-
- public:
- LaneCoverageResult() = default;
- LaneCoverageResult(const LaneBitmask Mask)
- : Data(Mask), NotCovered(Mask){};
- bool isFullyCovered() { return Data == Covered; }
- bool isFullyUncovered() { return Data == NotCovered; }
- LaneBitmask getCovered() { return Covered; }
- LaneBitmask getNotCovered() { return NotCovered; }
- };
-
- class VRegMaskPairSet {
-
- using MaskSet = std::set<LaneBitmask>;
- using SetStorageT = DenseMap<Register, MaskSet>;
- using LinearStorageT = std::vector<VRegMaskPair>;
-
- SetStorageT SetStorage;
- LinearStorageT LinearStorage;
-
- public:
-
- VRegMaskPairSet() = default;
-
- template <typename ContainerT,
- typename = std::enable_if_t<std::is_same<
- typename ContainerT::value_type, VRegMaskPair>::value>>
- VRegMaskPairSet(const ContainerT &Vec) {
- for (const auto &VMP : Vec)
- insert(VMP);
+public:
+ VRegMaskPairSet() = default;
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(const ContainerT &Vec) {
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ }
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(ContainerT &&Vec) {
+ for (auto &&VMP : Vec)
+ insert(std::move(VMP));
+ }
+
+ bool insert(const VRegMaskPair &VMP) {
+ auto &MaskSet = SetStorage[VMP.VReg];
+ auto Inserted = MaskSet.insert(VMP.LaneMask);
+ if (!Inserted.second)
+ return false;
+ LinearStorage.push_back(VMP);
+ return true;
+ }
+
+ template <typename InputIt> void insert(InputIt First, InputIt Last) {
+ for (auto It = First; It != Last; ++It)
+ insert(*It);
+ }
+
+ void remove(const VRegMaskPair &VMP) {
+ auto MapIt = SetStorage.find(VMP.VReg);
+ if (MapIt == SetStorage.end())
+ return;
+
+ size_t Erased = MapIt->second.erase(VMP.LaneMask);
+ if (!Erased)
+ return;
+
+ if (MapIt->second.empty())
+ SetStorage.erase(MapIt);
+
+ auto VecIt = std::find(LinearStorage.begin(), LinearStorage.end(), VMP);
+ if (VecIt != LinearStorage.end()) {
+ LinearStorage.erase(VecIt);
+ } else {
+ llvm_unreachable("Inconsistent LinearStorage: VMP missing on remove");
+ }
+ }
+
+ template <typename Predicate> void remove_if(Predicate Pred) {
+ for (auto It = LinearStorage.begin(); It != LinearStorage.end();) {
+ const VRegMaskPair VMP = *It;
+ if (Pred(VMP)) {
+ It = LinearStorage.erase(It);
+ SetStorage[VMP.VReg].erase(VMP.LaneMask);
+ if (SetStorage[VMP.VReg].empty())
+ SetStorage.erase(VMP.VReg);
+ } else {
+ ++It;
}
-
- template <typename ContainerT,
- typename = std::enable_if_t<std::is_same<
- typename ContainerT::value_type, VRegMaskPair>::value>>
- VRegMaskPairSet(ContainerT &&Vec) {
- for (auto &&VMP : Vec)
- insert(std::move(VMP));
+ }
+ }
+
+ bool count(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ if (It == SetStorage.end())
+ return false;
+
+ return It->second.count(VMP.LaneMask) > 0;
+ }
+
+ bool contains(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ return It != SetStorage.end() && It->second.contains(VMP.LaneMask);
+ }
+
+ void clear() {
+ SetStorage.clear();
+ LinearStorage.clear();
+ }
+
+ size_t size() const { return LinearStorage.size(); }
+ bool empty() const { return LinearStorage.empty(); }
+
+ void sort(llvm::function_ref<bool(const VRegMaskPair &, const VRegMaskPair &)>
+ Cmp) {
+ std::sort(LinearStorage.begin(), LinearStorage.end(), Cmp);
+ }
+
+ VRegMaskPair pop_back_val() {
+ assert(!LinearStorage.empty() && "Pop from empty set");
+ VRegMaskPair VMP = LinearStorage.back();
+ LinearStorage.pop_back();
+
+ auto It = SetStorage.find(VMP.VReg);
+ assert(It != SetStorage.end() && "Inconsistent SetStorage");
+ It->second.erase(VMP.LaneMask);
+ if (It->second.empty())
+ SetStorage.erase(It);
+
+ return VMP;
+ }
+
+ LaneCoverageResult getCoverage(const VRegMaskPair &VMP) const {
+ LaneCoverageResult Result(VMP.LaneMask);
+ auto It = SetStorage.find(VMP.VReg);
+ if (It != SetStorage.end()) {
+ MaskSet Masks = It->second;
+ for (auto Mask : Masks) {
+ Result.Covered |= (Mask & VMP.LaneMask);
}
-
- bool insert(const VRegMaskPair &VMP) {
- auto &MaskSet = SetStorage[VMP.VReg];
- auto Inserted = MaskSet.insert(VMP.LaneMask);
- if (!Inserted.second)
- return false;
- LinearStorage.push_back(VMP);
+ Result.NotCovered = (VMP.LaneMask & ~Result.Covered);
+ }
+ return Result;
+ }
+
+ bool operator==(const VRegMaskPairSet &Other) const {
+ if (SetStorage.size() != Other.SetStorage.size())
+ return false;
+
+ for (const auto &Entry : SetStorage) {
+ auto It = Other.SetStorage.find(Entry.first);
+ if (It == Other.SetStorage.end())
+ return false;
+
+ if (Entry.second != It->second)
+ return false;
+ }
+
+ return true;
+ }
+
+ template <typename ContainerT>
+ VRegMaskPairSet &operator=(const ContainerT &Vec) {
+ static_assert(
+ std::is_same<typename ContainerT::value_type, VRegMaskPair>::value,
+ "Container must hold VRegMaskPair elements");
+
+ clear();
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ return *this;
+ }
+
+ // Set operations based on subregister coverage logic
+
+ /// Adds all elements from Other whose (VReg, LaneMask) overlap with none
+ /// in *this.
+ void set_union(const VRegMaskPairSet &Other) {
+ for (const auto &VMP : Other)
+ insert(VMP);
+ }
+
+ /// Keeps only those elements in *this that are at least partially covered
+ /// by Other.
+ void set_intersect(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyUncovered())
return true;
- }
-
- template <typename InputIt> void insert(InputIt First, InputIt Last) {
- for (auto It = First; It != Last; ++It)
- insert(*It);
- }
- void remove(const VRegMaskPair &VMP) {
- auto MapIt = SetStorage.find(VMP.VReg);
- if (MapIt == SetStorage.end())
- return;
-
- size_t Erased = MapIt->second.erase(VMP.LaneMask);
- if (!Erased)
- return;
-
- if (MapIt->second.empty())
- SetStorage.erase(MapIt);
-
- auto VecIt = std::find(LinearStorage.begin(), LinearStorage.end(), VMP);
- if (VecIt != LinearStorage.end()) {
- LinearStorage.erase(VecIt);
- } else {
- llvm_unreachable("Inconsistent LinearStorage: VMP missing on remove");
- }
- }
-
- template <typename Predicate> void remove_if(Predicate Pred) {
- for (auto It = LinearStorage.begin(); It != LinearStorage.end();) {
- const VRegMaskPair VMP = *It;
- if (Pred(VMP)) {
- It = LinearStorage.erase(It);
- SetStorage[VMP.VReg].erase(VMP.LaneMask);
- if (SetStorage[VMP.VReg].empty())
- SetStorage.erase(VMP.VReg);
- } else {
- ++It;
- }
- }
+ if (!Cov.isFullyCovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getCovered()});
+ return true; // remove current, will reinsert trimmed version
}
- bool count(const VRegMaskPair &VMP) const {
- auto It = SetStorage.find(VMP.VReg);
- if (It == SetStorage.end())
- return false;
-
- return It->second.count(VMP.LaneMask) > 0;
- }
-
- bool contains(const VRegMaskPair &VMP) const {
- auto It = SetStorage.find(VMP.VReg);
- return It != SetStorage.end() && It->second.contains(VMP.LaneMask);
- }
+ return false; // keep as-is
+ });
- void clear() {
- SetStorage.clear();
- LinearStorage.clear();
- }
-
- size_t size() const { return LinearStorage.size(); }
- bool empty() const { return LinearStorage.empty(); }
-
- void
- sort(llvm::function_ref<bool(const VRegMaskPair &, const VRegMaskPair &)>
- Cmp) {
- std::sort(LinearStorage.begin(), LinearStorage.end(), Cmp);
- }
-
- VRegMaskPair pop_back_val() {
- assert(!LinearStorage.empty() && "Pop from empty set");
- VRegMaskPair VMP = LinearStorage.back();
- LinearStorage.pop_back();
-
- auto It = SetStorage.find(VMP.VReg);
- assert(It != SetStorage.end() && "Inconsistent SetStorage");
- It->second.erase(VMP.LaneMask);
- if (It->second.empty())
- SetStorage.erase(It);
-
- return VMP;
- }
-
- LaneCoverageResult getCoverage(const VRegMaskPair &VMP) const {
- LaneCoverageResult Result(VMP.LaneMask);
- auto It = SetStorage.find(VMP.VReg);
- if (It != SetStorage.end()) {
- MaskSet Masks = It->second;
- for (auto Mask : Masks) {
- Result.Covered |= (Mask & VMP.LaneMask);
- }
- Result.NotCovered = (VMP.LaneMask & ~Result.Covered);
- }
- return Result;
- }
-
- bool operator==(const VRegMaskPairSet &Other) const {
- if (SetStorage.size() != Other.SetStorage.size())
- return false;
-
- for (const auto &Entry : SetStorage) {
- auto It = Other.SetStorage.find(Entry.first);
- if (It == Other.SetStorage.end())
- return false;
-
- if (Entry.second != It->second)
- return false;
- }
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+ /// Removes elements from *this that are at least partially covered by
+ /// Other.
+ void set_subtract(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyCovered())
return true;
- }
-
- template <typename ContainerT>
- VRegMaskPairSet &operator=(const ContainerT &Vec) {
- static_assert(
- std::is_same<typename ContainerT::value_type, VRegMaskPair>::value,
- "Container must hold VRegMaskPair elements");
- clear();
- for (const auto &VMP : Vec)
- insert(VMP);
- return *this;
+ if (!Cov.isFullyUncovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getNotCovered()});
+ return true; // remove and reinsert uncovered part
}
- // Set operations based on subregister coverage logic
-
- /// Adds all elements from Other whose (VReg, LaneMask) overlap with none
- /// in *this.
- void set_union(const VRegMaskPairSet &Other) {
- for (const auto &VMP : Other)
- insert(VMP);
+ return false;
+ });
+
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+
+ /// Returns the union (join) of this set and Other under coverage logic.
+ VRegMaskPairSet set_join(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result = *this;
+ Result.set_union(Other);
+ return Result;
+ }
+
+ /// Returns the intersection of this set and Other based on partial
+ /// overlap.
+ VRegMaskPairSet set_intersection(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyUncovered()) {
+ Result.insert({VMP.VReg, Cov.getCovered()});
}
-
- /// Keeps only those elements in *this that are at least partially covered
- /// by Other.
- void set_intersect(const VRegMaskPairSet &Other) {
- std::vector<VRegMaskPair> ToInsert;
- remove_if([&](const VRegMaskPair &VMP) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (Cov.isFullyUncovered())
- return true;
-
- if (!Cov.isFullyCovered()) {
- ToInsert.push_back({VMP.VReg, Cov.getCovered()});
- return true; // remove current, will reinsert trimmed version
- }
-
- return false; // keep as-is
- });
-
- insert(ToInsert.begin(), ToInsert.end());
+ }
+ return Result;
+ }
+
+ /// Returns all elements of *this that do not overlap with anything in
+ /// Other.
+ VRegMaskPairSet set_difference(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyCovered()) {
+ Result.insert({VMP.VReg, Cov.getNotCovered()});
}
-
- /// Removes elements from *this that are at least partially covered by
- /// Other.
- void set_subtract(const VRegMaskPairSet &Other) {
- std::vector<VRegMaskPair> ToInsert;
- remove_if([&](const VRegMaskPair &VMP) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (Cov.isFullyCovered())
- return true;
-
- if (!Cov.isFullyUncovered()) {
- ToInsert.push_back({VMP.VReg, Cov.getNotCovered()});
- return true; // remove and reinsert uncovered part
- }
-
- return false;
- });
-
- insert(ToInsert.begin(), ToInsert.end());
+ }
+ return Result;
+ }
+
+ // Debug
+ void dump() const {
+ dbgs() << "=== VRegMaskPairSet Dump ===\n";
+
+ dbgs() << "SetStorage:\n";
+ for (const auto &Entry : SetStorage) {
+ dbgs() << " VReg: " << printReg(Entry.first) << " => { ";
+ for (const auto &Mask : Entry.second) {
+ dbgs() << PrintLaneMask(Mask) << " ";
}
-
- /// Returns the union (join) of this set and Other under coverage logic.
- VRegMaskPairSet set_join(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result = *this;
- Result.set_union(Other);
- return Result;
- }
-
- /// Returns the intersection of this set and Other based on partial
- /// overlap.
- VRegMaskPairSet set_intersection(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result;
- for (const auto &VMP : *this) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (!Cov.isFullyUncovered()) {
- Result.insert({VMP.VReg, Cov.getCovered()});
- }
- }
- return Result;
- }
-
- /// Returns all elements of *this that do not overlap with anything in
- /// Other.
- VRegMaskPairSet set_difference(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result;
- for (const auto &VMP : *this) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (!Cov.isFullyCovered()) {
- Result.insert({VMP.VReg, Cov.getNotCovered()});
- }
- }
- return Result;
- }
-
- // Debug
- void dump() const {
- dbgs() << "=== VRegMaskPairSet Dump ===\n";
-
- dbgs() << "SetStorage:\n";
- for (const auto &Entry : SetStorage) {
- dbgs() << " VReg: " << printReg(Entry.first) << " => { ";
- for (const auto &Mask : Entry.second) {
- dbgs() << PrintLaneMask(Mask) << " ";
- }
- dbgs() << "}\n";
- }
-
- dbgs() << "LinearStorage (insertion order):\n";
- for (const auto &VMP : LinearStorage) {
- dbgs() << " (" << printReg(VMP.getVReg()) << ", "
- << PrintLaneMask(VMP.getLaneMask()) << ")\n";
- }
-
- dbgs() << "=============================\n";
- }
-
- // Iterators
- using iterator = LinearStorageT::const_iterator;
- iterator begin() const { return LinearStorage.begin(); }
- iterator end() const { return LinearStorage.end(); }
- };
-
- namespace llvm {
- template <> struct DenseMapInfo<VRegMaskPair> {
- static inline VRegMaskPair getEmptyKey() {
- return {Register(DenseMapInfo<unsigned>::getEmptyKey()),
- LaneBitmask(0xFFFFFFFFFFFFFFFFULL)};
- }
-
- static inline VRegMaskPair getTombstoneKey() {
- return {Register(DenseMapInfo<unsigned>::getTombstoneKey()),
- LaneBitmask(0xFFFFFFFFFFFFFFFEULL)};
- }
-
- static unsigned getHashValue(const VRegMaskPair &P) {
- return DenseMapInfo<unsigned>::getHashValue(P.getVReg().id()) ^
- DenseMapInfo<uint64_t>::getHashValue(
- P.getLaneMask().getAsInteger());
- }
-
- static bool isEqual(const VRegMaskPair &LHS, const VRegMaskPair &RHS) {
- return DenseMapInfo<unsigned>::isEqual(LHS.getVReg().id(),
- RHS.getVReg().id()) &&
- DenseMapInfo<uint64_t>::isEqual(
- LHS.getLaneMask().getAsInteger(),
- RHS.getLaneMask().getAsInteger());
- }
- };
-
- } // namespace llvm
+ dbgs() << "}\n";
+ }
+
+ dbgs() << "LinearStorage (insertion order):\n";
+ for (const auto &VMP : LinearStorage) {
+ dbgs() << " (" << printReg(VMP.getVReg()) << ", "
+ << PrintLaneMask(VMP.getLaneMask()) << ")\n";
+ }
+
+ dbgs() << "=============================\n";
+ }
+
+ // Iterators
+ using iterator = LinearStorageT::const_iterator;
+ iterator begin() const { return LinearStorage.begin(); }
+ iterator end() const { return LinearStorage.end(); }
+};
+
+namespace llvm {
+template <> struct DenseMapInfo<VRegMaskPair> {
+ static inline VRegMaskPair getEmptyKey() {
+ return {Register(DenseMapInfo<unsigned>::getEmptyKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFFULL)};
+ }
+
+ static inline VRegMaskPair getTombstoneKey() {
+ return {Register(DenseMapInfo<unsigned>::getTombstoneKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFEULL)};
+ }
+
+ static unsigned getHashValue(const VRegMaskPair &P) {
+ return DenseMapInfo<unsigned>::getHashValue(P.getVReg().id()) ^
+ DenseMapInfo<uint64_t>::getHashValue(P.getLaneMask().getAsInteger());
+ }
+
+ static bool isEqual(const VRegMaskPair &LHS, const VRegMaskPair &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.getVReg().id(),
+ RHS.getVReg().id()) &&
+ DenseMapInfo<uint64_t>::isEqual(LHS.getLaneMask().getAsInteger(),
+ RHS.getLaneMask().getAsInteger());
+ }
+};
+
+} // namespace llvm
#endif // LLVM_LIB_TARGET_VREGMASKPAIR_H
\ No newline at end of file
More information about the llvm-commits
mailing list