[llvm] SSA regalloc integration draft (PR #156049)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 29 09:03:22 PDT 2025
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h llvm/lib/Target/AMDGPU/AMDGPURebuildSSA.cpp llvm/lib/Target/AMDGPU/AMDGPUSSARAUtils.h llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.cpp llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.h llvm/lib/Target/AMDGPU/VRegMaskPair.h llvm/unittests/CodeGen/VRegMaskPairTest.cpp llvm/include/llvm/CodeGen/MachineSSAUpdater.h llvm/include/llvm/CodeGen/TargetInstrInfo.h llvm/lib/CodeGen/MachineSSAUpdater.cpp llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.h llvm/lib/Target/X86/X86InstrInfo.cpp llvm/lib/Target/X86/X86InstrInfo.h
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 58d8feb01..65c4c2464 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1159,8 +1159,8 @@ public:
MachineBasicBlock::iterator MI,
Register SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- Register VReg, unsigned SubRegIdx = 0) const {
+ const TargetRegisterInfo *TRI, Register VReg,
+ unsigned SubRegIdx = 0) const {
llvm_unreachable("Target didn't implement "
"TargetInstrInfo::storeRegToStackSlot!");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
index 0c2feca1e..028826182 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
@@ -24,8 +24,7 @@
using namespace llvm;
-//namespace {
-
+// namespace {
void NextUseResult::init(const MachineFunction &MF) {
TG = new TimerGroup("Next Use Analysis",
@@ -168,17 +167,17 @@ void NextUseResult::analyze(const MachineFunction &MF) {
Changed |= Changed4MBB;
}
- }
- dumpUsedInBlock();
+ }
+ dumpUsedInBlock();
// Dump complete analysis results for testing
LLVM_DEBUG(dumpAllNextUseDistances(MF));
- T1->stopTimer();
- LLVM_DEBUG(TG->print(llvm::errs()));
- }
+ T1->stopTimer();
+ LLVM_DEBUG(TG->print(llvm::errs()));
+}
void NextUseResult::getFromSortedRecords(
const VRegDistances::SortedRecords Dists, LaneBitmask Mask, unsigned &D) {
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) <<"]\n");
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) << "]\n");
for (auto P : Dists) {
// Records are sorted in distance increasing order. So, the first record
// is for the closest use.
@@ -203,7 +202,8 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
if (NextUseMap[MBBNum].InstrDist[&*I].contains(VMP.getVReg())) {
VRegDistances::SortedRecords Dists =
NextUseMap[MBBNum].InstrDist[&*I][VMP.getVReg()];
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
+ << "]\n");
for (auto P : reverse(Dists)) {
LaneBitmask UseMask = P.first;
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask)
@@ -224,8 +224,10 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
unsigned MBBNum = MBB.getNumber();
if (NextUseMap.contains(MBBNum) &&
NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
- VRegDistances::SortedRecords Dists = NextUseMap[MBBNum].Bottom[VMP.getVReg()];
- LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n");
+ VRegDistances::SortedRecords Dists =
+ NextUseMap[MBBNum].Bottom[VMP.getVReg()];
+ LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
+ << "]\n");
for (auto P : reverse(Dists)) {
LaneBitmask UseMask = P.first;
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n");
@@ -238,8 +240,7 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
}
void NextUseResult::dumpUsedInBlock() {
- LLVM_DEBUG(for (auto P
- : UsedInBlock) {
+ LLVM_DEBUG(for (auto P : UsedInBlock) {
dbgs() << "MBB_" << P.first << ":\n";
for (auto VMP : P.second) {
dbgs() << "[ " << printReg(VMP.getVReg()) << " : <"
@@ -271,8 +272,8 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
unsigned MBBNum = MBB.getNumber();
if (NextUseMap.contains(MBBNum)) {
if (NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
- getFromSortedRecords(NextUseMap[MBBNum].Bottom[VMP.getVReg()], VMP.getLaneMask(),
- Dist);
+ getFromSortedRecords(NextUseMap[MBBNum].Bottom[VMP.getVReg()],
+ VMP.getLaneMask(), Dist);
}
}
return Dist;
@@ -310,8 +311,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUNextUseAnalysisWrapper, "amdgpu-next-use",
"AMDGPU Next Use Analysis", false, false)
-bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(
- MachineFunction &MF) {
+bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(MachineFunction &MF) {
NU.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
NU.LI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
NU.MRI = &MF.getRegInfo();
@@ -319,12 +319,11 @@ bool AMDGPUNextUseAnalysisWrapper::runOnMachineFunction(
assert(NU.MRI->isSSA());
NU.init(MF);
NU.analyze(MF);
-// LLVM_DEBUG(NU.dump());
+ // LLVM_DEBUG(NU.dump());
return false;
}
-void AMDGPUNextUseAnalysisWrapper::getAnalysisUsage(
- AnalysisUsage &AU) const {
+void AMDGPUNextUseAnalysisWrapper::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<SlotIndexesWrapperPass>();
@@ -336,44 +335,45 @@ AMDGPUNextUseAnalysisWrapper::AMDGPUNextUseAnalysisWrapper()
initializeAMDGPUNextUseAnalysisWrapperPass(*PassRegistry::getPassRegistry());
}
void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "=== NextUseAnalysis Results for " << MF.getName() << " ===\n");
-
+ LLVM_DEBUG(dbgs() << "=== NextUseAnalysis Results for " << MF.getName()
+ << " ===\n");
+
for (const auto &MBB : MF) {
unsigned MBBNum = MBB.getNumber();
LLVM_DEBUG(dbgs() << "\n--- MBB_" << MBBNum << " ---\n");
-
+
if (!NextUseMap.contains(MBBNum)) {
LLVM_DEBUG(dbgs() << " No analysis data for this block\n");
continue;
}
-
+
const NextUseInfo &Info = NextUseMap.at(MBBNum);
-
+
// Process each instruction in the block
for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
const MachineInstr &MI = *II;
-
+
// Print instruction
LLVM_DEBUG(dbgs() << " Instr: ");
- LLVM_DEBUG(MI.print(dbgs(), /*IsStandalone=*/false, /*SkipOpers=*/false,
- /*SkipDebugLoc=*/true, /*AddNewLine=*/false));
+ LLVM_DEBUG(MI.print(dbgs(), /*IsStandalone=*/false, /*SkipOpers=*/false,
+ /*SkipDebugLoc=*/true, /*AddNewLine=*/false));
LLVM_DEBUG(dbgs() << "\n");
-
+
// Print distances at this instruction
if (Info.InstrDist.contains(&MI)) {
const VRegDistances &Dists = Info.InstrDist.at(&MI);
LLVM_DEBUG(dbgs() << " Next-use distances:\n");
-
+
for (const auto &VRegEntry : Dists) {
unsigned VReg = VRegEntry.getFirst();
const auto &Records = VRegEntry.getSecond();
-
+
for (const auto &Record : Records) {
LaneBitmask LaneMask = Record.first;
unsigned Distance = Record.second;
-
+
LLVM_DEBUG(dbgs() << " ");
-
+
// Print register with sub-register if applicable
LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
if (LaneMask != FullMask) {
@@ -382,7 +382,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
} else {
LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
}
-
+
if (Distance == Infinity) {
LLVM_DEBUG(dbgs() << " -> DEAD (infinite distance)\n");
} else {
@@ -390,7 +390,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
}
}
}
-
+
if (Dists.size() == 0) {
LLVM_DEBUG(dbgs() << " (no register uses)\n");
}
@@ -398,19 +398,19 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
LLVM_DEBUG(dbgs() << " (no distance data)\n");
}
}
-
+
// Print distances at end of block
LLVM_DEBUG(dbgs() << " Block End Distances:\n");
for (const auto &VRegEntry : Info.Bottom) {
unsigned VReg = VRegEntry.getFirst();
const auto &Records = VRegEntry.getSecond();
-
+
for (const auto &Record : Records) {
LaneBitmask LaneMask = Record.first;
unsigned Distance = Record.second;
-
+
LLVM_DEBUG(dbgs() << " ");
-
+
LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(VReg);
if (LaneMask != FullMask) {
unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
@@ -418,7 +418,7 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
} else {
LLVM_DEBUG(dbgs() << printReg(VReg, TRI));
}
-
+
if (Distance == Infinity) {
LLVM_DEBUG(dbgs() << " -> DEAD\n");
} else {
@@ -426,11 +426,11 @@ void NextUseResult::dumpAllNextUseDistances(const MachineFunction &MF) {
}
}
}
-
+
if (Info.Bottom.size() == 0) {
LLVM_DEBUG(dbgs() << " (no registers live at block end)\n");
}
}
-
+
LLVM_DEBUG(dbgs() << "\n=== End NextUseAnalysis Results ===\n");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
index 22536f5de..aeebaa6e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
@@ -14,9 +14,9 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "SIRegisterInfo.h"
-#include "GCNSubtarget.h"
#include "AMDGPUSSARAUtils.h"
+#include "GCNSubtarget.h"
+#include "SIRegisterInfo.h"
#include "VRegMaskPair.h"
#include <algorithm>
@@ -27,7 +27,6 @@ using namespace llvm;
// namespace {
-
class NextUseResult {
friend class AMDGPUNextUseAnalysisWrapper;
SlotIndexes *Indexes;
@@ -80,9 +79,7 @@ class NextUseResult {
return Keys;
}
- bool contains(unsigned Key) {
- return NextUseMap.contains(Key);
- }
+ bool contains(unsigned Key) { return NextUseMap.contains(Key); }
bool insert(VRegMaskPair VMP, unsigned Dist) {
Record R(VMP.getLaneMask(), Dist);
@@ -114,21 +111,22 @@ class NextUseResult {
void clear(VRegMaskPair VMP) {
if (NextUseMap.contains(VMP.getVReg())) {
auto &Dists = NextUseMap[VMP.getVReg()];
- std::erase_if(Dists,
- [&](Record R) { return (R.first &= ~VMP.getLaneMask()).none(); });
+ std::erase_if(Dists, [&](Record R) {
+ return (R.first &= ~VMP.getLaneMask()).none();
+ });
if (Dists.empty())
NextUseMap.erase(VMP.getVReg());
}
}
- bool operator == (const VRegDistances Other) const {
-
+ bool operator==(const VRegDistances Other) const {
+
if (Other.size() != size())
return false;
for (auto P : NextUseMap) {
unsigned Key = P.getFirst();
-
+
std::pair<bool, SortedRecords> OtherDists = Other.get(P.getFirst());
if (!OtherDists.first)
return false;
@@ -181,7 +179,7 @@ class NextUseResult {
};
class NextUseInfo {
// FIXME: need to elaborate proper class interface!
- public:
+ public:
VRegDistances Bottom;
DenseMap<const MachineInstr *, VRegDistances> InstrDist;
};
@@ -189,8 +187,6 @@ class NextUseResult {
DenseMap<unsigned, NextUseInfo> NextUseMap;
public:
-
-
private:
DenseMap<unsigned, SetVector<VRegMaskPair>> UsedInBlock;
DenseMap<int, int> LoopExits;
@@ -247,9 +243,8 @@ public:
getSortedSubregUses(const MachineBasicBlock::iterator I,
const VRegMaskPair VMP);
- SmallVector<VRegMaskPair>
- getSortedSubregUses(const MachineBasicBlock &MBB,
- const VRegMaskPair VMP);
+ SmallVector<VRegMaskPair> getSortedSubregUses(const MachineBasicBlock &MBB,
+ const VRegMaskPair VMP);
bool isDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const VRegMaskPair VMP) {
@@ -270,7 +265,7 @@ public:
: getNextUseDistance(I, VMP) == Infinity;
}
- SetVector<VRegMaskPair>& usedInBlock(MachineBasicBlock &MBB) {
+ SetVector<VRegMaskPair> &usedInBlock(MachineBasicBlock &MBB) {
return UsedInBlock[MBB.getNumber()];
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.cpp
index 9143e111c..ab1fed5d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.cpp
@@ -8,8 +8,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetMachine.h"
@@ -23,9 +23,7 @@ using namespace llvm;
namespace {
-
-
-class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
+class AMDGPUSSASpiller : public PassInfoMixin<AMDGPUSSASpiller> {
LiveIntervals &LIS;
MachineLoopInfo &LI;
MachineDominatorTree &MDT;
@@ -73,7 +71,7 @@ class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
Timer *T3;
Timer *T4;
struct SpillInfo {
- //MachineBasicBlock *Parent;
+ // MachineBasicBlock *Parent;
RegisterSet ActiveSet;
RegisterSet SpillSet;
};
@@ -96,7 +94,7 @@ class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
dbgs() << printReg(P.VReg) << "]\n";
}
- #ifndef NDEBUG
+#ifndef NDEBUG
void dump() {
for (auto SI : RegisterMap) {
dbgs() << "\nMBB: " << SI.first;
@@ -111,12 +109,10 @@ class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
dbgs() << "\n";
}
}
- #endif
+#endif
void init(MachineFunction &MF, bool IsVGPRs) {
IsVGPRsPass = IsVGPRs;
-
-
NumAvailableRegs =
IsVGPRsPass ? ST->getMaxNumVGPRs(MF) : ST->getMaxNumSGPRs(MF);
@@ -147,8 +143,9 @@ class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
unsigned getLoopMaxRP(MachineLoop *L);
// Returns number of spilled VRegs
- unsigned limit(MachineBasicBlock &MBB, RegisterSet &Active, RegisterSet &Spilled,
- MachineBasicBlock::iterator I, unsigned Limit);
+ unsigned limit(MachineBasicBlock &MBB, RegisterSet &Active,
+ RegisterSet &Spilled, MachineBasicBlock::iterator I,
+ unsigned Limit);
unsigned getRegSetSizeInRegs(const RegisterSet VRegs);
@@ -163,7 +160,7 @@ class AMDGPUSSASpiller : public PassInfoMixin <AMDGPUSSASpiller> {
bool BlockEnd = I == MBB.end();
for (auto VMP : VRegs)
M[VMP] = BlockEnd ? NU.getNextUseDistance(MBB, VMP)
- : NU.getNextUseDistance(I, VMP);
+ : NU.getNextUseDistance(I, VMP);
auto SortByDist = [&](const VRegMaskPair LHS, const VRegMaskPair RHS) {
return M[LHS] < M[RHS];
@@ -207,8 +204,7 @@ public:
};
#ifndef NDEBUG
-LLVM_ATTRIBUTE_NOINLINE void
-AMDGPUSSASpiller::dumpRegSet(RegisterSet VMPs) {
+LLVM_ATTRIBUTE_NOINLINE void AMDGPUSSASpiller::dumpRegSet(RegisterSet VMPs) {
dbgs() << "\n";
for (auto P : VMPs) {
printVRegMaskPair(P);
@@ -239,10 +235,10 @@ AMDGPUSSASpiller::getBlockInfo(const MachineBasicBlock &MBB) {
void AMDGPUSSASpiller::processFunction(MachineFunction &MF) {
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
-
+
// T1->startTimer();
for (auto MBB : RPOT) {
-
+
// T3->startTimer();
if (LI.isLoopHeader(MBB)) {
initActiveSetLoopHeader(*MBB);
@@ -275,7 +271,7 @@ void AMDGPUSSASpiller::processBlock(MachineBasicBlock &MBB) {
auto &Entry = RegisterMap[MBB.getNumber()];
RegisterSet &Active = Entry.ActiveSet;
RegisterSet &Spilled = Entry.SpillSet;
-
+
// for (MachineBasicBlock::iterator I : MBB) {
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
RegisterSet Reloads;
@@ -302,12 +298,12 @@ void AMDGPUSSASpiller::processBlock(MachineBasicBlock &MBB) {
assert(Spilled.getCoverage(SpilledVMP).isFullyCovered() &&
"Instruction register operand is neither live no "
"spilled!");
-
- if (!U.isUndef()) {
- Reloads.insert(SpilledVMP);
- }
+
+ if (!U.isUndef()) {
+ Reloads.insert(SpilledVMP);
}
}
+ }
if (I->isPHI()) {
// We don't need to make room for the PHI-defined values as they will be
@@ -357,7 +353,6 @@ void AMDGPUSSASpiller::processBlock(MachineBasicBlock &MBB) {
// T4->startTimer();
-
Active.insert(Defs.begin(), Defs.end());
// Add reloads for VRegs in Reloads before I
for (auto R : Reloads) {
@@ -423,12 +418,12 @@ void AMDGPUSSASpiller::processBlock(MachineBasicBlock &MBB) {
auto *RC = VMP.getRegClass(MRI, TRI);
Register FullVReg = MRI->createVirtualRegister(RC);
BuildMI(MBB, MBB.getFirstInstrTerminator(),
- MBB.getFirstInstrTerminator()->getDebugLoc(),
- TII->get(AMDGPU::REG_SEQUENCE), FullVReg)
- .addReg(NewVReg, 0, SpilledSubReg)
- .addImm(SpilledSubReg)
- .addReg(VMP.getVReg(), 0, ActiveSubReg)
- .addImm(ActiveSubReg);
+ MBB.getFirstInstrTerminator()->getDebugLoc(),
+ TII->get(AMDGPU::REG_SEQUENCE), FullVReg)
+ .addReg(NewVReg, 0, SpilledSubReg)
+ .addImm(SpilledSubReg)
+ .addReg(VMP.getVReg(), 0, ActiveSubReg)
+ .addImm(ActiveSubReg);
NewVReg = FullVReg;
}
rewriteUses(VMP.getVReg(), NewVReg);
@@ -514,7 +509,7 @@ void AMDGPUSSASpiller::connectToPredecessors(MachineBasicBlock &MBB,
// fail if the CF reached BB3 along the BB0 -> BB3 edge]
// set_intersect(Entry.SpillSet, Entry.ActiveSet);
- DenseMap<MachineBasicBlock*, RegisterSet> ToSpill;
+ DenseMap<MachineBasicBlock *, RegisterSet> ToSpill;
for (auto Pred : Preds) {
if (Pred == &MBB)
continue;
@@ -623,8 +618,8 @@ void AMDGPUSSASpiller::initActiveSetUsualBlock(MachineBasicBlock &MBB) {
if (Take.empty() && Cand.empty())
return;
- LLVM_DEBUG(dbgs()<< "Take : "; dumpRegSet(Take));
- LLVM_DEBUG(dbgs()<< "Cand : "; dumpRegSet(Cand));
+ LLVM_DEBUG(dbgs() << "Take : "; dumpRegSet(Take));
+ LLVM_DEBUG(dbgs() << "Cand : "; dumpRegSet(Cand));
unsigned TakeSize = fillActiveSet(MBB, Take);
if (TakeSize < NumAvailableRegs) {
@@ -658,7 +653,7 @@ void AMDGPUSSASpiller::initActiveSetLoopHeader(MachineBasicBlock &MBB) {
auto &Entry = RegisterMap[MBB.getNumber()];
auto &Spilled = Entry.SpillSet;
for (auto P : predecessors(&MBB)) {
- Spilled.set_union(getBlockInfo(*P).SpillSet);
+ Spilled.set_union(getBlockInfo(*P).SpillSet);
}
RegisterSet UsedInLoop;
@@ -702,7 +697,8 @@ void AMDGPUSSASpiller::initActiveSetLoopHeader(MachineBasicBlock &MBB) {
getBlockInfo(MBB).ActiveSet.dump());
}
-Register AMDGPUSSASpiller::reloadAtEnd(MachineBasicBlock &MBB, VRegMaskPair VMP) {
+Register AMDGPUSSASpiller::reloadAtEnd(MachineBasicBlock &MBB,
+ VRegMaskPair VMP) {
return reloadBefore(*MBB.getFirstInstrTerminator(), VMP);
}
@@ -748,8 +744,7 @@ void AMDGPUSSASpiller::spillBefore(MachineBasicBlock &MBB,
SpillPoints[VMP] = &Spill;
}
-void AMDGPUSSASpiller::rewriteUses(Register OldVReg,
- Register NewVReg) {
+void AMDGPUSSASpiller::rewriteUses(Register OldVReg, Register NewVReg) {
MachineInstr *DefMI = MRI->getVRegDef(NewVReg);
assert(DefMI);
MachineBasicBlock *ReloadBB = DefMI->getParent();
@@ -810,7 +805,6 @@ unsigned AMDGPUSSASpiller::limit(MachineBasicBlock &MBB, RegisterSet &Active,
return NumSpills;
}
-
sortRegSetAt(MBB, I, Active);
RegisterSet ToSpill;
@@ -863,7 +857,7 @@ unsigned AMDGPUSSASpiller::limit(MachineBasicBlock &MBB, RegisterSet &Active,
if (!ToSpill.empty()) {
dbgs() << "\nActive set after spilling:\n";
- dumpRegSet(Active);
+ dumpRegSet(Active);
dbgs() << "\nSpilled set after spilling:\n";
dumpRegSet(Spilled);
}
@@ -878,10 +872,6 @@ unsigned AMDGPUSSASpiller::limit(MachineBasicBlock &MBB, RegisterSet &Active,
return NumSpills;
}
-
-
-
-
unsigned AMDGPUSSASpiller::getRegSetSizeInRegs(const RegisterSet VRegs) {
unsigned Size = 0;
for (auto VMP : VRegs) {
@@ -970,7 +960,8 @@ bool AMDGPUSSASpillerLegacy::runOnMachineFunction(MachineFunction &MF) {
MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
AMDGPUNextUseAnalysis::Result &NU =
getAnalysis<AMDGPUNextUseAnalysisWrapper>().getNU();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ MachineDominatorTree &MDT =
+ getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
AMDGPUSSASpiller Impl(LIS, LI, MDT, NU);
return Impl.run(MF);
}
@@ -993,8 +984,8 @@ FunctionPass *llvm::createAMDGPUSSASpillerLegacyPass() {
}
llvm::PassPluginLibraryInfo getAMDGPUSSASpillerPassPluginInfo() {
- return {LLVM_PLUGIN_API_VERSION, "AMDGPUSSASpiller",
- LLVM_VERSION_STRING, [](PassBuilder &PB) {
+ return {LLVM_PLUGIN_API_VERSION, "AMDGPUSSASpiller", LLVM_VERSION_STRING,
+ [](PassBuilder &PB) {
PB.registerPipelineParsingCallback(
[](StringRef Name, MachineFunctionPassManager &MFPM,
ArrayRef<PassBuilder::PipelineElement>) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.h b/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.h
index e3468604f..9ccb69eb9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSSASpiller.h
@@ -1,4 +1,5 @@
-//===- AMDGPUSSASpiller.h ----------------------------------------*- C++- *-===//
+//===- AMDGPUSSASpiller.h ----------------------------------------*- C++-
+//*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 685488183..372539617 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1883,10 +1883,12 @@ unsigned SIInstrInfo::getVectorRegSpillRestoreOpcode(
return getVGPRSpillRestoreOpcode(Size);
}
-void SIInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
- int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg) const {
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ Register VReg) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 22c2bcc07..c7e130ed4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -294,7 +294,8 @@ public:
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg) const override;
+ const TargetRegisterInfo *TRI,
+ Register VReg) const override;
bool expandPostRAPseudo(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/AMDGPU/VRegMaskPair.h b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
index de4e8b818..a53154b89 100644
--- a/llvm/lib/Target/AMDGPU/VRegMaskPair.h
+++ b/llvm/lib/Target/AMDGPU/VRegMaskPair.h
@@ -1,5 +1,5 @@
//===------- VRegMaskPair.h ----------------------------------------*-
-//C++-*-===//
+// C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -21,11 +21,11 @@
#ifndef LLVM_LIB_TARGET_VREGMASKPAIR_H
#define LLVM_LIB_TARGET_VREGMASKPAIR_H
-#include "llvm/CodeGen/Register.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -54,350 +54,345 @@ public:
VReg = MO.getReg();
LaneMask = MO.getSubReg() ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
: MRI->getMaxLaneMaskForVReg(VReg);
- }
+ }
+
+ const Register getVReg() const { return VReg; }
+ const LaneBitmask getLaneMask() const { return LaneMask; }
+
+ unsigned getSubReg(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask == Mask)
+ return AMDGPU::NoRegister;
+ return getSubRegIndexForLaneMask(LaneMask, TRI);
+ }
+
+ const TargetRegisterClass *getRegClass(const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI) const {
+ const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, VReg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
+ if (LaneMask != Mask) {
+ unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
+ return TRI->getSubRegisterClass(RC, SubRegIdx);
+ }
+ return RC;
+ }
+
+ unsigned getSizeInRegs(const SIRegisterInfo *TRI) const {
+ return TRI->getNumCoveredRegs(LaneMask);
+ }
+
+ bool operator==(const VRegMaskPair &other) const {
+ return VReg == other.VReg && LaneMask == other.LaneMask;
+ }
+};
+
+class LaneCoverageResult {
+ friend class VRegMaskPairSet;
+ LaneBitmask Data;
+ LaneBitmask Covered;
+ LaneBitmask NotCovered;
- const Register getVReg() const { return VReg; }
- const LaneBitmask getLaneMask() const { return LaneMask; }
+public:
+ LaneCoverageResult() = default;
+ LaneCoverageResult(const LaneBitmask Mask) : Data(Mask), NotCovered(Mask) {};
+ bool isFullyCovered() { return Data == Covered; }
+ bool isFullyUncovered() { return Data == NotCovered; }
+ LaneBitmask getCovered() { return Covered; }
+ LaneBitmask getNotCovered() { return NotCovered; }
+};
- unsigned getSubReg(const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI) const {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
- if (LaneMask == Mask)
- return AMDGPU::NoRegister;
- return getSubRegIndexForLaneMask(LaneMask, TRI);
- }
+class VRegMaskPairSet {
- const TargetRegisterClass *getRegClass(const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI) const {
- const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, VReg);
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(VReg);
- if (LaneMask != Mask) {
- unsigned SubRegIdx = getSubRegIndexForLaneMask(LaneMask, TRI);
- return TRI->getSubRegisterClass(RC, SubRegIdx);
- }
- return RC;
- }
+ using MaskSet = std::set<LaneBitmask>;
+ using SetStorageT = DenseMap<Register, MaskSet>;
+ using LinearStorageT = std::vector<VRegMaskPair>;
- unsigned getSizeInRegs(const SIRegisterInfo *TRI) const {
- return TRI->getNumCoveredRegs(LaneMask);
- }
+ SetStorageT SetStorage;
+ LinearStorageT LinearStorage;
- bool operator==(const VRegMaskPair &other) const {
- return VReg == other.VReg && LaneMask == other.LaneMask;
- }
- };
-
- class LaneCoverageResult {
- friend class VRegMaskPairSet;
- LaneBitmask Data;
- LaneBitmask Covered;
- LaneBitmask NotCovered;
-
- public:
- LaneCoverageResult() = default;
- LaneCoverageResult(const LaneBitmask Mask)
- : Data(Mask), NotCovered(Mask){};
- bool isFullyCovered() { return Data == Covered; }
- bool isFullyUncovered() { return Data == NotCovered; }
- LaneBitmask getCovered() { return Covered; }
- LaneBitmask getNotCovered() { return NotCovered; }
- };
-
- class VRegMaskPairSet {
-
- using MaskSet = std::set<LaneBitmask>;
- using SetStorageT = DenseMap<Register, MaskSet>;
- using LinearStorageT = std::vector<VRegMaskPair>;
-
- SetStorageT SetStorage;
- LinearStorageT LinearStorage;
-
- public:
-
- VRegMaskPairSet() = default;
-
- template <typename ContainerT,
- typename = std::enable_if_t<std::is_same<
- typename ContainerT::value_type, VRegMaskPair>::value>>
- VRegMaskPairSet(const ContainerT &Vec) {
- for (const auto &VMP : Vec)
- insert(VMP);
+public:
+ VRegMaskPairSet() = default;
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(const ContainerT &Vec) {
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ }
+
+ template <typename ContainerT,
+ typename = std::enable_if_t<std::is_same<
+ typename ContainerT::value_type, VRegMaskPair>::value>>
+ VRegMaskPairSet(ContainerT &&Vec) {
+ for (auto &&VMP : Vec)
+ insert(std::move(VMP));
+ }
+
+ bool insert(const VRegMaskPair &VMP) {
+ auto &MaskSet = SetStorage[VMP.VReg];
+ auto Inserted = MaskSet.insert(VMP.LaneMask);
+ if (!Inserted.second)
+ return false;
+ LinearStorage.push_back(VMP);
+ return true;
+ }
+
+ template <typename InputIt> void insert(InputIt First, InputIt Last) {
+ for (auto It = First; It != Last; ++It)
+ insert(*It);
+ }
+
+ void remove(const VRegMaskPair &VMP) {
+ auto MapIt = SetStorage.find(VMP.VReg);
+ if (MapIt == SetStorage.end())
+ return;
+
+ size_t Erased = MapIt->second.erase(VMP.LaneMask);
+ if (!Erased)
+ return;
+
+ if (MapIt->second.empty())
+ SetStorage.erase(MapIt);
+
+ auto VecIt = std::find(LinearStorage.begin(), LinearStorage.end(), VMP);
+ if (VecIt != LinearStorage.end()) {
+ LinearStorage.erase(VecIt);
+ } else {
+ llvm_unreachable("Inconsistent LinearStorage: VMP missing on remove");
+ }
+ }
+
+ template <typename Predicate> void remove_if(Predicate Pred) {
+ for (auto It = LinearStorage.begin(); It != LinearStorage.end();) {
+ const VRegMaskPair VMP = *It;
+ if (Pred(VMP)) {
+ It = LinearStorage.erase(It);
+ SetStorage[VMP.VReg].erase(VMP.LaneMask);
+ if (SetStorage[VMP.VReg].empty())
+ SetStorage.erase(VMP.VReg);
+ } else {
+ ++It;
}
-
- template <typename ContainerT,
- typename = std::enable_if_t<std::is_same<
- typename ContainerT::value_type, VRegMaskPair>::value>>
- VRegMaskPairSet(ContainerT &&Vec) {
- for (auto &&VMP : Vec)
- insert(std::move(VMP));
+ }
+ }
+
+ bool count(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ if (It == SetStorage.end())
+ return false;
+
+ return It->second.count(VMP.LaneMask) > 0;
+ }
+
+ bool contains(const VRegMaskPair &VMP) const {
+ auto It = SetStorage.find(VMP.VReg);
+ return It != SetStorage.end() && It->second.contains(VMP.LaneMask);
+ }
+
+ void clear() {
+ SetStorage.clear();
+ LinearStorage.clear();
+ }
+
+ size_t size() const { return LinearStorage.size(); }
+ bool empty() const { return LinearStorage.empty(); }
+
+ void sort(llvm::function_ref<bool(const VRegMaskPair &, const VRegMaskPair &)>
+ Cmp) {
+ std::sort(LinearStorage.begin(), LinearStorage.end(), Cmp);
+ }
+
+ VRegMaskPair pop_back_val() {
+ assert(!LinearStorage.empty() && "Pop from empty set");
+ VRegMaskPair VMP = LinearStorage.back();
+ LinearStorage.pop_back();
+
+ auto It = SetStorage.find(VMP.VReg);
+ assert(It != SetStorage.end() && "Inconsistent SetStorage");
+ It->second.erase(VMP.LaneMask);
+ if (It->second.empty())
+ SetStorage.erase(It);
+
+ return VMP;
+ }
+
+ LaneCoverageResult getCoverage(const VRegMaskPair &VMP) const {
+ LaneCoverageResult Result(VMP.LaneMask);
+ auto It = SetStorage.find(VMP.VReg);
+ if (It != SetStorage.end()) {
+ MaskSet Masks = It->second;
+ for (auto Mask : Masks) {
+ Result.Covered |= (Mask & VMP.LaneMask);
}
-
- bool insert(const VRegMaskPair &VMP) {
- auto &MaskSet = SetStorage[VMP.VReg];
- auto Inserted = MaskSet.insert(VMP.LaneMask);
- if (!Inserted.second)
- return false;
- LinearStorage.push_back(VMP);
+ Result.NotCovered = (VMP.LaneMask & ~Result.Covered);
+ }
+ return Result;
+ }
+
+ bool operator==(const VRegMaskPairSet &Other) const {
+ if (SetStorage.size() != Other.SetStorage.size())
+ return false;
+
+ for (const auto &Entry : SetStorage) {
+ auto It = Other.SetStorage.find(Entry.first);
+ if (It == Other.SetStorage.end())
+ return false;
+
+ if (Entry.second != It->second)
+ return false;
+ }
+
+ return true;
+ }
+
+ template <typename ContainerT>
+ VRegMaskPairSet &operator=(const ContainerT &Vec) {
+ static_assert(
+ std::is_same<typename ContainerT::value_type, VRegMaskPair>::value,
+ "Container must hold VRegMaskPair elements");
+
+ clear();
+ for (const auto &VMP : Vec)
+ insert(VMP);
+ return *this;
+ }
+
+ // Set operations based on subregister coverage logic
+
+ /// Adds all elements from Other whose (VReg, LaneMask) overlap with none
+ /// in *this.
+ void set_union(const VRegMaskPairSet &Other) {
+ for (const auto &VMP : Other)
+ insert(VMP);
+ }
+
+ /// Keeps only those elements in *this that are at least partially covered
+ /// by Other.
+ void set_intersect(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyUncovered())
return true;
- }
-
- template <typename InputIt> void insert(InputIt First, InputIt Last) {
- for (auto It = First; It != Last; ++It)
- insert(*It);
- }
- void remove(const VRegMaskPair &VMP) {
- auto MapIt = SetStorage.find(VMP.VReg);
- if (MapIt == SetStorage.end())
- return;
-
- size_t Erased = MapIt->second.erase(VMP.LaneMask);
- if (!Erased)
- return;
-
- if (MapIt->second.empty())
- SetStorage.erase(MapIt);
-
- auto VecIt = std::find(LinearStorage.begin(), LinearStorage.end(), VMP);
- if (VecIt != LinearStorage.end()) {
- LinearStorage.erase(VecIt);
- } else {
- llvm_unreachable("Inconsistent LinearStorage: VMP missing on remove");
- }
- }
-
- template <typename Predicate> void remove_if(Predicate Pred) {
- for (auto It = LinearStorage.begin(); It != LinearStorage.end();) {
- const VRegMaskPair VMP = *It;
- if (Pred(VMP)) {
- It = LinearStorage.erase(It);
- SetStorage[VMP.VReg].erase(VMP.LaneMask);
- if (SetStorage[VMP.VReg].empty())
- SetStorage.erase(VMP.VReg);
- } else {
- ++It;
- }
- }
+ if (!Cov.isFullyCovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getCovered()});
+ return true; // remove current, will reinsert trimmed version
}
- bool count(const VRegMaskPair &VMP) const {
- auto It = SetStorage.find(VMP.VReg);
- if (It == SetStorage.end())
- return false;
-
- return It->second.count(VMP.LaneMask) > 0;
- }
-
- bool contains(const VRegMaskPair &VMP) const {
- auto It = SetStorage.find(VMP.VReg);
- return It != SetStorage.end() && It->second.contains(VMP.LaneMask);
- }
+ return false; // keep as-is
+ });
- void clear() {
- SetStorage.clear();
- LinearStorage.clear();
- }
-
- size_t size() const { return LinearStorage.size(); }
- bool empty() const { return LinearStorage.empty(); }
-
- void
- sort(llvm::function_ref<bool(const VRegMaskPair &, const VRegMaskPair &)>
- Cmp) {
- std::sort(LinearStorage.begin(), LinearStorage.end(), Cmp);
- }
-
- VRegMaskPair pop_back_val() {
- assert(!LinearStorage.empty() && "Pop from empty set");
- VRegMaskPair VMP = LinearStorage.back();
- LinearStorage.pop_back();
-
- auto It = SetStorage.find(VMP.VReg);
- assert(It != SetStorage.end() && "Inconsistent SetStorage");
- It->second.erase(VMP.LaneMask);
- if (It->second.empty())
- SetStorage.erase(It);
-
- return VMP;
- }
-
- LaneCoverageResult getCoverage(const VRegMaskPair &VMP) const {
- LaneCoverageResult Result(VMP.LaneMask);
- auto It = SetStorage.find(VMP.VReg);
- if (It != SetStorage.end()) {
- MaskSet Masks = It->second;
- for (auto Mask : Masks) {
- Result.Covered |= (Mask & VMP.LaneMask);
- }
- Result.NotCovered = (VMP.LaneMask & ~Result.Covered);
- }
- return Result;
- }
-
- bool operator==(const VRegMaskPairSet &Other) const {
- if (SetStorage.size() != Other.SetStorage.size())
- return false;
-
- for (const auto &Entry : SetStorage) {
- auto It = Other.SetStorage.find(Entry.first);
- if (It == Other.SetStorage.end())
- return false;
-
- if (Entry.second != It->second)
- return false;
- }
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+ /// Removes elements from *this that are at least partially covered by
+ /// Other.
+ void set_subtract(const VRegMaskPairSet &Other) {
+ std::vector<VRegMaskPair> ToInsert;
+ remove_if([&](const VRegMaskPair &VMP) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (Cov.isFullyCovered())
return true;
- }
-
- template <typename ContainerT>
- VRegMaskPairSet &operator=(const ContainerT &Vec) {
- static_assert(
- std::is_same<typename ContainerT::value_type, VRegMaskPair>::value,
- "Container must hold VRegMaskPair elements");
- clear();
- for (const auto &VMP : Vec)
- insert(VMP);
- return *this;
+ if (!Cov.isFullyUncovered()) {
+ ToInsert.push_back({VMP.VReg, Cov.getNotCovered()});
+ return true; // remove and reinsert uncovered part
}
- // Set operations based on subregister coverage logic
-
- /// Adds all elements from Other whose (VReg, LaneMask) overlap with none
- /// in *this.
- void set_union(const VRegMaskPairSet &Other) {
- for (const auto &VMP : Other)
- insert(VMP);
+ return false;
+ });
+
+ insert(ToInsert.begin(), ToInsert.end());
+ }
+
+ /// Returns the union (join) of this set and Other under coverage logic.
+ VRegMaskPairSet set_join(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result = *this;
+ Result.set_union(Other);
+ return Result;
+ }
+
+ /// Returns the intersection of this set and Other based on partial
+ /// overlap.
+ VRegMaskPairSet set_intersection(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyUncovered()) {
+ Result.insert({VMP.VReg, Cov.getCovered()});
}
-
- /// Keeps only those elements in *this that are at least partially covered
- /// by Other.
- void set_intersect(const VRegMaskPairSet &Other) {
- std::vector<VRegMaskPair> ToInsert;
- remove_if([&](const VRegMaskPair &VMP) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (Cov.isFullyUncovered())
- return true;
-
- if (!Cov.isFullyCovered()) {
- ToInsert.push_back({VMP.VReg, Cov.getCovered()});
- return true; // remove current, will reinsert trimmed version
- }
-
- return false; // keep as-is
- });
-
- insert(ToInsert.begin(), ToInsert.end());
+ }
+ return Result;
+ }
+
+ /// Returns all elements of *this that do not overlap with anything in
+ /// Other.
+ VRegMaskPairSet set_difference(const VRegMaskPairSet &Other) const {
+ VRegMaskPairSet Result;
+ for (const auto &VMP : *this) {
+ LaneCoverageResult Cov = Other.getCoverage(VMP);
+ if (!Cov.isFullyCovered()) {
+ Result.insert({VMP.VReg, Cov.getNotCovered()});
}
-
- /// Removes elements from *this that are at least partially covered by
- /// Other.
- void set_subtract(const VRegMaskPairSet &Other) {
- std::vector<VRegMaskPair> ToInsert;
- remove_if([&](const VRegMaskPair &VMP) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (Cov.isFullyCovered())
- return true;
-
- if (!Cov.isFullyUncovered()) {
- ToInsert.push_back({VMP.VReg, Cov.getNotCovered()});
- return true; // remove and reinsert uncovered part
- }
-
- return false;
- });
-
- insert(ToInsert.begin(), ToInsert.end());
+ }
+ return Result;
+ }
+
+ // Debug
+ void dump() const {
+ dbgs() << "=== VRegMaskPairSet Dump ===\n";
+
+ dbgs() << "SetStorage:\n";
+ for (const auto &Entry : SetStorage) {
+ dbgs() << " VReg: " << printReg(Entry.first) << " => { ";
+ for (const auto &Mask : Entry.second) {
+ dbgs() << PrintLaneMask(Mask) << " ";
}
-
- /// Returns the union (join) of this set and Other under coverage logic.
- VRegMaskPairSet set_join(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result = *this;
- Result.set_union(Other);
- return Result;
- }
-
- /// Returns the intersection of this set and Other based on partial
- /// overlap.
- VRegMaskPairSet set_intersection(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result;
- for (const auto &VMP : *this) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (!Cov.isFullyUncovered()) {
- Result.insert({VMP.VReg, Cov.getCovered()});
- }
- }
- return Result;
- }
-
- /// Returns all elements of *this that do not overlap with anything in
- /// Other.
- VRegMaskPairSet set_difference(const VRegMaskPairSet &Other) const {
- VRegMaskPairSet Result;
- for (const auto &VMP : *this) {
- LaneCoverageResult Cov = Other.getCoverage(VMP);
- if (!Cov.isFullyCovered()) {
- Result.insert({VMP.VReg, Cov.getNotCovered()});
- }
- }
- return Result;
- }
-
- // Debug
- void dump() const {
- dbgs() << "=== VRegMaskPairSet Dump ===\n";
-
- dbgs() << "SetStorage:\n";
- for (const auto &Entry : SetStorage) {
- dbgs() << " VReg: " << printReg(Entry.first) << " => { ";
- for (const auto &Mask : Entry.second) {
- dbgs() << PrintLaneMask(Mask) << " ";
- }
- dbgs() << "}\n";
- }
-
- dbgs() << "LinearStorage (insertion order):\n";
- for (const auto &VMP : LinearStorage) {
- dbgs() << " (" << printReg(VMP.getVReg()) << ", "
- << PrintLaneMask(VMP.getLaneMask()) << ")\n";
- }
-
- dbgs() << "=============================\n";
- }
-
- // Iterators
- using iterator = LinearStorageT::const_iterator;
- iterator begin() const { return LinearStorage.begin(); }
- iterator end() const { return LinearStorage.end(); }
- };
-
- namespace llvm {
- template <> struct DenseMapInfo<VRegMaskPair> {
- static inline VRegMaskPair getEmptyKey() {
- return {Register(DenseMapInfo<unsigned>::getEmptyKey()),
- LaneBitmask(0xFFFFFFFFFFFFFFFFULL)};
- }
-
- static inline VRegMaskPair getTombstoneKey() {
- return {Register(DenseMapInfo<unsigned>::getTombstoneKey()),
- LaneBitmask(0xFFFFFFFFFFFFFFFEULL)};
- }
-
- static unsigned getHashValue(const VRegMaskPair &P) {
- return DenseMapInfo<unsigned>::getHashValue(P.getVReg().id()) ^
- DenseMapInfo<uint64_t>::getHashValue(
- P.getLaneMask().getAsInteger());
- }
-
- static bool isEqual(const VRegMaskPair &LHS, const VRegMaskPair &RHS) {
- return DenseMapInfo<unsigned>::isEqual(LHS.getVReg().id(),
- RHS.getVReg().id()) &&
- DenseMapInfo<uint64_t>::isEqual(
- LHS.getLaneMask().getAsInteger(),
- RHS.getLaneMask().getAsInteger());
- }
- };
-
- } // namespace llvm
+ dbgs() << "}\n";
+ }
+
+ dbgs() << "LinearStorage (insertion order):\n";
+ for (const auto &VMP : LinearStorage) {
+ dbgs() << " (" << printReg(VMP.getVReg()) << ", "
+ << PrintLaneMask(VMP.getLaneMask()) << ")\n";
+ }
+
+ dbgs() << "=============================\n";
+ }
+
+ // Iterators
+ using iterator = LinearStorageT::const_iterator;
+ iterator begin() const { return LinearStorage.begin(); }
+ iterator end() const { return LinearStorage.end(); }
+};
+
+namespace llvm {
+template <> struct DenseMapInfo<VRegMaskPair> {
+ static inline VRegMaskPair getEmptyKey() {
+ return {Register(DenseMapInfo<unsigned>::getEmptyKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFFULL)};
+ }
+
+ static inline VRegMaskPair getTombstoneKey() {
+ return {Register(DenseMapInfo<unsigned>::getTombstoneKey()),
+ LaneBitmask(0xFFFFFFFFFFFFFFFEULL)};
+ }
+
+ static unsigned getHashValue(const VRegMaskPair &P) {
+ return DenseMapInfo<unsigned>::getHashValue(P.getVReg().id()) ^
+ DenseMapInfo<uint64_t>::getHashValue(P.getLaneMask().getAsInteger());
+ }
+
+ static bool isEqual(const VRegMaskPair &LHS, const VRegMaskPair &RHS) {
+ return DenseMapInfo<unsigned>::isEqual(LHS.getVReg().id(),
+ RHS.getVReg().id()) &&
+ DenseMapInfo<uint64_t>::isEqual(LHS.getLaneMask().getAsInteger(),
+ RHS.getLaneMask().getAsInteger());
+ }
+};
+
+} // namespace llvm
#endif // LLVM_LIB_TARGET_VREGMASKPAIR_H
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2a73f4977..d6c4003df 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4804,10 +4804,12 @@ void X86InstrInfo::storeRegToStackSlot(
.setMIFlag(Flags);
}
-void X86InstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
- int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg) const {
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ Register DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ Register VReg) const {
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e09054379..cf8c040c1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -478,7 +478,8 @@ public:
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register DestReg,
int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg) const override;
+ const TargetRegisterInfo *TRI,
+ Register VReg) const override;
void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned Opc, Register Reg, int FrameIdx,
``````````
</details>
https://github.com/llvm/llvm-project/pull/156049
More information about the llvm-commits
mailing list