[llvm] [StackColoring] Change the StackColoring logic + enables it to handle spills (PR #143800)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 22 05:16:31 PDT 2025
https://github.com/Ralender updated https://github.com/llvm/llvm-project/pull/143800
>From f9c024d7c9b217db49518831cd75c5ead260f742 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 20 May 2025 22:28:40 +0200
Subject: [PATCH 01/19] [NFC][StackColoring] Remove unused member for
StackColoring
---
llvm/lib/CodeGen/StackColoring.cpp | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 0f93822d9792b..8946c7cd44058 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -402,9 +402,6 @@ class StackColoring {
using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
LivenessMap BlockLiveness;
- /// Maps serial numbers to basic blocks.
- DenseMap<const MachineBasicBlock *, int> BasicBlocks;
-
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
@@ -727,7 +724,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// deterministic numbering.
for (MachineBasicBlock *MBB : depth_first(MF)) {
// Assign a serial number to this basic block.
- BasicBlocks[MBB] = BasicBlockNumbering.size();
BasicBlockNumbering.push_back(MBB);
// Keep a reference to avoid repeated lookups.
@@ -1211,7 +1207,6 @@ bool StackColoring::run(MachineFunction &Func) {
MF = &Func;
MFI = &MF->getFrameInfo();
BlockLiveness.clear();
- BasicBlocks.clear();
BasicBlockNumbering.clear();
Markers.clear();
Intervals.clear();
>From 48f34ec2d6fc5f9fe21c6828d4dbc764958779d6 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 20 May 2025 23:23:46 +0200
Subject: [PATCH 02/19] [NFC][StackColoring] Use LiveRange instead of
LiveInterval in StackColoring
---
llvm/lib/CodeGen/StackColoring.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 8946c7cd44058..609ee3bbc369c 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -407,7 +407,7 @@ class StackColoring {
/// Maps slots to their use interval. Outside of this interval, slots
/// values are either dead or `undef` and they will not be written to.
- SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+ SmallVector<std::unique_ptr<LiveRange>, 16> Intervals;
/// Maps slots to the points where they can become in-use.
SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
@@ -1035,7 +1035,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// validating the instructions.
if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
SlotIndex Index = Indexes->getInstructionIndex(I);
- const LiveInterval *Interval = &*Intervals[FromSlot];
+ const LiveRange *Interval = &*Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
"Found instruction usage outside of live range.");
}
@@ -1155,7 +1155,7 @@ void StackColoring::removeInvalidSlotRanges() {
// Check that the used slot is inside the calculated lifetime range.
// If it is not, warn about it and invalidate the range.
- LiveInterval *Interval = &*Intervals[Slot];
+ LiveRange *Interval = &*Intervals[Slot];
SlotIndex Index = Indexes->getInstructionIndex(I);
if (Interval->find(Index) == Interval->end()) {
Interval->clear();
@@ -1247,7 +1247,7 @@ bool StackColoring::run(MachineFunction &Func) {
}
for (unsigned i=0; i < NumSlots; ++i) {
- std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
+ std::unique_ptr<LiveRange> LI(new LiveRange());
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
Intervals.push_back(std::move(LI));
SortedSlots.push_back(i);
@@ -1317,8 +1317,8 @@ bool StackColoring::run(MachineFunction &Func) {
if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
continue;
- LiveInterval *First = &*Intervals[FirstSlot];
- LiveInterval *Second = &*Intervals[SecondSlot];
+ LiveRange *First = &*Intervals[FirstSlot];
+ LiveRange *Second = &*Intervals[SecondSlot];
auto &FirstS = LiveStarts[FirstSlot];
auto &SecondS = LiveStarts[SecondSlot];
assert(!First->empty() && !Second->empty() && "Found an empty range");
>From 833849843a2d0cb25222e652d2bcf6a43f9e3f7d Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 10 Jun 2025 22:08:43 +0200
Subject: [PATCH 03/19] [NFC] make constructor explicit LiveRange
Without it, this can lead to a crazy situation: when passing a LiveRange* to a function expecting a const LiveRange&,
the LiveRange* is implicitly converted to bool and a new empty LiveRange is created.
---
llvm/include/llvm/CodeGen/LiveInterval.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index e1c5717f5face..e9ca9e36c95b9 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -237,9 +237,9 @@ namespace llvm {
}
/// Constructs a new LiveRange object.
- LiveRange(bool UseSegmentSet = false)
- : segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>()
- : nullptr) {}
+ explicit LiveRange(bool UseSegmentSet = false)
+ : segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>() : nullptr) {
+ }
/// Constructs a new LiveRange object by copying segments and valnos from
/// another LiveRange.
>From 5aeb0f01a28b0fbdde07c26dd0a3e221e3652405 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 29 May 2025 15:06:25 +0200
Subject: [PATCH 04/19] [NFC] MachineFrameInfo::print add a bit more
information
---
llvm/lib/CodeGen/MachineFrameInfo.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index e4b993850f73d..14dc871d89c13 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -221,6 +222,12 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (SO.StackID != 0)
OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';
+ if (SO.Alloca && !SO.Alloca->getName().empty())
+ OS << "alloca=" << SO.Alloca->getName() << ' ';
+
+ if (SO.isSpillSlot)
+ OS << "spill ";
+
if (SO.Size == ~0ULL) {
OS << "dead\n";
continue;
>From 0ee9f612f02ffb6c7d35301694b9d71b6972f965 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 29 May 2025 17:42:37 +0200
Subject: [PATCH 05/19] [NFC][StackSlotColoring] Remove dead code
---
llvm/lib/CodeGen/StackSlotColoring.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 2f81bea4e86ba..aaff2d6238c1e 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -369,7 +369,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SmallVector<int, 16> SlotMapping(NumObjs, -1);
SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
- BitVector UsedColors(NumObjs);
LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");
bool Changed = false;
@@ -380,7 +379,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SlotMapping[SS] = NewSS;
RevMap[NewSS].push_back(SS);
SlotWeights[NewSS] += li->weight();
- UsedColors.set(NewSS);
Changed |= (SS != NewSS);
}
>From a352f1ecacbea556205af5220cc72624fb452047 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 29 May 2025 23:01:22 +0200
Subject: [PATCH 06/19] [NFC][StackColoring] Use block numbers instead of maps
---
llvm/lib/CodeGen/StackColoring.cpp | 43 +++++++++++++++---------------
1 file changed, 21 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 609ee3bbc369c..1bb1861a06f44 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -396,11 +396,11 @@ class StackColoring {
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
BitVector LiveOut;
+
+ bool isEmpty() { return Begin.empty(); }
};
- /// Maps active slots (per bit) for each basic block.
- using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
- LivenessMap BlockLiveness;
+ SmallVector<BlockLifetimeInfo, 0> BlockLiveness;
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
@@ -438,9 +438,6 @@ class StackColoring {
bool run(MachineFunction &Func);
private:
- /// Used in collectMarkers
- using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
-
/// Debug.
void dump() const;
void dumpIntervals() const;
@@ -538,9 +535,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
}
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
- LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
- assert(BI != BlockLiveness.end() && "Block not found");
- const BlockLifetimeInfo &BlockInfo = BI->second;
+ const BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB->getNumber()];
dumpBV("BEGIN", BlockInfo.Begin);
dumpBV("END", BlockInfo.End);
@@ -624,7 +619,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
unsigned StackColoring::collectMarkers(unsigned NumSlot) {
unsigned MarkersFound = 0;
- BlockBitVecMap SeenStartMap;
+ SmallVector<BitVector> SeenStartMap;
InterestingSlots.clear();
InterestingSlots.resize(NumSlot);
ConservativeSlots.clear();
@@ -634,6 +629,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+ SeenStartMap.resize(MF->getNumBlockIDs());
+
// Step 1: collect markers and populate the "InterestingSlots"
// and "ConservativeSlots" sets.
for (MachineBasicBlock *MBB : depth_first(MF)) {
@@ -642,10 +639,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// to this bb).
BitVector BetweenStartEnd;
BetweenStartEnd.resize(NumSlot);
+ SeenStartMap[MBB->getNumber()].resize(NumSlot);
for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
- if (I != SeenStartMap.end()) {
- BetweenStartEnd |= I->second;
+ BitVector &PredSet = SeenStartMap[Pred->getNumber()];
+ if (!PredSet.empty()) {
+ BetweenStartEnd |= PredSet;
}
}
@@ -691,7 +689,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
}
}
}
- BitVector &SeenStart = SeenStartMap[MBB];
+ BitVector &SeenStart = SeenStartMap[MBB->getNumber()];
SeenStart |= BetweenStartEnd;
}
if (!MarkersFound) {
@@ -718,6 +716,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
+ BlockLiveness.resize(MF->getNumBlockIDs());
// Step 2: compute begin/end sets for each block
// NOTE: We use a depth-first iteration to ensure that we obtain a
@@ -727,7 +726,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
BasicBlockNumbering.push_back(MBB);
// Keep a reference to avoid repeated lookups.
- BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB->getNumber()];
BlockInfo.Begin.resize(NumSlot);
BlockInfo.End.resize(NumSlot);
@@ -784,19 +783,19 @@ void StackColoring::calculateLocalLiveness() {
for (const MachineBasicBlock *BB : BasicBlockNumbering) {
// Use an iterator to avoid repeated lookups.
- LivenessMap::iterator BI = BlockLiveness.find(BB);
- assert(BI != BlockLiveness.end() && "Block not found");
- BlockLifetimeInfo &BlockInfo = BI->second;
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB->getNumber()];
+ if (BlockInfo.isEmpty())
+ continue;
// Compute LiveIn by unioning together the LiveOut sets of all preds.
LocalLiveIn.clear();
for (MachineBasicBlock *Pred : BB->predecessors()) {
- LivenessMap::const_iterator I = BlockLiveness.find(Pred);
+ BlockLifetimeInfo &PrefInfo = BlockLiveness[Pred->getNumber()];
// PR37130: transformations prior to stack coloring can
// sometimes leave behind statically unreachable blocks; these
// can be safely skipped here.
- if (I != BlockLiveness.end())
- LocalLiveIn |= I->second.LiveOut;
+ if (!PrefInfo.isEmpty())
+ LocalLiveIn |= PrefInfo.LiveOut;
}
// Compute LiveOut by subtracting out lifetimes that end in this
@@ -840,7 +839,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
DefinitelyInUse.resize(NumSlots);
// Start the interval of the slots that we previously found to be 'in-use'.
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
pos = MBBLiveness.LiveIn.find_next(pos)) {
Starts[pos] = Indexes->getMBBStartIdx(&MBB);
>From 5a1a3464cd4b6a25820691a90d27de081cf68258 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 19 Jun 2025 23:01:33 +0200
Subject: [PATCH 07/19] [NFC] Move NumDigits to MathExtras.h and update some
users of log10 to use NumDigits
---
.../llvm/DebugInfo/PDB/Native/FormatUtil.h | 43 -------------------
llvm/include/llvm/Support/MathExtras.h | 43 +++++++++++++++++++
llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 2 +-
llvm/lib/Support/Signals.cpp | 2 +-
.../llvm-remarkutil/RemarkInstructionMix.cpp | 2 +-
llvm/utils/FileCheck/FileCheck.cpp | 2 +-
6 files changed, 47 insertions(+), 47 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
index 76a019ddf8f34..a76b5c0d44791 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h
@@ -62,49 +62,6 @@ LLVM_ABI std::string formatChunkKind(codeview::DebugSubsectionKind Kind,
LLVM_ABI std::string formatSymbolKind(codeview::SymbolKind K);
LLVM_ABI std::string formatTypeLeafKind(codeview::TypeLeafKind K);
-/// Returns the number of digits in the given integer.
-inline int NumDigits(uint64_t N) {
- if (N < 10ULL)
- return 1;
- if (N < 100ULL)
- return 2;
- if (N < 1000ULL)
- return 3;
- if (N < 10000ULL)
- return 4;
- if (N < 100000ULL)
- return 5;
- if (N < 1000000ULL)
- return 6;
- if (N < 10000000ULL)
- return 7;
- if (N < 100000000ULL)
- return 8;
- if (N < 1000000000ULL)
- return 9;
- if (N < 10000000000ULL)
- return 10;
- if (N < 100000000000ULL)
- return 11;
- if (N < 1000000000000ULL)
- return 12;
- if (N < 10000000000000ULL)
- return 13;
- if (N < 100000000000000ULL)
- return 14;
- if (N < 1000000000000000ULL)
- return 15;
- if (N < 10000000000000000ULL)
- return 16;
- if (N < 100000000000000000ULL)
- return 17;
- if (N < 1000000000000000000ULL)
- return 18;
- if (N < 10000000000000000000ULL)
- return 19;
- return 20;
-}
-
namespace detail {
template <typename T>
struct EndianAdapter final
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index ae3150e5602ee..a374f93d8538e 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -795,6 +795,49 @@ using stack_float_t = volatile float;
using stack_float_t = float;
#endif
+/// Returns the number of digits in the given integer.
+inline int NumDigits(uint64_t N) {
+ if (N < 10ULL)
+ return 1;
+ if (N < 100ULL)
+ return 2;
+ if (N < 1000ULL)
+ return 3;
+ if (N < 10000ULL)
+ return 4;
+ if (N < 100000ULL)
+ return 5;
+ if (N < 1000000ULL)
+ return 6;
+ if (N < 10000000ULL)
+ return 7;
+ if (N < 100000000ULL)
+ return 8;
+ if (N < 1000000000ULL)
+ return 9;
+ if (N < 10000000000ULL)
+ return 10;
+ if (N < 100000000000ULL)
+ return 11;
+ if (N < 1000000000000ULL)
+ return 12;
+ if (N < 10000000000000ULL)
+ return 13;
+ if (N < 100000000000000ULL)
+ return 14;
+ if (N < 1000000000000000ULL)
+ return 15;
+ if (N < 10000000000000000ULL)
+ return 16;
+ if (N < 100000000000000000ULL)
+ return 17;
+ if (N < 1000000000000000000ULL)
+ return 18;
+ if (N < 10000000000000000000ULL)
+ return 19;
+ return 20;
+}
+
} // namespace llvm
#endif
diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index 989fde9749b18..1c6876ce4e87c 100644
--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -84,7 +84,7 @@ class SourceCode {
void format(raw_ostream &OS) {
if (!PrunedSource)
return;
- size_t MaxLineNumberWidth = std::ceil(std::log10(LastLine));
+ size_t MaxLineNumberWidth = NumDigits(LastLine);
int64_t L = FirstLine;
for (size_t Pos = 0; Pos < PrunedSource->size(); ++L) {
size_t PosEnd = PrunedSource->find('\n', Pos);
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 9f9030e79d104..b8449683363b0 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -221,7 +221,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
for (int i = 0; i < Depth; i++) {
auto PrintLineHeader = [&]() {
OS << right_justify(formatv("#{0}", frame_no++).str(),
- std::log10(Depth) + 2)
+ NumDigits(Depth) + 1)
<< ' ' << format_ptr(StackTrace[i]) << ' ';
};
if (!Modules[i]) {
diff --git a/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp
index 7c8ac474c0fdb..9b0a518f7e49a 100644
--- a/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp
@@ -111,7 +111,7 @@ static Error tryInstructionMix() {
Mix.begin(), Mix.end(), 1, [](unsigned MaxValue, const MixEntry &Elt) {
return std::max(MaxValue, Elt.second);
});
- unsigned ValueWidth = std::log10(MaxValue) + 1;
+ unsigned ValueWidth = NumDigits(MaxValue);
FOS << "Instruction";
FOS.PadToColumn(MaxMnemonic + 1) << "Count\n";
FOS << "-----------";
diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp
index 9cf3a3164dfec..96e6e418b96f4 100644
--- a/llvm/utils/FileCheck/FileCheck.cpp
+++ b/llvm/utils/FileCheck/FileCheck.cpp
@@ -595,7 +595,7 @@ static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req,
unsigned LineCount = InputFileText.count('\n');
if (InputFileEnd[-1] != '\n')
++LineCount;
- unsigned LineNoWidth = std::log10(LineCount) + 1;
+ unsigned LineNoWidth = NumDigits(LineCount);
// +3 below adds spaces (1) to the left of the (right-aligned) line numbers
// on input lines and (2) to the right of the (left-aligned) labels on
// annotation lines so that input lines and annotation lines are more
>From 353a1e58ef7085b19c577c2469c10c677e7bbe30 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 19 Jun 2025 23:05:40 +0200
Subject: [PATCH 08/19] [NFC] Improve debug output of StackColoring
---
llvm/lib/CodeGen/StackColoring.cpp | 50 ++++++++++++++++++++++++------
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 1bb1861a06f44..64a5e294d2a0c 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -442,7 +442,6 @@ class StackColoring {
void dump() const;
void dumpIntervals() const;
void dumpBB(MachineBasicBlock *MBB) const;
- void dumpBV(const char *tag, const BitVector &BV) const;
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
@@ -526,12 +525,39 @@ void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
- const BitVector &BV) const {
- dbgs() << tag << " : { ";
- for (unsigned I = 0, E = BV.size(); I != E; ++I)
- dbgs() << BV.test(I) << " ";
- dbgs() << "}\n";
+
+LLVM_DUMP_METHOD void dumpBV(StringRef tag, const BitVector &BV) {
+ constexpr unsigned ColumnWidth = 150;
+ unsigned LineStartOffset = tag.size() + /*" : "*/ 3;
+ unsigned WidthAfterTag = ColumnWidth - LineStartOffset;
+ unsigned NumBitsPerColumn = WidthAfterTag / 2;
+ unsigned BitsCount = BV.size();
+ for (unsigned Bits = 0; Bits < BitsCount; Bits += NumBitsPerColumn) {
+ unsigned Start = Bits;
+ unsigned End = std::min(Start + NumBitsPerColumn, BitsCount);
+
+ dbgs() << tag << " : ";
+
+ for (unsigned I = Start; I < End; ++I)
+ dbgs() << BV.test(I) << " ";
+ dbgs() << '\n';
+ dbgs() << tag << " : ";
+ unsigned next = Start;
+ for (unsigned I = Start; I < End; ++I) {
+ if (I < next)
+ continue;
+ if (BV.test(I)) {
+ int numDigits = NumDigits(I);
+ // Make sure number have spacing while staying aligned to the line above
+ next = I + 1 + numDigits / 2;
+ dbgs() << I << ' ';
+ if (numDigits % 2 == 0)
+ dbgs() << ' ';
+ } else
+ dbgs() << " ";
+ }
+ dbgs() << '\n';
+ }
}
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
@@ -553,8 +579,14 @@ LLVM_DUMP_METHOD void StackColoring::dump() const {
LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
- dbgs() << "Interval[" << I << "]:\n";
- Intervals[I]->dump();
+ dbgs() << "Interval[" << I << "]:";
+ if (MFI->getObjectAllocation(I))
+ dbgs() << *MFI->getObjectAllocation(I);
+ dbgs() << '\n' << *Intervals[I] << '\n';
+ dbgs() << "LiveStarts:";
+ for (SlotIndex SIdx : LiveStarts[I])
+ dbgs() << ' ' << SIdx;
+ dbgs() << '\n';
}
}
#endif
>From 3c5f135c17c5189354749ecac86dba592360e91c Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 29 May 2025 16:18:52 +0200
Subject: [PATCH 09/19] [NFC][LiveStacks] Use vectors instead of map and
unordered_map
---
llvm/include/llvm/CodeGen/LiveStacks.h | 44 ++++++++-----------
llvm/lib/CodeGen/LiveStacks.cpp | 41 +++++++++--------
llvm/lib/CodeGen/StackSlotColoring.cpp | 20 +++------
.../AMDGPU/AMDGPUMarkLastScratchLoad.cpp | 6 +--
4 files changed, 49 insertions(+), 62 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h
index 02c640bfc4a93..3b4550901dc7e 100644
--- a/llvm/include/llvm/CodeGen/LiveStacks.h
+++ b/llvm/include/llvm/CodeGen/LiveStacks.h
@@ -40,49 +40,43 @@ class LiveStacks {
///
VNInfo::Allocator VNInfoAllocator;
- /// S2IMap - Stack slot indices to live interval mapping.
- using SS2IntervalMap = std::unordered_map<int, LiveInterval>;
- SS2IntervalMap S2IMap;
-
- /// S2RCMap - Stack slot indices to register class mapping.
- std::map<int, const TargetRegisterClass *> S2RCMap;
+ int StartIdx = -1;
+ SmallVector<LiveInterval *> S2LI;
+ SmallVector<const TargetRegisterClass *> S2RC;
public:
- using iterator = SS2IntervalMap::iterator;
- using const_iterator = SS2IntervalMap::const_iterator;
+ using iterator = SmallVector<LiveInterval *>::iterator;
+ using const_iterator = SmallVector<LiveInterval *>::const_iterator;
- const_iterator begin() const { return S2IMap.begin(); }
- const_iterator end() const { return S2IMap.end(); }
- iterator begin() { return S2IMap.begin(); }
- iterator end() { return S2IMap.end(); }
+ const_iterator begin() const { return S2LI.begin(); }
+ const_iterator end() const { return S2LI.end(); }
+ iterator begin() { return S2LI.begin(); }
+ iterator end() { return S2LI.end(); }
- unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
+ unsigned getStartIdx() const { return StartIdx; }
+ unsigned getNumIntervals() const { return (unsigned)S2LI.size(); }
LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC);
LiveInterval &getInterval(int Slot) {
assert(Slot >= 0 && "Spill slot indice must be >= 0");
- SS2IntervalMap::iterator I = S2IMap.find(Slot);
- assert(I != S2IMap.end() && "Interval does not exist for stack slot");
- return I->second;
+ return *S2LI[Slot - StartIdx];
}
const LiveInterval &getInterval(int Slot) const {
assert(Slot >= 0 && "Spill slot indice must be >= 0");
- SS2IntervalMap::const_iterator I = S2IMap.find(Slot);
- assert(I != S2IMap.end() && "Interval does not exist for stack slot");
- return I->second;
+ return *S2LI[Slot - StartIdx];
}
- bool hasInterval(int Slot) const { return S2IMap.count(Slot); }
+ bool hasInterval(int Slot) const {
+ if (Slot < StartIdx || StartIdx == -1)
+ return false;
+ return !getInterval(Slot).empty();
+ }
const TargetRegisterClass *getIntervalRegClass(int Slot) const {
assert(Slot >= 0 && "Spill slot indice must be >= 0");
- std::map<int, const TargetRegisterClass *>::const_iterator I =
- S2RCMap.find(Slot);
- assert(I != S2RCMap.end() &&
- "Register class info does not exist for stack slot");
- return I->second;
+ return S2RC[Slot - StartIdx];
}
VNInfo::Allocator &getVNInfoAllocator() { return VNInfoAllocator; }
diff --git a/llvm/lib/CodeGen/LiveStacks.cpp b/llvm/lib/CodeGen/LiveStacks.cpp
index c07d985a09d1f..ea158b2d96a4e 100644
--- a/llvm/lib/CodeGen/LiveStacks.cpp
+++ b/llvm/lib/CodeGen/LiveStacks.cpp
@@ -37,10 +37,12 @@ void LiveStacksWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
}
void LiveStacks::releaseMemory() {
+ for (int Idx = 0; Idx < (int)S2LI.size(); ++Idx)
+ S2LI[Idx]->~LiveInterval();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
- S2IMap.clear();
- S2RCMap.clear();
+ S2LI.clear();
+ S2RC.clear();
}
void LiveStacks::init(MachineFunction &MF) {
@@ -52,20 +54,22 @@ void LiveStacks::init(MachineFunction &MF) {
LiveInterval &
LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
assert(Slot >= 0 && "Spill slot indice must be >= 0");
- SS2IntervalMap::iterator I = S2IMap.find(Slot);
- if (I == S2IMap.end()) {
- I = S2IMap
- .emplace(
- std::piecewise_construct, std::forward_as_tuple(Slot),
- std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F))
- .first;
- S2RCMap.insert(std::make_pair(Slot, RC));
+ if (StartIdx == -1)
+ StartIdx = Slot;
+
+ int Idx = Slot - StartIdx;
+ assert(Idx >= 0 && "Slot not in order ?");
+ if (Idx < (int)S2LI.size()) {
+ S2RC[Idx] = TRI->getCommonSubClass(S2RC[Idx], RC);
} else {
- // Use the largest common subclass register class.
- const TargetRegisterClass *&OldRC = S2RCMap[Slot];
- OldRC = TRI->getCommonSubClass(OldRC, RC);
+ S2RC.resize(Idx + 1);
+ S2LI.resize(Idx + 1);
+ S2LI[Idx] = this->VNInfoAllocator.Allocate<LiveInterval>();
+ new (S2LI[Idx]) LiveInterval(Register::index2StackSlot(Slot), 0.0F);
+ S2RC[Idx] = RC;
}
- return I->second;
+ assert(S2RC.size() == S2LI.size());
+ return *S2LI[Idx];
}
AnalysisKey LiveStacksAnalysis::Key;
@@ -96,13 +100,12 @@ void LiveStacksWrapperLegacy::print(raw_ostream &OS, const Module *) const {
}
/// print - Implement the dump method.
-void LiveStacks::print(raw_ostream &OS, const Module*) const {
+void LiveStacks::print(raw_ostream &OS, const Module *) const {
OS << "********** INTERVALS **********\n";
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second.print(OS);
- int Slot = I->first;
- const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ for (int Idx = 0; Idx < (int)S2LI.size(); ++Idx) {
+ S2LI[Idx]->print(OS);
+ const TargetRegisterClass *RC = S2RC[Idx];
if (RC)
OS << " [" << TRI->getRegClassName(RC) << "]\n";
else
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index aaff2d6238c1e..95597be5f1ebe 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -262,24 +262,14 @@ void StackSlotColoring::InitializeSlots() {
UsedColors[0].resize(LastFI);
Assignments.resize(LastFI);
- using Pair = std::iterator_traits<LiveStacks::iterator>::value_type;
-
- SmallVector<Pair *, 16> Intervals;
-
- Intervals.reserve(LS->getNumIntervals());
- for (auto &I : *LS)
- Intervals.push_back(&I);
- llvm::sort(Intervals,
- [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
-
// Gather all spill slots into a list.
LLVM_DEBUG(dbgs() << "Spill slot intervals:\n");
- for (auto *I : Intervals) {
- LiveInterval &li = I->second;
- LLVM_DEBUG(li.dump());
- int FI = li.reg().stackSlotIndex();
- if (MFI->isDeadObjectIndex(FI))
+ for (auto [Idx, I] : llvm::enumerate(*LS)) {
+ int FI = Idx + LS->getStartIdx();
+ if (!I || MFI->isDeadObjectIndex(FI))
continue;
+ LiveInterval &li = *I;
+ LLVM_DEBUG(li.dump());
SSIntervals.push_back(&li);
OrigAlignments[FI] = MFI->getObjectAlign(FI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
index 9b6bb56c85d24..2dcf695e9c583 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -102,15 +102,15 @@ bool AMDGPUMarkLastScratchLoad::run(MachineFunction &MF) {
bool Changed = false;
- for (auto &[SS, LI] : *LS) {
- for (const LiveRange::Segment &Segment : LI.segments) {
+ for (auto *LI : *LS) {
+ for (const LiveRange::Segment &Segment : LI->segments) {
// Ignore segments that run to the end of basic block because in this case
// slot is still live at the end of it.
if (Segment.end.isBlock())
continue;
- const int FrameIndex = LI.reg().stackSlotIndex();
+ const int FrameIndex = LI->reg().stackSlotIndex();
MachineInstr *LastLoad = nullptr;
MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end);
>From 0ff8b6c91202310c465a39a8dc8748db16c39f6a Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 22 May 2025 20:11:39 +0200
Subject: [PATCH 10/19] [NFC][CodeGen] Cleanup lifetime in StackColoring
instead of DeadMachineInstructionElim
---
llvm/lib/CodeGen/MachineInstr.cpp | 4 ---
llvm/lib/CodeGen/StackColoring.cpp | 21 +++++++++++++---
llvm/test/CodeGen/X86/StackColoring.ll | 35 ++++++++++++++++++++++++++
3 files changed, 52 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index da3665b3b6a0b..3e5fd59534105 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1417,10 +1417,6 @@ bool MachineInstr::isDead(const MachineRegisterInfo &MRI,
if (isInlineAsm())
return false;
- // FIXME: See issue #105950 for why LIFETIME markers are considered dead here.
- if (isLifetimeMarker())
- return true;
-
// If there are no defs with uses, then we call the instruction dead so long
// as we do not suspect it may have sideeffects.
return wouldBeTriviallyDead();
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 64a5e294d2a0c..22f50c88dec21 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -686,8 +686,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
- if (Slot < 0)
+ if (Slot < 0) {
+ Markers.push_back(&MI);
continue;
+ }
InterestingSlots.set(Slot);
if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
BetweenStartEnd.set(Slot);
@@ -927,6 +929,17 @@ bool StackColoring::removeAllMarkers() {
}
Markers.clear();
+ for (MachineBasicBlock &MBB : *MF) {
+ if (BlockLiveness.empty() || BlockLiveness[MBB.getNumber()].isEmpty())
+ for (MachineInstr &MI : make_early_inc_range(MBB)) {
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ Count++;
+ MI.eraseFromParent();
+ }
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
return Count;
}
@@ -1247,8 +1260,8 @@ bool StackColoring::run(MachineFunction &Func) {
unsigned NumSlots = MFI->getObjectIndexEnd();
// If there are no stack slots then there are no markers to remove.
- if (!NumSlots)
- return false;
+ if (!NumSlots || DisableColoring)
+ return removeAllMarkers();
SmallVector<int, 8> SortedSlots;
SortedSlots.reserve(NumSlots);
@@ -1272,7 +1285,7 @@ bool StackColoring::run(MachineFunction &Func) {
// Don't continue because there are not enough lifetime markers, or the
// stack is too small, or we are told not to optimize the slots.
- if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ if (NumMarkers < 2 || TotalSize < 16) {
LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
return removeAllMarkers();
}
diff --git a/llvm/test/CodeGen/X86/StackColoring.ll b/llvm/test/CodeGen/X86/StackColoring.ll
index db3e7dcdfe2d5..4cc54c5bd1361 100644
--- a/llvm/test/CodeGen/X86/StackColoring.ll
+++ b/llvm/test/CodeGen/X86/StackColoring.ll
@@ -581,6 +581,41 @@ onerr:
%Data = type { [32 x i64] }
+declare void @throw()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @llvm.trap()
+
+; The function below has a lifetime.end on %alloca1 and a lifetime.start on
+; %alloca2 inside a catchpad block. For every check prefix used here, no
+; LIFETIME_END marker may remain in the output for this function.
+;CHECK-LABEL: removed_all_lifetime:
+;YESCOLOR-NOT: LIFETIME_END
+;NOFIRSTUSE-NOT: LIFETIME_END
+;NOCOLOR-NOT: LIFETIME_END
+define void @removed_all_lifetime() personality ptr @__CxxFrameHandler3 {
+entry:
+ %alloca2 = alloca ptr, align 4
+ %alloca1 = alloca ptr, align 4
+ store volatile ptr null, ptr %alloca1
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %catch.pad] unwind to caller
+
+catch.pad: ; preds = %catch.dispatch
+ %cp = catchpad within %cs [ptr null, i32 0, ptr %alloca1]
+ %v = load volatile ptr, ptr %alloca1
+ store volatile ptr null, ptr %alloca1
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %alloca1)
+ call void @llvm.lifetime.start.p0(i64 4, ptr %alloca2)
+ store volatile ptr null, ptr %alloca1
+ call void @llvm.trap()
+ unreachable
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
declare void @destructor()
declare void @inita(ptr)
>From eef9b6e892668d00fb1dc8356126cb334c86d818 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Fri, 23 May 2025 22:23:00 +0200
Subject: [PATCH 11/19] [CodeGen] Add option to move StackColoring after
register allocation + deal with direct fallout
---
llvm/include/llvm/CodeGen/MachineInstr.h | 4 +++-
llvm/lib/CodeGen/StackColoring.cpp | 2 +-
llvm/lib/CodeGen/TargetPassConfig.cpp | 17 ++++++++++++++---
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 94d04b82666be..faf860c656af4 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1340,7 +1340,9 @@ class MachineInstr
}
// True if the instruction represents a position in the function.
- bool isPosition() const { return isLabel() || isCFIInstruction(); }
+ bool isPosition() const {
+ // Lifetime markers count as positions, alongside labels and CFI
+ // instructions.
+ return isLifetimeMarker() || isLabel() || isCFIInstruction();
+ }
bool isNonListDebugValue() const {
return getOpcode() == TargetOpcode::DBG_VALUE;
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 22f50c88dec21..c5fd00b558d9f 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -686,7 +686,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
- if (Slot < 0) {
+ if (Slot < 0 || MFI->isObjectPreAllocated(Slot)) {
Markers.push_back(&MI);
continue;
}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4ae52b056d844..864c7c8acd3b2 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -266,6 +266,9 @@ static cl::opt<bool>
cl::desc("Split static data sections into hot and cold "
"sections using profile information"));
+static cl::opt<bool> MergedStackColoring("merged-stack-coloring",
+ cl::init(false), cl::Hidden);
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -1305,9 +1308,11 @@ void TargetPassConfig::addMachineSSAOptimization() {
// instructions dead.
addPass(&OptimizePHIsLegacyID);
- // This pass merges large allocas. StackSlotColoring is a different pass
- // which merges spill slots.
- addPass(&StackColoringLegacyID);
+ if (!MergedStackColoring) {
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringLegacyID);
+ }
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
@@ -1496,6 +1501,12 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// Perform stack slot coloring and post-ra machine LICM.
addPass(&StackSlotColoringID);
+ if (MergedStackColoring) {
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringLegacyID);
+ }
+
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
addPostRewrite();
>From fb19fd01466926f6976cb8a494c3eabb66cab2c1 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 10 Jun 2025 22:44:32 +0200
Subject: [PATCH 12/19] Add new StackColoring algo
---
llvm/include/llvm/CodeGen/MachineFrameInfo.h | 34 +-
llvm/lib/CodeGen/MachineFrameInfo.cpp | 22 +-
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 29 +-
llvm/lib/CodeGen/StackColoring.cpp | 570 +++++++++++++++----
4 files changed, 530 insertions(+), 125 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 403e5eda949f1..fdb2fbd133397 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -120,12 +120,18 @@ class MachineFrameInfo {
///< triggered protection. 3rd closest to the protector.
};
+ /// Value of StackObject::UnderlyingSlot for an object that is not placed
+ /// inside another slot (the default).
+ static constexpr int NoUnderlyingSlot = std::numeric_limits<int>::min();
+ /// Value of StackObject::UnderlyingSlot marking an object that other slots
+ /// are placed into; printed as ", underlying" by MachineFrameInfo::print.
+ static constexpr int IsUnderlyingSlot = std::numeric_limits<int>::min() + 1;
+
private:
// Represent a single object allocated on the stack.
struct StackObject {
// The offset of this object from the stack pointer on entry to
// the function. This field has no meaning for a variable sized element.
- int64_t SPOffset;
+ // Once the object has been placed, this is relative to SP.
+ // While UnderlyingSlot refers to another slot, this is instead relative to
+ // the start of that slot.
+ int64_t Offset;
// The size of this object on the stack. 0 means a variable sized object,
// ~0ULL means a dead object.
@@ -134,6 +140,10 @@ class MachineFrameInfo {
// The required alignment of this stack slot.
Align Alignment;
+ // If this refers to another slot (i.e. it is neither NoUnderlyingSlot nor
+ // IsUnderlyingSlot), this object should be placed at Offset inside the slot
+ // UnderlyingSlot.
+ int UnderlyingSlot = NoUnderlyingSlot;
+
// If true, the value of the stack object is set before
// entering the function and is not modified inside the function. By
// default, fixed objects are immutable unless marked otherwise.
@@ -183,10 +193,10 @@ class MachineFrameInfo {
uint8_t SSPLayout = SSPLK_None;
- StackObject(uint64_t Size, Align Alignment, int64_t SPOffset,
+ StackObject(uint64_t Size, Align Alignment, int64_t Offset,
bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca,
bool IsAliased, uint8_t StackID = 0)
- : SPOffset(SPOffset), Size(Size), Alignment(Alignment),
+ : Offset(Offset), Size(Size), Alignment(Alignment),
isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID),
Alloca(Alloca), isAliased(IsAliased) {}
};
@@ -532,7 +542,7 @@ class MachineFrameInfo {
"Invalid Object Idx!");
assert(!isDeadObjectIndex(ObjectIdx) &&
"Getting frame offset for a dead object?");
- return Objects[ObjectIdx+NumFixedObjects].SPOffset;
+ return Objects[ObjectIdx + NumFixedObjects].Offset;
}
bool isObjectZExt(int ObjectIdx) const {
@@ -561,12 +571,12 @@ class MachineFrameInfo {
/// Set the stack frame offset of the specified object. The
/// offset is relative to the stack pointer on entry to the function.
- void setObjectOffset(int ObjectIdx, int64_t SPOffset) {
+ void setObjectOffset(int ObjectIdx, int64_t Offset) {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
assert(!isDeadObjectIndex(ObjectIdx) &&
"Setting frame offset for a dead object?");
- Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset;
+ Objects[ObjectIdx + NumFixedObjects].Offset = Offset;
}
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const {
@@ -762,6 +772,18 @@ class MachineFrameInfo {
// If ID == 0, MaxAlignment will need to be updated separately.
}
+  /// Return the UnderlyingSlot value recorded for the specified object: either
+  /// NoUnderlyingSlot, IsUnderlyingSlot, or the index of the slot this object
+  /// is placed into.
+  int getUnderlyingSlot(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx + NumFixedObjects].UnderlyingSlot;
+  }
+
+  /// Record \p Underlying as the UnderlyingSlot value of the specified object.
+  void setUnderlyingSlot(int ObjectIdx, int Underlying) {
+    assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    StackObject &Obj = Objects[ObjectIdx + NumFixedObjects];
+    Obj.UnderlyingSlot = Underlying;
+  }
+
/// Returns true if the specified index corresponds to a dead object.
bool isDeadObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 14dc871d89c13..e3d1761ef894a 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -81,7 +81,7 @@ int MachineFrameInfo::CreateVariableSizedObject(Align Alignment,
return (int)Objects.size()-NumFixedObjects-1;
}
-int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t Offset,
bool IsImmutable, bool IsAliased) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
// The alignment of the frame index can be determined from its offset from
@@ -91,23 +91,22 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// stack needs realignment, we can't assume that the stack will in fact be
// aligned.
Align Alignment =
- commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, Offset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
- StackObject(Size, Alignment, SPOffset, IsImmutable,
+ StackObject(Size, Alignment, Offset, IsImmutable,
/*IsSpillSlot=*/false, /*Alloca=*/nullptr,
IsAliased));
return -++NumFixedObjects;
}
-int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
- int64_t SPOffset,
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t Offset,
bool IsImmutable) {
Align Alignment =
- commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset);
+ commonAlignment(ForcedRealign ? Align(1) : StackAlignment, Offset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
- StackObject(Size, Alignment, SPOffset, IsImmutable,
+ StackObject(Size, Alignment, Offset, IsImmutable,
/*IsSpillSlot=*/true, /*Alloca=*/nullptr,
/*IsAliased=*/false));
return -++NumFixedObjects;
@@ -240,8 +239,13 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (i < NumFixedObjects)
OS << ", fixed";
- if (i < NumFixedObjects || SO.SPOffset != -1) {
- int64_t Off = SO.SPOffset - ValOffset;
+ if (SO.UnderlyingSlot == MachineFrameInfo::IsUnderlyingSlot)
+ OS << ", underlying";
+ if (SO.UnderlyingSlot > MachineFrameInfo::IsUnderlyingSlot) {
+ OS << ", placed=" << "fi#" << (int)(SO.UnderlyingSlot - NumFixedObjects)
+ << "+" << SO.Offset;
+ } else if (i < NumFixedObjects || SO.Offset != -1) {
+ int64_t Off = SO.Offset - ValOffset;
OS << ", at location [SP";
if (Off > 0)
OS << "+" << Off;
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index f66f54682c84c..7a44b3937a63b 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -694,6 +694,13 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
}
}
+/// UpdateOffset - Record \p Offset as the frame offset of \p FrameIdx, logging
+/// the placement when debug output is enabled.
+static inline void UpdateOffset(MachineFrameInfo &MFI, int FrameIdx,
+                                int64_t Offset) {
+  LLVM_DEBUG({
+    dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n";
+  });
+  // Set the computed offset.
+  MFI.setObjectOffset(FrameIdx, Offset);
+}
+
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
@@ -712,13 +719,9 @@ static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
Offset = alignTo(Offset, Alignment);
if (StackGrowsDown) {
- LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
- << "]\n");
- MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ UpdateOffset(MFI, FrameIdx, -Offset);
} else {
- LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset
- << "]\n");
- MFI.setObjectOffset(FrameIdx, Offset);
+ UpdateOffset(MFI, FrameIdx, Offset);
Offset += MFI.getObjectSize(FrameIdx);
}
}
@@ -1044,6 +1047,7 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) {
}
SmallVector<int, 8> ObjectsToAllocate;
+ SmallVector<int, 8> UpdateOffsetAfterAllocate;
// Then prepare to assign frame offsets to stack objects that are not used to
// spill callee saved registers.
@@ -1064,6 +1068,11 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) {
if (MFI.getStackID(i) != TargetStackID::Default)
continue;
+ if (MFI.getUnderlyingSlot(i) > MachineFrameInfo::IsUnderlyingSlot) {
+ UpdateOffsetAfterAllocate.push_back(i);
+ continue;
+ }
+
// Add the objects that we need to allocate to our working set.
ObjectsToAllocate.push_back(i);
}
@@ -1104,6 +1113,14 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) {
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
+ for (int FrameIdx : UpdateOffsetAfterAllocate) {
+ int UnderlyingSlot = MFI.getUnderlyingSlot(FrameIdx);
+ int64_t ObjOffset =
+ MFI.getObjectOffset(UnderlyingSlot) + MFI.getObjectOffset(FrameIdx);
+ UpdateOffset(MFI, FrameIdx, ObjOffset);
+ MFI.setUnderlyingSlot(FrameIdx, MachineFrameInfo::NoUnderlyingSlot);
+ }
+
if (!TFI.targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index c5fd00b558d9f..4cfdc678643f8 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -55,6 +56,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DebugCounter.h"
#include <algorithm>
#include <cassert>
#include <limits>
@@ -65,6 +67,9 @@ using namespace llvm;
#define DEBUG_TYPE "stack-coloring"
+DEBUG_COUNTER(ProcessSlot, DEBUG_TYPE "-slot",
+ "Controls which slot get processed");
+
static cl::opt<bool>
DisableColoring("no-stack-coloring",
cl::init(false), cl::Hidden,
@@ -90,8 +95,19 @@ LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
cl::init(true), cl::Hidden,
cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+static cl::opt<bool> UseNewStackColoring(
+ "new-stack-coloring", cl::init(false), cl::Hidden,
+ cl::desc("Use a better logic to try to reduce stack usage"));
+
+static constexpr unsigned MaxCandidatesToConsiderDefault = 5;
+static cl::opt<unsigned> MaxCandidatesToConsider(
+ "stackcoloring-max-candidates", cl::init(MaxCandidatesToConsiderDefault),
+ cl::Hidden,
+ cl::desc(
+ "Max number of candidates that will be evaluated, 0 means no limit"));
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(GeneratedWorse, "Number of times worse layout were generated");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
@@ -375,12 +391,43 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
namespace {
+/// Sentinel stored in unsigned slot/offset fields to mean "none"; also the
+/// value doMerging returns when it does not create a merged slot.
+constexpr unsigned InvalidIdx = -1;
+
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
class StackColoring {
MachineFrameInfo *MFI = nullptr;
MachineFunction *MF = nullptr;
+  /// Per-slot data used when packing slots into one merged stack object.
+  struct SlotInfo {
+    // All places in the current function where this Slot is live.
+    BitVector Liveness;
+
+    // Indices into Liveness at which a live range of this slot starts. Used
+    // to make overlap queries faster.
+    SmallVector<unsigned, 4> StartLiveness;
+
+    // Ordering key: slots are sorted by this value before being placed.
+    uint64_t SlotPriority = 0;
+
+    // Offset of this slot inside the merged slot, or InvalidIdx while the
+    // slot has not been placed.
+    unsigned Offset = InvalidIdx;
+
+    // Size of the stack object.
+    unsigned Size = 0;
+
+    // Alignment of the stack object. The type is spelled with its namespace
+    // because a member named like its own type changes the meaning of the
+    // unqualified name inside the class, which GCC rejects.
+    llvm::Align Align;
+
+    /// Return true if this slot and \p Other can be live at the same time,
+    /// i.e. one of them is live at a point where the other one starts.
+    bool hasOverlap(const SlotInfo &Other) const {
+      // NOTE: This is not just a faster way to say
+      //   return Liveness.anyCommon(Other.Liveness);
+      // This also allows merging slots that have overlapping lifetimes but
+      // cannot be live simultaneously
+      return any_of(StartLiveness,
+                    [&](unsigned Idx) { return Other.Liveness[Idx]; }) ||
+             any_of(Other.StartLiveness,
+                    [&](unsigned Idx) { return Liveness[Idx]; });
+    }
+
+    /// Print this entry to dbgs(). When \p State is given, the slot number
+    /// and its alloca (if any) are printed too.
+    LLVM_DUMP_METHOD void dump(const StackColoring *State = nullptr) const;
+  };
+
/// A class representing liveness information for a single basic block.
/// Each bit in the BitVector represents the liveness property
/// for a different stack slot.
@@ -405,6 +452,9 @@ class StackColoring {
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering;
+ unsigned LivenessSize;
+ SmallVector<SlotInfo, 0> Slot2Info;
+
/// Maps slots to their use interval. Outside of this interval, slots
/// values are either dead or `undef` and they will not be written to.
SmallVector<std::unique_ptr<LiveRange>, 16> Intervals;
@@ -458,6 +508,8 @@ class StackColoring {
/// in and out blocks.
void calculateLocalLiveness();
+ unsigned doMerging(unsigned NumSlots);
+
/// Returns TRUE if we're using the first-use-begins-lifetime method for
/// this slot (if FALSE, then the start marker is treated as start of lifetime).
bool applyFirstUse(int Slot) {
@@ -482,7 +534,7 @@ class StackColoring {
/// Go over the machine function and change instructions which use stack
/// slots to use the joint slots.
- void remapInstructions(DenseMap<int, int> &SlotRemap);
+ void remapInstructions(DenseMap<int, int> &SlotRemap, int MergedSlot);
/// The input program may contain instructions which are not inside lifetime
/// markers. This can happen due to a bug in the compiler or due to a bug in
@@ -527,6 +579,10 @@ void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dumpBV(StringRef tag, const BitVector &BV) {
+ if (BV.size() == 0) {
+ dbgs() << tag << " : EMPTY\n";
+ return;
+ }
constexpr unsigned ColumnWidth = 150;
unsigned LineStartOffset = tag.size() + /*" : "*/ 3;
unsigned WidthAfterTag = ColumnWidth - LineStartOffset;
@@ -588,7 +644,38 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
dbgs() << ' ' << SIdx;
dbgs() << '\n';
}
+ for (unsigned Slot = 0; Slot < Slot2Info.size(); Slot++) {
+ Slot2Info[Slot].dump(this);
+ }
}
+
+/// Print this SlotInfo to dbgs(): header, size/alignment, the liveness bits
+/// and the positions where a live range starts. With a non-null \p State the
+/// slot number and the slot's alloca (when there is one) are printed as well.
+LLVM_DUMP_METHOD void
+StackColoring::SlotInfo::dump(const StackColoring *State) const {
+  raw_ostream &OS = dbgs();
+  OS << "SlotInfo";
+  if (State) {
+    // The slot number is this entry's position inside State->Slot2Info.
+    unsigned SlotNo = this - State->Slot2Info.data();
+    OS << "(" << SlotNo << ")";
+    OS << ": " << '\n';
+    if (const auto *Alloca = State->MFI->getObjectAllocation(SlotNo)) {
+      Alloca->print(OS);
+      OS << '\n';
+    }
+  } else {
+    OS << ": " << '\n';
+  }
+
+  OS << "Size=" << Size << " Align=" << Align.value() << '\n';
+  dumpBV("LIVENESS   ", Liveness);
+
+  // Turn the list of start positions into a bit vector so it is printed in
+  // the same layout as Liveness.
+  BitVector StartPoints(Liveness.size());
+  for (unsigned Pt : StartLiveness) {
+    if (Pt >= StartPoints.size())
+      StartPoints.resize(Pt + 1);
+    StartPoints.set(Pt);
+  }
+  dumpBV("LIVE START ", StartPoints);
+  OS << "\n";
+}
+
#endif
static inline int getStartOrEndSlot(const MachineInstr &MI)
@@ -862,23 +949,39 @@ void StackColoring::calculateLocalLiveness() {
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SmallVector<SlotIndex, 16> Starts;
- SmallVector<bool, 16> DefinitelyInUse;
+ BitVector DefinitelyInUse;
+ SmallVector<int, 16> StartIdx;
+
+ int CurrIdx = 0;
+
+ DefinitelyInUse.resize(NumSlots);
// For each block, find which slots are active within this block
// and update the live intervals.
for (const MachineBasicBlock &MBB : *MF) {
- Starts.clear();
- Starts.resize(NumSlots);
- DefinitelyInUse.clear();
- DefinitelyInUse.resize(NumSlots);
+ Starts.assign(NumSlots, SlotIndex());
+ StartIdx.assign(NumSlots, -1);
+ DefinitelyInUse.reset();
// Start the interval of the slots that we previously found to be 'in-use'.
BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
- for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
- pos = MBBLiveness.LiveIn.find_next(pos)) {
+ for (int pos : MBBLiveness.LiveIn.set_bits()) {
Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+ StartIdx[pos] = CurrIdx;
}
+ bool StartedSinceInc = false;
+ auto EndRangeFor = [&](int Slot) {
+ if (StartIdx[Slot] == CurrIdx || StartedSinceInc) {
+ CurrIdx++;
+ StartedSinceInc = false;
+ }
+ Slot2Info[Slot].Liveness.resize(CurrIdx + 1);
+ Slot2Info[Slot].Liveness.set(StartIdx[Slot], CurrIdx);
+ StartIdx[Slot] = -1;
+ DefinitelyInUse[Slot] = false;
+ };
+
// Create the interval for the basic blocks containing lifetime begin/end.
for (const MachineInstr &MI : MBB) {
SmallVector<int, 4> slots;
@@ -888,16 +991,21 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
for (auto Slot : slots) {
if (IsStart) {
+ StartedSinceInc = true;
// If a slot is already definitely in use, we don't have to emit
// a new start marker because there is already a pre-existing
// one.
if (!DefinitelyInUse[Slot]) {
LiveStarts[Slot].push_back(ThisIndex);
+ Slot2Info[Slot].StartLiveness.push_back(CurrIdx);
DefinitelyInUse[Slot] = true;
}
if (!Starts[Slot].isValid())
Starts[Slot] = ThisIndex;
+ if (StartIdx[Slot] == -1)
+ StartIdx[Slot] = CurrIdx;
} else {
+ assert(Starts[Slot].isValid() == (StartIdx[Slot] != -1));
if (Starts[Slot].isValid()) {
VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
Intervals[Slot]->addSegment(
@@ -905,10 +1013,18 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Starts[Slot] = SlotIndex(); // Invalidate the start index
DefinitelyInUse[Slot] = false;
}
+ if (StartIdx[Slot] != -1)
+ EndRangeFor(Slot);
}
}
}
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ if (StartIdx[i] == -1)
+ continue;
+ EndRangeFor(i);
+ }
+
// Finish up started segments
for (unsigned i = 0; i < NumSlots; ++i) {
if (!Starts[i].isValid())
@@ -919,6 +1035,18 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
}
}
+ LivenessSize = CurrIdx;
+ for (SlotInfo &Info : Slot2Info) {
+ Info.Liveness.resize(CurrIdx);
+
+ // This is only to make us index into Liveness in order when doing a
+ // SlotInfo::hasOverlap, which should have better cache locality
+ std::sort(Info.StartLiveness.begin(), Info.StartLiveness.end());
+#ifndef NDEBUG
+ for (int Start : Info.StartLiveness)
+ assert(Info.Liveness[Start]);
+#endif
+ }
}
bool StackColoring::removeAllMarkers() {
@@ -944,7 +1072,7 @@ bool StackColoring::removeAllMarkers() {
return Count;
}
-void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedSlot) {
unsigned FixedInstr = 0;
unsigned FixedMemOp = 0;
unsigned FixedDbg = 0;
@@ -954,6 +1082,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (!VI.Var || !VI.inStackSlot())
continue;
int Slot = VI.getStackSlot();
+ if (Slot >= 0 && Slot2Info[Slot].Offset != InvalidIdx) {
+ // FIXME: properly update the offset into MergedSlot debug
+ VI.updateStackSlot(MergedSlot);
+ }
if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
LLVM_DEBUG(dbgs() << "Remapping debug info for ["
<< cast<DILocalVariable>(VI.Var)->getName() << "].\n");
@@ -1062,6 +1194,12 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (FromSlot<0)
continue;
+ if (FromSlot >= 0 && Slot2Info[FromSlot].Offset != InvalidIdx) {
+ MO.setIndex(MergedSlot);
+ MO.setOffset(MO.getOffset() + Slot2Info[FromSlot].Offset);
+ continue;
+ }
+
// Only look at mapped slots.
if (!SlotRemap.count(FromSlot))
continue;
@@ -1103,6 +1241,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
auto To = SlotRemap.find(FI);
if (To != SlotRemap.end())
SSRefs[FI].push_back(MMO);
+ if (FI >= 0 && Slot2Info[FI].Offset != InvalidIdx)
+ SSRefs[FI].push_back(MMO);
}
// If this memory location can be a slot remapped here,
@@ -1121,7 +1261,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// that is not remapped, we continue checking.
// Otherwise, we need to invalidate AA infomation.
const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
- if (AI && MergedAllocas.count(AI)) {
+ if ((AI && MergedAllocas.count(AI)) || UseNewStackColoring) {
MayHaveConflictingAAMD = true;
break;
}
@@ -1145,10 +1285,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Rewrite MachineMemOperands that reference old frame indices.
for (auto E : enumerate(SSRefs))
if (!E.value().empty()) {
- const PseudoSourceValue *NewSV =
- MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
- for (MachineMemOperand *Ref : E.value())
- Ref->setValue(NewSV);
+ if (UseNewStackColoring) {
+ const PseudoSourceValue *NewSV =
+ MF->getPSVManager().getFixedStack(MergedSlot);
+ for (MachineMemOperand *Ref : E.value())
+ Ref->setValue(NewSV);
+ } else {
+ const PseudoSourceValue *NewSV = MF->getPSVManager().getFixedStack(
+ SlotRemap.find(E.index())->second);
+ for (MachineMemOperand *Ref : E.value())
+ Ref->setValue(NewSV);
+ }
}
// Update the location of C++ catch objects for the MSVC personality routine.
@@ -1245,6 +1392,195 @@ PreservedAnalyses StackColoringPass::run(MachineFunction &MF,
return PreservedAnalyses::all();
}
+/// Try to pack every slot that has a recorded lifetime start into one new
+/// stack object.
+///
+/// Slots are first ordered by a priority derived from how much stack is in use
+/// at the points where they are live. They are then placed greedily: out of a
+/// bounded window of candidates, the one with the lowest score (its aligned
+/// offset minus log2 of its alignment) is placed next. On success, the offset
+/// of each placed slot inside the merged object is recorded both in Slot2Info
+/// and in MachineFrameInfo.
+///
+/// \param NumSlots number of entries of Slot2Info to consider.
+/// \returns the frame index of the merged object, or InvalidIdx if fewer than
+/// two slots qualify or the packed layout is not smaller than laying the
+/// slots out back to back.
+unsigned StackColoring::doMerging(unsigned NumSlots) {
+  SmallVector<unsigned> SlotStack;
+  Align FinalAlign;
+
+  int64_t OrigOptSize = 0;
+  int64_t OrigPesSize = 0;
+  for (unsigned Slot = 0; Slot < NumSlots; Slot++) {
+    SlotInfo &Info = Slot2Info[Slot];
+    if (!Info.StartLiveness.empty() &&
+        DebugCounter::shouldExecute(ProcessSlot)) {
+      FinalAlign = std::max(FinalAlign, Info.Align);
+
+      // Note: This is maybe not a completely fair comparison with the
+      // previous algo, as PEI should be smarter than that about alignment.
+      // But a fair comparison is hard since the old algo doesn't deal in
+      // alignment at all.
+      OrigPesSize = alignTo(OrigPesSize, Info.Align);
+      OrigPesSize += Info.Size;
+      OrigOptSize += Info.Size;
+      SlotStack.push_back(Slot);
+    }
+  }
+
+  if (SlotStack.size() <= 1)
+    return InvalidIdx;
+
+  // This whole block is only used to try and order the stack, such that the
+  // slots are processed in an order that helps getting good packing.
+  {
+    // Find how much usage of every livepoint there is.
+    SmallVector<unsigned> CumulatedUsage;
+    CumulatedUsage.resize(LivenessSize, 0);
+
+    for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
+      SlotInfo &Info = Slot2Info[SlotStack[Idx]];
+      for (unsigned Pt : Info.Liveness.set_bits()) {
+        CumulatedUsage[Pt] += Info.Size;
+      }
+    }
+
+    for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
+      SlotInfo &Info = Slot2Info[SlotStack[Idx]];
+      for (unsigned Pt : Info.Liveness.set_bits()) {
+        // Since the goal is to minimize the max usage, blocks that are in high
+        // contention areas are given more priority
+        Info.SlotPriority +=
+            (uint64_t)CumulatedUsage[Pt] * (uint64_t)CumulatedUsage[Pt] +
+            (uint64_t)Info.Size * (uint64_t)Info.Align.value();
+      }
+    }
+    // SlotStack only holds valid slot numbers, so they can be compared by
+    // priority directly. Slots are popped from the back, so the highest
+    // priority ends up being processed first.
+    std::stable_sort(
+        SlotStack.begin(), SlotStack.end(), [&](unsigned Lhs, unsigned Rhs) {
+          return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority;
+        });
+  }
+
+  // One-entry cache for overlap queries: FindOffset tends to ask about the
+  // same pair of slots for many consecutive live points.
+  SlotInfo *LastQueryLhs = nullptr;
+  SlotInfo *LastQueryRhs = nullptr;
+  bool LastQueryRes = false;
+  auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) {
+    if (&Lhs == LastQueryLhs && LastQueryRhs == &Rhs)
+      return LastQueryRes;
+    LastQueryLhs = &Lhs;
+    LastQueryRhs = &Rhs;
+    LastQueryRes = Lhs.hasOverlap(Rhs);
+    return LastQueryRes;
+  };
+
+  // For one live point: the first free offset, the slot that was placed last
+  // at this point, and the index in OlderStatus of the status it replaced.
+  struct Status {
+    unsigned Offset = 0;
+    unsigned Slot = InvalidIdx;
+    unsigned Prev = InvalidIdx;
+  };
+
+  SmallVector<Status> LatestStatus;
+  LatestStatus.resize(LivenessSize, Status{});
+  SmallVector<Status> OlderStatus;
+
+  auto FindOffset = [&](SlotInfo &Info, unsigned Pt) {
+    Status *Last = &LatestStatus[Pt];
+
+    // This is only called on slots that have overlapping lifetimes.
+    // So the no-overlap case only happens when their lifetimes overlap but
+    // only one can be live, because where they start in the CFG is mutually
+    // exclusive. See the comment about implementation for an example.
+    while (LLVM_UNLIKELY(Last->Slot != InvalidIdx &&
+                         !HasOverlapCached(Info, Slot2Info[Last->Slot])))
+      Last = &OlderStatus[Last->Prev];
+    return Last->Offset;
+  };
+  auto UpdateOffset = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
+    Status &Last = LatestStatus[Pt];
+    unsigned Idx = OlderStatus.size();
+    OlderStatus.push_back(Last);
+    Last.Prev = Idx;
+    Last.Offset = Offset;
+    Last.Slot = &Info - Slot2Info.data();
+  };
+
+  SmallVector<unsigned, MaxCandidatesToConsiderDefault> Candidates;
+  unsigned MaxCandidates =
+      MaxCandidatesToConsider == 0 ? ~0u : MaxCandidatesToConsider;
+  for (unsigned I = 0; I < MaxCandidates; I++) {
+    if (SlotStack.empty())
+      break;
+    Candidates.push_back(SlotStack.pop_back_val());
+  }
+
+  while (!Candidates.empty()) {
+    int64_t BestScore = std::numeric_limits<int64_t>::max();
+    unsigned BestIdx = InvalidIdx;
+    unsigned BestOffset = InvalidIdx;
+
+    for (unsigned K = 0; K < Candidates.size(); K++) {
+      SlotInfo &Info = Slot2Info[Candidates[K]];
+      unsigned Offset = 0;
+      for (unsigned Pt : Info.Liveness.set_bits())
+        Offset = std::max(Offset, FindOffset(Info, Pt));
+
+      Offset = alignTo(Offset, Info.Align);
+
+      int64_t Score = (int64_t)Offset - (int64_t)Log2(Info.Align);
+      LLVM_DEBUG(dbgs() << "SlotInfo(" << Candidates[K] << ") Score=" << Score
+                        << "\n");
+      bool IsBetter = [&] {
+        if (BestScore != Score)
+          return BestScore > Score;
+        // Equal scores: break the tie against the best candidate found so
+        // far. A best candidate always exists here, because the initial
+        // BestScore can never be equal to a real score.
+        assert(BestIdx != InvalidIdx && "Tie without a best candidate");
+        SlotInfo &Other = Slot2Info[Candidates[BestIdx]];
+        if (Other.Size != Info.Size)
+          return Other.Size < Info.Size;
+        if (Other.SlotPriority != Info.SlotPriority)
+          return Other.SlotPriority < Info.SlotPriority;
+
+        // Both are always stored in Slot2Info, so this is deterministic
+        return &Other < &Info;
+      }();
+
+      if (IsBetter) {
+        BestScore = Score;
+        BestIdx = K;
+        BestOffset = Offset;
+      }
+    }
+    assert(BestIdx != InvalidIdx && "No candidate was selected");
+    SlotInfo &Info = Slot2Info[Candidates[BestIdx]];
+
+    LLVM_DEBUG(Info.dump(this));
+    LLVM_DEBUG(dbgs() << "Placing SlotInfo(" << Candidates[BestIdx] << ") at "
+                      << BestOffset << " Score=" << BestScore << "\n");
+
+    Info.Offset = BestOffset;
+    for (unsigned Pt : Info.Liveness.set_bits())
+      UpdateOffset(Info, Pt, BestOffset + Info.Size);
+
+    // Drop the placed candidate and refill the window from the stack.
+    std::swap(Candidates[BestIdx], Candidates.back());
+    Candidates.pop_back();
+    if (!SlotStack.empty())
+      Candidates.push_back(SlotStack.pop_back_val());
+  }
+
+  unsigned FinalSize = 0;
+  for (Status &U : LatestStatus)
+    FinalSize = std::max(FinalSize, U.Offset);
+  LLVM_DEBUG(dbgs() << "MergedSize=" << FinalSize << " OrigPesSize="
+                    << OrigPesSize << " OrigOptSize=" << OrigOptSize << "\n");
+  if (FinalSize >= OrigPesSize) {
+    GeneratedWorse++;
+    // Forget the tentative placement: remapInstructions treats every slot
+    // whose Offset is not InvalidIdx as living inside the merged slot.
+    for (SlotInfo &Info : Slot2Info)
+      Info.Offset = InvalidIdx;
+    return InvalidIdx;
+  }
+
+  int MergedSlot =
+      MFI->CreateStackObject(FinalSize, FinalAlign, /*isSpillSlot=*/false);
+  MFI->setUnderlyingSlot(MergedSlot, MachineFrameInfo::IsUnderlyingSlot);
+
+  for (unsigned Slot = 0; Slot < NumSlots; Slot++)
+    if (Slot2Info[Slot].Offset != InvalidIdx) {
+      MFI->setUnderlyingSlot(Slot, MergedSlot);
+      MFI->setObjectOffset(Slot, Slot2Info[Slot].Offset);
+    }
+
+  // Note: this counts differently from the previous algo because this logic
+  // cares about alignment, while the older algo doesn't.
+  StackSpaceSaved += OrigPesSize - FinalSize;
+
+  return MergedSlot;
+}
+
bool StackColoring::run(MachineFunction &Func) {
LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
<< "********** Function: " << Func.getName() << '\n');
@@ -1256,11 +1592,12 @@ bool StackColoring::run(MachineFunction &Func) {
Intervals.clear();
LiveStarts.clear();
VNInfoAllocator.Reset();
+ Slot2Info.clear();
unsigned NumSlots = MFI->getObjectIndexEnd();
// If there are no stack slots then there are no markers to remove.
- if (!NumSlots || DisableColoring)
+ if (NumSlots < 2 || DisableColoring)
return removeAllMarkers();
SmallVector<int, 8> SortedSlots;
@@ -1290,11 +1627,16 @@ bool StackColoring::run(MachineFunction &Func) {
return removeAllMarkers();
}
+ Slot2Info.resize(NumSlots);
for (unsigned i=0; i < NumSlots; ++i) {
std::unique_ptr<LiveRange> LI(new LiveRange());
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
Intervals.push_back(std::move(LI));
SortedSlots.push_back(i);
+
+ Slot2Info[i].Align = MFI->getObjectAlign(i);
+ Slot2Info[i].Size = MFI->getObjectSize(i);
+ Slot2Info[i].Offset = InvalidIdx;
}
// Calculate the liveness of each block.
@@ -1311,105 +1653,125 @@ bool StackColoring::run(MachineFunction &Func) {
if (ProtectFromEscapedAllocas)
removeInvalidSlotRanges();
- // Maps old slots to new slots.
- DenseMap<int, int> SlotRemap;
- unsigned RemovedSlots = 0;
- unsigned ReducedSize = 0;
+ if (!UseNewStackColoring) {
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
- // Do not bother looking at empty intervals.
- for (unsigned I = 0; I < NumSlots; ++I) {
- if (Intervals[SortedSlots[I]]->empty())
- SortedSlots[I] = -1;
- }
-
- // This is a simple greedy algorithm for merging allocas. First, sort the
- // slots, placing the largest slots first. Next, perform an n^2 scan and look
- // for disjoint slots. When you find disjoint slots, merge the smaller one
- // into the bigger one and update the live interval. Remove the small alloca
- // and continue.
-
- // Sort the slots according to their size. Place unused slots at the end.
- // Use stable sort to guarantee deterministic code generation.
- llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
- // We use -1 to denote a uninteresting slot. Place these slots at the end.
- if (LHS == -1)
- return false;
- if (RHS == -1)
- return true;
- // Sort according to size.
- return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
- });
-
- for (auto &s : LiveStarts)
- llvm::sort(s);
-
- bool Changed = true;
- while (Changed) {
- Changed = false;
+ // Do not bother looking at empty intervals.
for (unsigned I = 0; I < NumSlots; ++I) {
- if (SortedSlots[I] == -1)
- continue;
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
- for (unsigned J=I+1; J < NumSlots; ++J) {
- if (SortedSlots[J] == -1)
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and
+ // look for disjoint slots. When you find disjoint slots, merge the smaller
+ // one into the bigger one and update the live interval. Remove the small
+ // alloca and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ // Use stable sort to guarantee deterministic code generation.
+ llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
+ // We use -1 to denote a uninteresting slot. Place these slots at the end.
+ if (LHS == -1)
+ return false;
+ if (RHS == -1)
+ return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ });
+
+ for (auto &s : LiveStarts)
+ llvm::sort(s);
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
continue;
- int FirstSlot = SortedSlots[I];
- int SecondSlot = SortedSlots[J];
+ for (unsigned J = I + 1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
- // Objects with different stack IDs cannot be merged.
- if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
- continue;
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
- LiveRange *First = &*Intervals[FirstSlot];
- LiveRange *Second = &*Intervals[SecondSlot];
- auto &FirstS = LiveStarts[FirstSlot];
- auto &SecondS = LiveStarts[SecondSlot];
- assert(!First->empty() && !Second->empty() && "Found an empty range");
-
- // Merge disjoint slots. This is a little bit tricky - see the
- // Implementation Notes section for an explanation.
- if (!First->isLiveAtIndexes(SecondS) &&
- !Second->isLiveAtIndexes(FirstS)) {
- Changed = true;
- First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
-
- int OldSize = FirstS.size();
- FirstS.append(SecondS.begin(), SecondS.end());
- auto Mid = FirstS.begin() + OldSize;
- std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
-
- SlotRemap[SecondSlot] = FirstSlot;
- SortedSlots[J] = -1;
- LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
- << SecondSlot << " together.\n");
- Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
- MFI->getObjectAlign(SecondSlot));
-
- assert(MFI->getObjectSize(FirstSlot) >=
- MFI->getObjectSize(SecondSlot) &&
- "Merging a small object into a larger one");
-
- RemovedSlots+=1;
- ReducedSize += MFI->getObjectSize(SecondSlot);
- MFI->setObjectAlignment(FirstSlot, MaxAlignment);
- MFI->RemoveStackObject(SecondSlot);
+ // Objects with different stack IDs cannot be merged.
+ if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
+ continue;
+
+ LiveRange *First = &*Intervals[FirstSlot];
+ LiveRange *Second = &*Intervals[SecondSlot];
+ auto &FirstS = LiveStarts[FirstSlot];
+ auto &SecondS = LiveStarts[SecondSlot];
+ assert(!First->empty() && !Second->empty() && "Found an empty range");
+
+ bool OldNoOverlap = !First->isLiveAtIndexes(SecondS) &&
+ !Second->isLiveAtIndexes(FirstS);
+
+ SlotInfo &FSlot = Slot2Info[FirstSlot];
+ SlotInfo &SSlot = Slot2Info[SecondSlot];
+ bool NewNoOverlap = !FSlot.hasOverlap(SSlot);
+
+ // if (NewNoOverlap != OldNoOverlap) {
+ // LLVM_DEBUG(dbgs() << "OldNoOverlap=" << OldNoOverlap
+ // << " NewNoOverlap=" << NewNoOverlap << "\n");
+ // }
+ // assert(OldNoOverlap == NewNoOverlap);
+
+ // Merge disjoint slots. This is a little bit tricky - see the
+ // Implementation Notes section for an explanation.
+ if (OldNoOverlap) {
+ Changed = true;
+ First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+ int OldSize = FirstS.size();
+ FirstS.append(SecondS.begin(), SecondS.end());
+ auto Mid = FirstS.begin() + OldSize;
+ std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
+ // FSlot.Liveness |= SSlot.Liveness;
+
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
+ << SecondSlot << " together.\n");
+ Align Alignment = std::max(MFI->getObjectAlign(FirstSlot),
+ MFI->getObjectAlign(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots += 1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, Alignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
}
}
+ } // While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
+ << ReducedSize << " bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ if (!SlotRemap.empty()) {
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap, InvalidIdx);
}
- }// While changed.
-
- // Record statistics.
- StackSpaceSaved += ReducedSize;
- StackSlotMerged += RemovedSlots;
- LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
- << ReducedSize << " bytes\n");
-
- // Scan the entire function and update all machine operands that use frame
- // indices to use the remapped frame index.
- if (!SlotRemap.empty()) {
- expungeSlotMap(SlotRemap, NumSlots);
- remapInstructions(SlotRemap);
+ } else {
+ // Maybe this entire logic should be moved to a generic StackLayouter that
+ // is used for PrologEpilogInserter and LocalStackSlotAllocation.
+ doMerging(NumSlots);
}
return removeAllMarkers();
>From 09de6f3c4cb8828cffd607d8b2fa75decdc3e779 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Mon, 9 Jun 2025 19:23:03 +0200
Subject: [PATCH 13/19] Start rebuild lifetimes for spill slots
---
llvm/lib/CodeGen/StackColoring.cpp | 152 +++++++++++++++++++++++++-
llvm/lib/CodeGen/TargetPassConfig.cpp | 6 +-
2 files changed, 150 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 4cfdc678643f8..0c876fe195fa3 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -399,6 +400,8 @@ class StackColoring {
MachineFrameInfo *MFI = nullptr;
MachineFunction *MF = nullptr;
+ LiveStacks* LS = nullptr;
+
struct SlotInfo {
// All places in the current function where this Slot is live
BitVector Liveness;
@@ -484,7 +487,7 @@ class StackColoring {
unsigned NumIterations;
public:
- StackColoring(SlotIndexes *Indexes) : Indexes(Indexes) {}
+ StackColoring(SlotIndexes *Indexes, LiveStacks* LS) : LS(LS), Indexes(Indexes) {}
bool run(MachineFunction &Func);
private:
@@ -573,6 +576,7 @@ INITIALIZE_PASS_END(StackColoringLegacy, DEBUG_TYPE,
void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexesWrapperPass>();
+ AU.addUsedIfAvailable<LiveStacksWrapperLegacy>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -744,6 +748,9 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
ConservativeSlots.clear();
ConservativeSlots.resize(NumSlot);
+ if (LS)
+ MarkersFound += LS->getNumIntervals() * 2;
+
// number of start and end lifetime ops for each slot
SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
@@ -955,6 +962,113 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
int CurrIdx = 0;
DefinitelyInUse.resize(NumSlots);
+ struct SplitSlotChanges {
+ const MachineInstr* AtMI;
+ unsigned BlockIdx : 31;
+ unsigned IsStart : 1;
+ unsigned Slot;
+ };
+ SmallVector<SplitSlotChanges> MidBlockSpillChanges;
+ unsigned SpillChangeCounter = 0;
+
+ if (LS && LS->getNumIntervals()) {
+ for (const MachineBasicBlock &MBB : *MF) {
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
+ MBBLiveness.LiveIn.resize(NumSlots);
+ MBBLiveness.LiveOut.resize(NumSlots);
+ }
+ for (const MachineBasicBlock &MBB : *MF) {
+ unsigned Base = LS->getStartIdx();
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
+ for (unsigned I = 0; I < LS->getNumIntervals(); I++) {
+ unsigned Slot = Base + I;
+ if (LS->getInterval(Slot).liveAt(Indexes->getMBBStartIdx(&MBB))) {
+ MBBLiveness.LiveIn[Slot] = true;
+ // Checking if the end of the block is in the live-range is not
+ // reliable
+ for (MachineBasicBlock *Pred : MBB.predecessors())
+ BlockLiveness[Pred->getNumber()].LiveOut[Slot] = true;
+ }
+ }
+ }
+ for (const MachineBasicBlock &MBB : *MF) {
+ unsigned SizeOnStart = MidBlockSpillChanges.size();
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
+ BitVector IsStoredTo;
+ IsStoredTo.resize(NumSlots, false);
+ struct MIBlockIdx {
+ const MachineInstr* MI;
+ unsigned BlockIdx;
+ };
+ unsigned BlockIdx = 0;
+ SmallVector<MIBlockIdx> LastUse;
+ LastUse.resize(NumSlots, {nullptr, 0});
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ for (MachineMemOperand* MMO : MI.memoperands()) {
+ auto *PSV = dyn_cast_if_present<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue());
+ if (!PSV)
+ continue;
+ unsigned Slot = PSV->getFrameIndex();
+ if (!LS->hasInterval(Slot))
+ continue;
+ // if (Slot == 17) {
+ // dbgs() << "MI: " << MI;
+ // dbgs() << "MBB: " << MBB.getName() << "\n";
+ // dbgs() << "MBB range:" << Indexes->getMBBRange(&MBB).first << "-"
+ // << Indexes->getMBBRange(&MBB).second << "\n";
+ // dbgs() << "slot range: " << LS->getInterval(Slot) << "\n";
+ // dbgs() << "\n";
+ // }
+ assert(MMO->isStore() != MMO->isLoad());
+ if (MMO->isStore()) {
+ if (!IsStoredTo[Slot]) {
+ MidBlockSpillChanges.push_back(
+ {&MI, BlockIdx, /*IsStart=*/true, Slot});
+ IsStoredTo[Slot] = true;
+ }
+ } else
+ LastUse[Slot] = {&MI, BlockIdx};
+ }
+ BlockIdx++;
+ }
+
+ BitVector Liveness = MBBLiveness.LiveIn;
+ Liveness |= IsStoredTo;
+ Liveness &= MBBLiveness.LiveOut.flip();
+ for (unsigned Slot : Liveness.set_bits()) {
+ if (!LS->hasInterval(Slot))
+ continue;
+ if (LastUse[Slot].MI)
+ MidBlockSpillChanges.push_back({LastUse[Slot].MI,
+ LastUse[Slot].BlockIdx,
+ /*IsStart=*/false, Slot});
+ }
+
+ std::stable_sort(MidBlockSpillChanges.begin() + SizeOnStart,
+ MidBlockSpillChanges.end(),
+ [&](SplitSlotChanges Lhs, SplitSlotChanges Rhs) -> bool {
+ if (Lhs.BlockIdx == Rhs.BlockIdx)
+ assert(Lhs.Slot != Rhs.Slot);
+ if (Lhs.BlockIdx != Rhs.BlockIdx)
+ return Lhs.BlockIdx < Rhs.BlockIdx;
+ // Avoid overlap of lifetime when the same instruction
+ // starts some spill lifetime and ends others.
+ return Rhs.IsStart;
+ });
+ }
+ }
+ LLVM_DEBUG({
+ for (SplitSlotChanges C : MidBlockSpillChanges) {
+ dbgs() << "Idx=" << C.BlockIdx << " Slot=" << C.Slot
+ << " IsStart=" << C.IsStart << " MI=" << *C.AtMI;
+ }
+ });
+
+ // To avoid needing bounds checks
+ MidBlockSpillChanges.push_back({nullptr, 0, false, InvalidIdx});
// For each block, find which slots are active within this block
// and update the live intervals.
@@ -986,10 +1100,15 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
for (const MachineInstr &MI : MBB) {
SmallVector<int, 4> slots;
bool IsStart = false;
- if (!isLifetimeStartOrEnd(MI, slots, IsStart))
+ bool AnyChange = isLifetimeStartOrEnd(MI, slots, IsStart);
+ AnyChange |= MidBlockSpillChanges[SpillChangeCounter].AtMI == &MI;
+ if (!AnyChange)
continue;
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
- for (auto Slot : slots) {
+ auto OnChange = [&](unsigned Slot, bool IsStart) {
+ // if (Slot == 3) {
+ // outs() << "HERE\n";
+ // }
if (IsStart) {
StartedSinceInc = true;
// If a slot is already definitely in use, we don't have to emit
@@ -1016,6 +1135,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
if (StartIdx[Slot] != -1)
EndRangeFor(Slot);
}
+ };
+ for (auto Slot : slots)
+ OnChange(Slot, IsStart);
+ for (; SpillChangeCounter < MidBlockSpillChanges.size() &&
+ MidBlockSpillChanges[SpillChangeCounter].AtMI == &MI;
+ SpillChangeCounter++) {
+ SplitSlotChanges Change = MidBlockSpillChanges[SpillChangeCounter];
+ OnChange(Change.Slot, Change.IsStart);
}
}
@@ -1035,6 +1162,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
}
}
+ // Make sure we reached the end
+ assert(!MidBlockSpillChanges[SpillChangeCounter].AtMI);
+
LivenessSize = CurrIdx;
for (SlotInfo &Info : Slot2Info) {
Info.Liveness.resize(CurrIdx);
@@ -1043,6 +1173,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
// SlotInfo::hasOverlap, which should have better cache locality
std::sort(Info.StartLiveness.begin(), Info.StartLiveness.end());
#ifndef NDEBUG
+ assert(Info.Liveness.any() == !Info.StartLiveness.empty());
for (int Start : Info.StartLiveness)
assert(Info.Liveness[Start]);
#endif
@@ -1380,13 +1511,19 @@ bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- StackColoring SC(&getAnalysis<SlotIndexesWrapperPass>().getSI());
+ LiveStacks* LS = nullptr;
+ LiveStacksWrapperLegacy* LSWL = getAnalysisIfAvailable<LiveStacksWrapperLegacy>();
+ if (LSWL)
+ LS = &LSWL->getLS();
+
+ StackColoring SC(&getAnalysis<SlotIndexesWrapperPass>().getSI(), LS);
return SC.run(MF);
}
PreservedAnalyses StackColoringPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
- StackColoring SC(&MFAM.getResult<SlotIndexesAnalysis>(MF));
+ StackColoring SC(&MFAM.getResult<SlotIndexesAnalysis>(MF),
+ MFAM.getCachedResult<LiveStacksAnalysis>(MF));
if (SC.run(MF))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1400,6 +1537,8 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
int64_t OrigPesSize = 0;
for (unsigned Slot = 0; Slot < NumSlots; Slot++) {
SlotInfo& Info = Slot2Info[Slot];
+ if (Info.StartLiveness.empty())
+ assert(!LS || !LS->hasInterval(Slot));
if (!Info.StartLiveness.empty() &&
DebugCounter::shouldExecute(ProcessSlot)) {
FinalAlign = std::max(FinalAlign, Info.Align);
@@ -1596,6 +1735,9 @@ bool StackColoring::run(MachineFunction &Func) {
unsigned NumSlots = MFI->getObjectIndexEnd();
+ // if (MF->getName() == "_ZL9transformPjS_Rm")
+ // outs() << "HERE\n";
+
// If there are no stack slots then there are no markers to remove.
if (NumSlots < 2 || DisableColoring)
return removeAllMarkers();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 864c7c8acd3b2..343e25ae17fd7 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1498,13 +1498,13 @@ void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&MachineSchedulerID);
if (addRegAssignAndRewriteOptimized()) {
- // Perform stack slot coloring and post-ra machine LICM.
- addPass(&StackSlotColoringID);
-
if (MergedStackColoring) {
// This pass merges large allocas. StackSlotColoring is a different pass
// which merges spill slots.
addPass(&StackColoringLegacyID);
+ } else {
+ // Perform stack slot coloring and post-ra machine LICM.
+ addPass(&StackSlotColoringID);
}
// Allow targets to expand pseudo instructions depending on the choice of
>From 78e9bca2c7b30b5dd242ee112379b214bf11eee0 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 10 Jun 2025 17:30:27 +0200
Subject: [PATCH 14/19] Fix bug + add comments + reduce ammount of debug prints
---
llvm/lib/CodeGen/StackColoring.cpp | 96 ++++++++++++++++++++----------
1 file changed, 65 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 0c876fe195fa3..b9a3b5b3bc8c8 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -648,9 +648,6 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
dbgs() << ' ' << SIdx;
dbgs() << '\n';
}
- for (unsigned Slot = 0; Slot < Slot2Info.size(); Slot++) {
- Slot2Info[Slot].dump(this);
- }
}
LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State) const {
@@ -1060,12 +1057,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
});
}
}
- LLVM_DEBUG({
- for (SplitSlotChanges C : MidBlockSpillChanges) {
- dbgs() << "Idx=" << C.BlockIdx << " Slot=" << C.Slot
- << " IsStart=" << C.IsStart << " MI=" << *C.AtMI;
- }
- });
// To avoid needing bounds checks
MidBlockSpillChanges.push_back({nullptr, 0, false, InvalidIdx});
@@ -1583,10 +1574,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
}
std::stable_sort(
SlotStack.begin(), SlotStack.end(), [&](unsigned Lhs, unsigned Rhs) {
- if (Lhs == InvalidIdx)
- return false;
- if (Rhs == InvalidIdx)
- return true;
return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority;
});
}
@@ -1594,6 +1581,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
SlotInfo* LastQueryLhs = nullptr;
SlotInfo* LastQueryRhs = nullptr;
bool LastQueryRes = false;
+ // TODO: Real caching ?
auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) {
if (&Lhs == LastQueryLhs && LastQueryRhs == &Rhs)
return LastQueryRes;
@@ -1604,8 +1592,14 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
};
struct Status {
+ // This is the offset at which a slot on top should be placed. So the offset
+ // of the slot + the size of the slot
unsigned Offset = 0;
+
+ // The Slot just below the offset.
unsigned Slot = InvalidIdx;
+
+ // The index of the previous status in OlderStatus
unsigned Prev = InvalidIdx;
};
@@ -1616,22 +1610,41 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
auto FindOffset = [&](SlotInfo &Info, unsigned Pt) {
Status *Last = &LatestStatus[Pt];
- // This is only called on Slot that have overlapping lifetimes
- // So the no overlap only happens when there lifetime overlap but only one
- // can be live because where they start in the CFG is mutually exclusive
- // See the comment about implementation for an example
+ // The slots in the linked-list are always kept in ascending order, so the
+ // earliest slot has the lowest offset
+ // This loop handles cases where the latest slot and Info cannot both be live
+ // because of the CFG, so even if their lifetimes overlap, their storage can overlap
while (LLVM_UNLIKELY(Last->Slot != InvalidIdx &&
!HasOverlapCached(Info, Slot2Info[Last->Slot])))
Last = &OlderStatus[Last->Prev];
return Last->Offset;
};
auto UpdateOffset = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
- Status& Last = LatestStatus[Pt];
+ Status* Last = &LatestStatus[Pt];
unsigned Idx = OlderStatus.size();
- OlderStatus.push_back(Last);
- Last.Prev = Idx;
- Last.Offset = Offset;
- Last.Slot = &Info - Slot2Info.data();
+ OlderStatus.push_back(*Last);
+
+ // This branch is not taken only when we are inserting a slot that wasn't
+ // overlapping with the previous slot and is smaller, so the inserted
+ // slot is not the new start of the linked-list
+ if (LLVM_LIKELY(Last->Offset <= Offset)) {
+ Last->Prev = Idx;
+ Last->Offset = Offset;
+ Last->Slot = &Info - Slot2Info.data();
+ return;
+ }
+
+ // Ensure ordering of slots
+ Status* Inserted = &OlderStatus.back();
+ Inserted->Offset = Offset;
+ Inserted->Slot = &Info - Slot2Info.data();
+ Status *Curr = Last;
+ while (Curr->Prev != InvalidIdx && OlderStatus[Curr->Prev].Offset > Offset)
+ Curr = &OlderStatus[Curr->Prev];
+
+ // Insert the new node in the linked-list
+ Inserted->Prev = Curr->Prev;
+ Curr->Prev = Idx;
};
SmallVector<unsigned, MaxCandidatesToConsiderDefault> Candidates;
@@ -1643,25 +1656,34 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
Candidates.push_back(SlotStack.pop_back_val());
}
+ unsigned WorseCaseOffset = 0;
while (!Candidates.empty()) {
- int64_t BestScore = std::numeric_limits<int64_t>::max();
unsigned BestIdx = InvalidIdx;
unsigned BestOffset = InvalidIdx;
for (unsigned K = 0; K < Candidates.size(); K++) {
SlotInfo &Info = Slot2Info[Candidates[K]];
unsigned Offset = 0;
- for (unsigned Pt : Info.Liveness.set_bits())
+ for (unsigned Pt : Info.Liveness.set_bits()) {
Offset = std::max(Offset, FindOffset(Info, Pt));
+ // If Offset == WorseCaseOffset, this is always a valid, options. so no
+ // more checking needed
+ // If Offset > BestOffset, we already found a better solution, so this
+ // one doesn't matter
+ if (Offset == WorseCaseOffset || Offset > BestOffset)
+ break;
+ }
+
Offset = alignTo(Offset, Info.Align);
- int64_t Score = (int64_t)Offset - (int64_t)Log2(Info.Align);
- LLVM_DEBUG(dbgs() << "SlotInfo(" << Candidates[K] << ") Score=" << Score << "\n");
+ LLVM_DEBUG(dbgs() << "choice: SlotInfo(" << Candidates[K] << ") at " << Offset << "\n");
bool IsBetter = [&] {
- if (BestScore != Score)
- return BestScore > Score;
+ if (BestOffset != Offset)
+ return BestOffset > Offset;
SlotInfo &Other = Slot2Info[Candidates[K]];
+ if (Other.Align != Info.Align)
+ return Other.Align < Info.Align;
if (Other.Size != Info.Size)
return Other.Size < Info.Size;
if (Other.SlotPriority != Info.SlotPriority)
@@ -1672,7 +1694,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
}();
if (IsBetter) {
- BestScore = Score;
BestIdx = K;
BestOffset = Offset;
}
@@ -1681,11 +1702,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
LLVM_DEBUG(Info.dump(this));
LLVM_DEBUG(dbgs() << "Placing SlotInfo(" << Candidates[BestIdx] << ") at "
- << BestOffset << " Score=" << BestScore << "\n");
+ << BestOffset << "\n");
Info.Offset = BestOffset;
+ WorseCaseOffset = std::max(WorseCaseOffset, BestOffset + Info.Size);
for (unsigned Pt : Info.Liveness.set_bits())
UpdateOffset(Info, Pt, BestOffset + Info.Size);
+#ifdef EXPENSIVE_CHECKS
+ // Validate the order of offsets in the linked-list
+ for (Status &S : LatestStatus) {
+ Status *Curr = &S;
+ unsigned CurrOffset = Curr->Offset;
+ while (Curr->Prev != InvalidIdx) {
+ assert(Curr->Offset <= CurrOffset);
+ CurrOffset = Curr->Offset;
+ Curr = &OlderStatus[Curr->Prev];
+ }
+ }
+#endif
std::swap(Candidates[BestIdx], Candidates.back());
Candidates.pop_back();
@@ -1788,7 +1822,6 @@ bool StackColoring::run(MachineFunction &Func) {
// Propagate the liveness information.
calculateLiveIntervals(NumSlots);
- LLVM_DEBUG(dumpIntervals());
// Search for allocas which are used outside of the declared lifetime
// markers.
@@ -1796,6 +1829,7 @@ bool StackColoring::run(MachineFunction &Func) {
removeInvalidSlotRanges();
if (!UseNewStackColoring) {
+ LLVM_DEBUG(dumpIntervals());
// Maps old slots to new slots.
DenseMap<int, int> SlotRemap;
unsigned RemovedSlots = 0;
>From 74054b5ff57110349d0531c9044335656dd74e4a Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Tue, 10 Jun 2025 22:15:54 +0200
Subject: [PATCH 15/19] [NFC] Make StackColoring debug mode more concise
---
llvm/lib/CodeGen/StackColoring.cpp | 62 ++++++++++++++++++------------
1 file changed, 37 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index b9a3b5b3bc8c8..b12ef12df38b6 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -652,19 +652,23 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State) const {
unsigned Slot = InvalidIdx;
- if (State)
+ if (State) {
Slot = this - State->Slot2Info.data();
+ dbgs() << "fi#" << Slot;
+ } else
dbgs() << "SlotInfo";
- if (State)
- dbgs() << "(" << Slot << ")";
- dbgs()<< ": ";
- dbgs() << '\n';
- if (State)
- if (State->MFI->getObjectAllocation(Slot)) {
- State->MFI->getObjectAllocation(Slot)->print(dbgs());
- dbgs() << '\n';
+ dbgs() << ":";
+ if (Offset != InvalidIdx)
+ dbgs() << " offset=" << Offset;
+ if (State) {
+ if (State->MFI->getObjectAllocation(Slot))
+ dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName() << "\"";
+ if (State->MFI->isSpillSlotObjectIndex(Slot))
+ dbgs() << " spill";
}
- dbgs() << "Size=" << Size << " Align=" << Align.value() << '\n';
+ dbgs() << " size=" << Size << " align=" << Align.value() << '\n';
+ if (IndexBasedLiveRange)
+ dbgs() << "Index: " << *IndexBasedLiveRange << "\n";
dumpBV("LIVENESS ", Liveness);
BitVector Start;
Start.resize(Liveness.size());
@@ -1607,7 +1611,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
LatestStatus.resize(LivenessSize, Status{});
SmallVector<Status> OlderStatus;
- auto FindOffset = [&](SlotInfo &Info, unsigned Pt) {
+ auto FindStatus = [&](SlotInfo &Info, unsigned Pt) -> Status& {
Status *Last = &LatestStatus[Pt];
// The slots in the linked-list are always kept in ascending order, so the
@@ -1617,9 +1621,9 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
while (LLVM_UNLIKELY(Last->Slot != InvalidIdx &&
!HasOverlapCached(Info, Slot2Info[Last->Slot])))
Last = &OlderStatus[Last->Prev];
- return Last->Offset;
+ return *Last;
};
- auto UpdateOffset = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
+ auto UpdateStatus = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
Status* Last = &LatestStatus[Pt];
unsigned Idx = OlderStatus.size();
OlderStatus.push_back(*Last);
@@ -1656,16 +1660,25 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
Candidates.push_back(SlotStack.pop_back_val());
}
+ LLVM_DEBUG(dbgs() << "\nStarting Placement:\n");
unsigned WorseCaseOffset = 0;
while (!Candidates.empty()) {
unsigned BestIdx = InvalidIdx;
unsigned BestOffset = InvalidIdx;
+ LLVM_DEBUG(dbgs() << "top=" << WorseCaseOffset << " choosing: ");
for (unsigned K = 0; K < Candidates.size(); K++) {
SlotInfo &Info = Slot2Info[Candidates[K]];
unsigned Offset = 0;
+ unsigned PrevSlot = InvalidIdx;
+ (void)PrevSlot; // Only use in LLVM_DEBUG
+
for (unsigned Pt : Info.Liveness.set_bits()) {
- Offset = std::max(Offset, FindOffset(Info, Pt));
+ Status S = FindStatus(Info, Pt);
+ if (S.Offset > Offset) {
+ PrevSlot = S.Slot;
+ Offset = S.Offset;
+ }
// If Offset == WorseCaseOffset, this is always a valid, options. so no
// more checking needed
@@ -1677,7 +1690,10 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
Offset = alignTo(Offset, Info.Align);
- LLVM_DEBUG(dbgs() << "choice: SlotInfo(" << Candidates[K] << ") at " << Offset << "\n");
+ LLVM_DEBUG(dbgs() << "fi#" << Candidates[K] << "@" << Offset << "->";
+ if (PrevSlot == InvalidIdx) dbgs() << "bottom";
+ else dbgs() << "fi#" << PrevSlot; dbgs() << ", ";);
+
bool IsBetter = [&] {
if (BestOffset != Offset)
return BestOffset > Offset;
@@ -1699,15 +1715,15 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
}
}
SlotInfo &Info = Slot2Info[Candidates[BestIdx]];
+ Info.Offset = BestOffset;
+ WorseCaseOffset = std::max(WorseCaseOffset, BestOffset + Info.Size);
+ LLVM_DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Placing: ");
LLVM_DEBUG(Info.dump(this));
- LLVM_DEBUG(dbgs() << "Placing SlotInfo(" << Candidates[BestIdx] << ") at "
- << BestOffset << "\n");
- Info.Offset = BestOffset;
- WorseCaseOffset = std::max(WorseCaseOffset, BestOffset + Info.Size);
for (unsigned Pt : Info.Liveness.set_bits())
- UpdateOffset(Info, Pt, BestOffset + Info.Size);
+ UpdateStatus(Info, Pt, BestOffset + Info.Size);
#ifdef EXPENSIVE_CHECKS
// Validate the order of offsets in the linked-list
for (Status &S : LatestStatus) {
@@ -1786,13 +1802,9 @@ bool StackColoring::run(MachineFunction &Func) {
unsigned TotalSize = 0;
LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
<< " slots\n");
- LLVM_DEBUG(dbgs() << "Slot structure:\n");
- for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
- LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
- << " bytes.\n");
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i)
TotalSize += MFI->getObjectSize(i);
- }
LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
>From 709b55ae6ae14f2203a4b3df93b92198867ae1d8 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Wed, 11 Jun 2025 23:59:11 +0200
Subject: [PATCH 16/19] [NFC] Cleanup + comments
---
llvm/lib/CodeGen/StackColoring.cpp | 89 ++++++++++++------------------
1 file changed, 34 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index b12ef12df38b6..f00b5a17f8b91 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -10,13 +10,7 @@
// lifetime markers machine instructions (LIFETIME_START and LIFETIME_END),
// which represent the possible lifetime of stack slots. It attempts to
// merge disjoint stack slots and reduce the used stack space.
-// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
-//
-// TODO: In the future we plan to improve stack coloring in the following ways:
-// 1. Allow merging multiple small slots into a single larger slot at different
-// offsets.
-// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
-// spill slots.
+// NOTE: This pass is not StackSlotColoring, which optimizes only spill slots.
//
//===----------------------------------------------------------------------===//
@@ -25,7 +19,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -100,9 +93,8 @@ static cl::opt<bool> UseNewStackColoring(
"new-stack-coloring", cl::init(false), cl::Hidden,
cl::desc("Use a better logic to try to reduce stack usage"));
-static constexpr unsigned MaxCandidatesToConsiderDefault = 5;
-static cl::opt<unsigned> MaxCandidatesToConsider(
- "stackcoloring-max-candidates", cl::init(MaxCandidatesToConsiderDefault),
+static cl::opt<unsigned> MaxCandidatesOpt(
+ "stackcoloring-max-candidates", cl::init(0),
cl::Hidden,
cl::desc(
"Max number of candidates that will be evaluated, 0 means no limit"));
@@ -656,7 +648,7 @@ LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State)
Slot = this - State->Slot2Info.data();
dbgs() << "fi#" << Slot;
} else
- dbgs() << "SlotInfo";
+ dbgs() << "SlotInfo";
dbgs() << ":";
if (Offset != InvalidIdx)
dbgs() << " offset=" << Offset;
@@ -665,10 +657,8 @@ LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State)
dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName() << "\"";
if (State->MFI->isSpillSlotObjectIndex(Slot))
dbgs() << " spill";
- }
+ }
dbgs() << " size=" << Size << " align=" << Align.value() << '\n';
- if (IndexBasedLiveRange)
- dbgs() << "Index: " << *IndexBasedLiveRange << "\n";
dumpBV("LIVENESS ", Liveness);
BitVector Start;
Start.resize(Liveness.size());
@@ -973,6 +963,13 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
unsigned SpillChangeCounter = 0;
if (LS && LS->getNumIntervals()) {
+ // Here we prepare the lifetime information of spill slots.
+ // Live ranges in LiveStacks seem to be slightly outdated in many small
+ // ways. This is not an issue for stack-slot-coloring, because it only
+ // operates on LiveRanges from LiveStacks, but it is an issue here.
+ // So we only rely on LiveStacks to give us live edges, and conservatively
+ // reconstruct in-block liveness changes.
+
for (const MachineBasicBlock &MBB : *MF) {
BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()];
MBBLiveness.LiveIn.resize(NumSlots);
@@ -1015,14 +1012,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
unsigned Slot = PSV->getFrameIndex();
if (!LS->hasInterval(Slot))
continue;
- // if (Slot == 17) {
- // dbgs() << "MI: " << MI;
- // dbgs() << "MBB: " << MBB.getName() << "\n";
- // dbgs() << "MBB range:" << Indexes->getMBBRange(&MBB).first << "-"
- // << Indexes->getMBBRange(&MBB).second << "\n";
- // dbgs() << "slot range: " << LS->getInterval(Slot) << "\n";
- // dbgs() << "\n";
- // }
assert(MMO->isStore() != MMO->isLoad());
if (MMO->isStore()) {
if (!IsStoredTo[Slot]) {
@@ -1048,6 +1037,8 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
/*IsStart=*/false, Slot});
}
+ // Ensure that the changes are in the same order they will be found and
+ // need to be processed in
std::stable_sort(MidBlockSpillChanges.begin() + SizeOnStart,
MidBlockSpillChanges.end(),
[&](SplitSlotChanges Lhs, SplitSlotChanges Rhs) -> bool {
@@ -1081,6 +1072,8 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
bool StartedSinceInc = false;
auto EndRangeFor = [&](int Slot) {
+ // The fewer indices the better, so we only increment when the ranges
+ // would not be accurate otherwise
if (StartIdx[Slot] == CurrIdx || StartedSinceInc) {
CurrIdx++;
StartedSinceInc = false;
@@ -1101,9 +1094,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
continue;
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
auto OnChange = [&](unsigned Slot, bool IsStart) {
- // if (Slot == 3) {
- // outs() << "HERE\n";
- // }
if (IsStart) {
StartedSinceInc = true;
// If a slot is already definitely in use, we don't have to emit
@@ -1209,7 +1199,6 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
continue;
int Slot = VI.getStackSlot();
if (Slot >= 0 && Slot2Info[Slot].Offset != InvalidIdx) {
- // FIXME: properly update the offset into MergedSlot debug
VI.updateStackSlot(MergedSlot);
}
if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
@@ -1585,7 +1574,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
SlotInfo* LastQueryLhs = nullptr;
SlotInfo* LastQueryRhs = nullptr;
bool LastQueryRes = false;
- // TODO: Real caching ?
+ // Maybe there should be real caching here
auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) {
if (&Lhs == LastQueryLhs && LastQueryRhs == &Rhs)
return LastQueryRes;
@@ -1616,8 +1605,10 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
// The slots in the linked-list are always kept in ascending order, so the
// earliest slot has the lowest offset
- // This loop handles cases where the latest slot doesn't cannot be both live
- // because of the CFG, so even if there lifetime overlap, they can overlap
+ // This loop handles cases where this slot and the latest slot cannot
+ // both be live because of the CFG, so even if their lifetimes
+ // overlap, the slots themselves can overlap
+ // See comment about implementation higher in the file
while (LLVM_UNLIKELY(Last->Slot != InvalidIdx &&
!HasOverlapCached(Info, Slot2Info[Last->Slot])))
Last = &OlderStatus[Last->Prev];
@@ -1638,7 +1629,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
return;
}
- // Insure ordering of slots
+ // Ensure ordering of slots
Status* Inserted = &OlderStatus.back();
Inserted->Offset = Offset;
Inserted->Slot = &Info - Slot2Info.data();
@@ -1651,9 +1642,10 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
Curr->Prev = Idx;
};
- SmallVector<unsigned, MaxCandidatesToConsiderDefault> Candidates;
- unsigned MaxCandidates =
- MaxCandidatesToConsider == 0 ? ~0u : MaxCandidatesToConsider;
+ // This is a vector but element ordering is not relevant
+ SmallVector<unsigned> Candidates;
+
+ unsigned MaxCandidates = MaxCandidatesOpt == 0 ? ~0u : MaxCandidatesOpt;
for (unsigned I = 0; I < MaxCandidates; I++) {
if (SlotStack.empty())
break;
@@ -1666,7 +1658,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
unsigned BestIdx = InvalidIdx;
unsigned BestOffset = InvalidIdx;
- LLVM_DEBUG(dbgs() << "top=" << WorseCaseOffset << " choosing: ");
+ LLVM_DEBUG(dbgs() << "Worse is at " << WorseCaseOffset << ", choosing: ");
for (unsigned K = 0; K < Candidates.size(); K++) {
SlotInfo &Info = Slot2Info[Candidates[K]];
unsigned Offset = 0;
@@ -1705,7 +1697,8 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
if (Other.SlotPriority != Info.SlotPriority)
return Other.SlotPriority < Info.SlotPriority;
- // Both are always stored in Slot2Info, so this is deterministic
+ // Both are always stored in Slot2Info, so this is equivalent to
+ // FrameIndex comparison
return &Other < &Info;
}();
@@ -1783,10 +1776,10 @@ bool StackColoring::run(MachineFunction &Func) {
VNInfoAllocator.Reset();
Slot2Info.clear();
- unsigned NumSlots = MFI->getObjectIndexEnd();
+ if (!UseNewStackColoring)
+ LS = nullptr;
- // if (MF->getName() == "_ZL9transformPjS_Rm")
- // outs() << "HERE\n";
+ unsigned NumSlots = MFI->getObjectIndexEnd();
// If there are no stack slots then there are no markers to remove.
if (NumSlots < 2 || DisableColoring)
@@ -1898,22 +1891,10 @@ bool StackColoring::run(MachineFunction &Func) {
auto &SecondS = LiveStarts[SecondSlot];
assert(!First->empty() && !Second->empty() && "Found an empty range");
- bool OldNoOverlap = !First->isLiveAtIndexes(SecondS) &&
- !Second->isLiveAtIndexes(FirstS);
-
- SlotInfo &FSlot = Slot2Info[FirstSlot];
- SlotInfo &SSlot = Slot2Info[SecondSlot];
- bool NewNoOverlap = !FSlot.hasOverlap(SSlot);
-
- // if (NewNoOverlap != OldNoOverlap) {
- // LLVM_DEBUG(dbgs() << "OldNoOverlap=" << OldNoOverlap
- // << " NewNoOverlap=" << NewNoOverlap << "\n");
- // }
- // assert(OldNoOverlap == NewNoOverlap);
-
// Merge disjoint slots. This is a little bit tricky - see the
// Implementation Notes section for an explanation.
- if (OldNoOverlap) {
+ if (!First->isLiveAtIndexes(SecondS) &&
+ !Second->isLiveAtIndexes(FirstS)) {
Changed = true;
First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
@@ -1922,8 +1903,6 @@ bool StackColoring::run(MachineFunction &Func) {
auto Mid = FirstS.begin() + OldSize;
std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
- // FSlot.Liveness |= SSlot.Liveness;
-
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
>From 0472ecd8ba4e672542853d74f390decba47b3dd8 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 12 Jun 2025 00:27:07 +0200
Subject: [PATCH 17/19] Cleanup the Diff
---
llvm/lib/CodeGen/StackColoring.cpp | 36 ++++++++----------------------
1 file changed, 9 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index f00b5a17f8b91..9fdf5c426201b 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -529,7 +529,7 @@ class StackColoring {
/// Go over the machine function and change instructions which use stack
/// slots to use the joint slots.
- void remapInstructions(DenseMap<int, int> &SlotRemap, int MergedSlot);
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
/// The input program may contain instructions which are not inside lifetime
/// markers. This can happen due to a bug in the compiler or due to a bug in
@@ -1188,7 +1188,7 @@ bool StackColoring::removeAllMarkers() {
return Count;
}
-void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedSlot) {
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
unsigned FixedInstr = 0;
unsigned FixedMemOp = 0;
unsigned FixedDbg = 0;
@@ -1198,9 +1198,6 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
if (!VI.Var || !VI.inStackSlot())
continue;
int Slot = VI.getStackSlot();
- if (Slot >= 0 && Slot2Info[Slot].Offset != InvalidIdx) {
- VI.updateStackSlot(MergedSlot);
- }
if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
LLVM_DEBUG(dbgs() << "Remapping debug info for ["
<< cast<DILocalVariable>(VI.Var)->getName() << "].\n");
@@ -1309,12 +1306,6 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
if (FromSlot<0)
continue;
- if (FromSlot >= 0 && Slot2Info[FromSlot].Offset != InvalidIdx) {
- MO.setIndex(MergedSlot);
- MO.setOffset(MO.getOffset() + Slot2Info[FromSlot].Offset);
- continue;
- }
-
// Only look at mapped slots.
if (!SlotRemap.count(FromSlot))
continue;
@@ -1356,8 +1347,6 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
auto To = SlotRemap.find(FI);
if (To != SlotRemap.end())
SSRefs[FI].push_back(MMO);
- if (FI >= 0 && Slot2Info[FI].Offset != InvalidIdx)
- SSRefs[FI].push_back(MMO);
}
// If this memory location can be a slot remapped here,
@@ -1376,7 +1365,7 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
// that is not remapped, we continue checking.
// Otherwise, we need to invalidate AA infomation.
const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
- if ((AI && MergedAllocas.count(AI)) || UseNewStackColoring) {
+ if (AI && MergedAllocas.count(AI)) {
MayHaveConflictingAAMD = true;
break;
}
@@ -1400,20 +1389,13 @@ void StackColoring::remapInstructions(DenseMap<int, int>& SlotRemap, int MergedS
// Rewrite MachineMemOperands that reference old frame indices.
for (auto E : enumerate(SSRefs))
if (!E.value().empty()) {
- if (UseNewStackColoring) {
- const PseudoSourceValue *NewSV =
- MF->getPSVManager().getFixedStack(MergedSlot);
- for (MachineMemOperand *Ref : E.value())
- Ref->setValue(NewSV);
- } else {
- const PseudoSourceValue *NewSV = MF->getPSVManager().getFixedStack(
- SlotRemap.find(E.index())->second);
- for (MachineMemOperand *Ref : E.value())
- Ref->setValue(NewSV);
- }
+ const PseudoSourceValue *NewSV =
+ MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
+ for (MachineMemOperand *Ref : E.value())
+ Ref->setValue(NewSV);
}
- // Update the location of C++ catch objects for the MSVC personality routine.
+ // Update the location of C++ catch objects for the MSVC personality routine.
if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
for (WinEHHandlerType &H : TBME.HandlerArray)
@@ -1933,7 +1915,7 @@ bool StackColoring::run(MachineFunction &Func) {
// indices to use the remapped frame index.
if (!SlotRemap.empty()) {
expungeSlotMap(SlotRemap, NumSlots);
- remapInstructions(SlotRemap, InvalidIdx);
+ remapInstructions(SlotRemap);
}
} else {
// Maybe this entire logic should be moved to a generic StackLayouter that
>From df2fb92fe3f6e79f0fd604001b88ddff5be563d8 Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 12 Jun 2025 00:27:36 +0200
Subject: [PATCH 18/19] format StackColoring.cpp
---
llvm/lib/CodeGen/StackColoring.cpp | 126 +++++++++++++++--------------
1 file changed, 64 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 9fdf5c426201b..9319401424f3f 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -49,8 +49,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <limits>
@@ -64,10 +64,9 @@ using namespace llvm;
DEBUG_COUNTER(ProcessSlot, DEBUG_TYPE "-slot",
"Controls which slot get processed");
-static cl::opt<bool>
-DisableColoring("no-stack-coloring",
- cl::init(false), cl::Hidden,
- cl::desc("Disable stack coloring"));
+static cl::opt<bool> DisableColoring("no-stack-coloring", cl::init(false),
+ cl::Hidden,
+ cl::desc("Disable stack coloring"));
/// The user may write code that uses allocas outside of the declared lifetime
/// zone. This can happen when the user returns a reference to a local
@@ -75,31 +74,31 @@ DisableColoring("no-stack-coloring",
/// code. If this flag is enabled, we try to save the user. This option
/// is treated as overriding LifetimeStartOnFirstUse below.
static cl::opt<bool>
-ProtectFromEscapedAllocas("protect-from-escaped-allocas",
- cl::init(false), cl::Hidden,
- cl::desc("Do not optimize lifetime zones that "
- "are broken"));
+ ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false),
+ cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
/// Enable enhanced dataflow scheme for lifetime analysis (treat first
/// use of stack slot as start of slot lifetime, as opposed to looking
/// for LIFETIME_START marker). See "Implementation notes" below for
/// more info.
static cl::opt<bool>
-LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
- cl::init(true), cl::Hidden,
- cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+ LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
+ cl::init(true), cl::Hidden,
+ cl::desc("Treat stack lifetimes as starting on "
+ "first use, not on START marker."));
static cl::opt<bool> UseNewStackColoring(
"new-stack-coloring", cl::init(false), cl::Hidden,
cl::desc("Use a better logic to try to reduce stack usage"));
static cl::opt<unsigned> MaxCandidatesOpt(
- "stackcoloring-max-candidates", cl::init(0),
- cl::Hidden,
+ "stackcoloring-max-candidates", cl::init(0), cl::Hidden,
cl::desc(
"Max number of candidates that will be evaluated, 0 means no limit"));
-STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(GeneratedWorse, "Number of times worse layout were generated");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
@@ -392,7 +391,7 @@ class StackColoring {
MachineFrameInfo *MFI = nullptr;
MachineFunction *MF = nullptr;
- LiveStacks* LS = nullptr;
+ LiveStacks *LS = nullptr;
struct SlotInfo {
// All places in the current function where this Slot is live
@@ -420,7 +419,7 @@ class StackColoring {
[&](int Idx) { return Liveness[Idx]; });
}
- LLVM_DUMP_METHOD void dump(const StackColoring* State = nullptr) const;
+ LLVM_DUMP_METHOD void dump(const StackColoring *State = nullptr) const;
};
/// A class representing liveness information for a single basic block.
@@ -465,7 +464,7 @@ class StackColoring {
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
- SmallVector<MachineInstr*, 8> Markers;
+ SmallVector<MachineInstr *, 8> Markers;
/// Record the FI slots for which we have seen some sort of
/// lifetime marker (either start or end).
@@ -479,7 +478,8 @@ class StackColoring {
unsigned NumIterations;
public:
- StackColoring(SlotIndexes *Indexes, LiveStacks* LS) : LS(LS), Indexes(Indexes) {}
+ StackColoring(SlotIndexes *Indexes, LiveStacks *LS)
+ : LS(LS), Indexes(Indexes) {}
bool run(MachineFunction &Func);
private:
@@ -506,7 +506,8 @@ class StackColoring {
unsigned doMerging(unsigned NumSlots);
/// Returns TRUE if we're using the first-use-begins-lifetime method for
- /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+ /// this slot (if FALSE, then the start marker is treated as start of
+ /// lifetime).
bool applyFirstUse(int Slot) {
if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
return false;
@@ -520,8 +521,7 @@ class StackColoring {
/// starting or ending are added to the vector "slots" and "isStart" is set
/// accordingly.
/// \returns True if inst contains a lifetime start or end
- bool isLifetimeStartOrEnd(const MachineInstr &MI,
- SmallVector<int, 4> &slots,
+ bool isLifetimeStartOrEnd(const MachineInstr &MI, SmallVector<int, 4> &slots,
bool &isStart);
/// Construct the LiveIntervals for the slots.
@@ -623,8 +623,8 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
LLVM_DUMP_METHOD void StackColoring::dump() const {
for (MachineBasicBlock *MBB : depth_first(MF)) {
- dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
- << MBB->getName() << "]\n";
+ dbgs() << "Inspecting block #" << MBB->getNumber() << " [" << MBB->getName()
+ << "]\n";
dumpBB(MBB);
}
}
@@ -642,7 +642,8 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
}
}
-LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State) const {
+LLVM_DUMP_METHOD void
+StackColoring::SlotInfo::dump(const StackColoring *State) const {
unsigned Slot = InvalidIdx;
if (State) {
Slot = this - State->Slot2Info.data();
@@ -654,7 +655,8 @@ LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State)
dbgs() << " offset=" << Offset;
if (State) {
if (State->MFI->getObjectAllocation(Slot))
- dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName() << "\"";
+ dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName()
+ << "\"";
if (State->MFI->isSpillSlotObjectIndex(Slot))
dbgs() << " spill";
}
@@ -673,8 +675,7 @@ LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump(const StackColoring* State)
#endif
-static inline int getStartOrEndSlot(const MachineInstr &MI)
-{
+static inline int getStartOrEndSlot(const MachineInstr &MI) {
assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
"Expected LIFETIME_START or LIFETIME_END op");
@@ -715,7 +716,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
if (!MO.isFI())
continue;
int Slot = MO.getIndex();
- if (Slot<0)
+ if (Slot < 0)
continue;
if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
slots.push_back(Slot);
@@ -802,7 +803,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
int Slot = MO.getIndex();
if (Slot < 0)
continue;
- if (! BetweenStartEnd.test(Slot)) {
+ if (!BetweenStartEnd.test(Slot)) {
ConservativeSlots.set(Slot);
}
}
@@ -954,7 +955,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
DefinitelyInUse.resize(NumSlots);
struct SplitSlotChanges {
- const MachineInstr* AtMI;
+ const MachineInstr *AtMI;
unsigned BlockIdx : 31;
unsigned IsStart : 1;
unsigned Slot;
@@ -995,7 +996,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
BitVector IsStoredTo;
IsStoredTo.resize(NumSlots, false);
struct MIBlockIdx {
- const MachineInstr* MI;
+ const MachineInstr *MI;
unsigned BlockIdx;
};
unsigned BlockIdx = 0;
@@ -1004,7 +1005,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
for (const MachineInstr &MI : MBB) {
if (MI.isDebugInstr())
continue;
- for (MachineMemOperand* MMO : MI.memoperands()) {
+ for (MachineMemOperand *MMO : MI.memoperands()) {
auto *PSV = dyn_cast_if_present<FixedStackPseudoSourceValue>(
MMO->getPseudoValue());
if (!PSV)
@@ -1207,10 +1208,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Keep a list of *allocas* which need to be remapped.
- DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+ DenseMap<const AllocaInst *, const AllocaInst *> Allocas;
// Keep a list of allocas which has been affected by the remap.
- SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
+ SmallPtrSet<const AllocaInst *, 32> MergedAllocas;
for (const std::pair<int, int> &SI : SlotRemap) {
const AllocaInst *From = MFI->getObjectAllocation(SI.first);
@@ -1244,8 +1245,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
// does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
// that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
- MachineFrameInfo::SSPLayoutKind FromKind
- = MFI->getObjectSSPLayout(SI.first);
+ MachineFrameInfo::SSPLayoutKind FromKind =
+ MFI->getObjectSSPLayout(SI.first);
MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second);
if (FromKind != MachineFrameInfo::SSPLK_None &&
(ToKind == MachineFrameInfo::SSPLK_None ||
@@ -1303,20 +1304,20 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
int FromSlot = MO.getIndex();
// Don't touch arguments.
- if (FromSlot<0)
+ if (FromSlot < 0)
continue;
// Only look at mapped slots.
if (!SlotRemap.count(FromSlot))
continue;
- // In a debug build, check that the instruction that we are modifying is
- // inside the expected live range. If the instruction is not inside
- // the calculated range then it means that the alloca usage moved
- // outside of the lifetime markers, or that the user has a bug.
- // NOTE: Alloca address calculations which happen outside the lifetime
- // zone are okay, despite the fact that we don't have a good way
- // for validating all of the usages of the calculation.
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+ // zone are okay, despite the fact that we don't have a good way
+ // for validating all of the usages of the calculation.
#ifndef NDEBUG
bool TouchesMemory = I.mayLoadOrStore();
// If we *don't* protect the user from escaped allocas, don't bother
@@ -1395,7 +1396,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
Ref->setValue(NewSV);
}
- // Update the location of C++ catch objects for the MSVC personality routine.
+ // Update the location of C++ catch objects for the MSVC personality routine.
if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
for (WinEHHandlerType &H : TBME.HandlerArray)
@@ -1407,9 +1408,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
- (void) FixedMemOp;
- (void) FixedDbg;
- (void) FixedInstr;
+ (void)FixedMemOp;
+ (void)FixedDbg;
+ (void)FixedInstr;
}
void StackColoring::removeInvalidSlotRanges() {
@@ -1435,7 +1436,7 @@ void StackColoring::removeInvalidSlotRanges() {
int Slot = MO.getIndex();
- if (Slot<0)
+ if (Slot < 0)
continue;
if (Intervals[Slot]->empty())
@@ -1457,7 +1458,7 @@ void StackColoring::removeInvalidSlotRanges() {
void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
unsigned NumSlots) {
// Expunge slot remap map.
- for (unsigned i=0; i < NumSlots; ++i) {
+ for (unsigned i = 0; i < NumSlots; ++i) {
// If we are remapping i
if (auto It = SlotRemap.find(i); It != SlotRemap.end()) {
int Target = It->second;
@@ -1477,8 +1478,9 @@ bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- LiveStacks* LS = nullptr;
- LiveStacksWrapperLegacy* LSWL = getAnalysisIfAvailable<LiveStacksWrapperLegacy>();
+ LiveStacks *LS = nullptr;
+ LiveStacksWrapperLegacy *LSWL =
+ getAnalysisIfAvailable<LiveStacksWrapperLegacy>();
if (LSWL)
LS = &LSWL->getLS();
@@ -1502,7 +1504,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
int64_t OrigOptSize = 0;
int64_t OrigPesSize = 0;
for (unsigned Slot = 0; Slot < NumSlots; Slot++) {
- SlotInfo& Info = Slot2Info[Slot];
+ SlotInfo &Info = Slot2Info[Slot];
if (Info.StartLiveness.empty())
assert(!LS || !LS->hasInterval(Slot));
if (!Info.StartLiveness.empty() &&
@@ -1553,8 +1555,8 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
});
}
- SlotInfo* LastQueryLhs = nullptr;
- SlotInfo* LastQueryRhs = nullptr;
+ SlotInfo *LastQueryLhs = nullptr;
+ SlotInfo *LastQueryRhs = nullptr;
bool LastQueryRes = false;
// Maybe there should be real caching here
auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) {
@@ -1582,7 +1584,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
LatestStatus.resize(LivenessSize, Status{});
SmallVector<Status> OlderStatus;
- auto FindStatus = [&](SlotInfo &Info, unsigned Pt) -> Status& {
+ auto FindStatus = [&](SlotInfo &Info, unsigned Pt) -> Status & {
Status *Last = &LatestStatus[Pt];
// The slots in the linked-list are always kept in ascending order, so the
@@ -1597,7 +1599,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
return *Last;
};
auto UpdateStatus = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
- Status* Last = &LatestStatus[Pt];
+ Status *Last = &LatestStatus[Pt];
unsigned Idx = OlderStatus.size();
OlderStatus.push_back(*Last);
@@ -1612,7 +1614,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
}
// Ensure ordering of slots
- Status* Inserted = &OlderStatus.back();
+ Status *Inserted = &OlderStatus.back();
Inserted->Offset = Offset;
Inserted->Slot = &Info - Slot2Info.data();
Status *Curr = Last;
@@ -1719,7 +1721,7 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
}
unsigned FinalSize = 0;
- for (Status& U : LatestStatus)
+ for (Status &U : LatestStatus)
FinalSize = std::max(FinalSize, U.Offset);
LLVM_DEBUG(dbgs() << "MergedSize=" << FinalSize << " OrigPesSize="
<< OrigPesSize << " OrigOptSize" << OrigOptSize << "\n");
@@ -1778,7 +1780,7 @@ bool StackColoring::run(MachineFunction &Func) {
LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
<< " slots\n");
- for (int i=0; i < MFI->getObjectIndexEnd(); ++i)
+ for (int i = 0; i < MFI->getObjectIndexEnd(); ++i)
TotalSize += MFI->getObjectSize(i);
LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
@@ -1791,7 +1793,7 @@ bool StackColoring::run(MachineFunction &Func) {
}
Slot2Info.resize(NumSlots);
- for (unsigned i=0; i < NumSlots; ++i) {
+ for (unsigned i = 0; i < NumSlots; ++i) {
std::unique_ptr<LiveRange> LI(new LiveRange());
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
Intervals.push_back(std::move(LI));
>From f7ae304acbe9adcfe84115b65d5992f8b52c0cab Mon Sep 17 00:00:00 2001
From: tyker <tyker1 at outlook.com>
Date: Thu, 19 Jun 2025 20:06:44 +0200
Subject: [PATCH 19/19] Update selection heuristics to avoid code-size
regression on average
---
llvm/include/llvm/CodeGen/MachineFrameInfo.h | 2 +-
llvm/lib/CodeGen/StackColoring.cpp | 84 ++++++++++----------
2 files changed, 41 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index fdb2fbd133397..5c05b792cd1e0 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -772,7 +772,7 @@ class MachineFrameInfo {
// If ID == 0, MaxAlignment will need to be updated separately.
}
- int getUnderlyingSlot(int ObjectIdx) {
+ int getUnderlyingSlot(int ObjectIdx) const {
assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx + NumFixedObjects].UnderlyingSlot;
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 9319401424f3f..798eef9354256 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
@@ -99,7 +100,6 @@ static cl::opt<unsigned> MaxCandidatesOpt(
"Max number of candidates that will be evaluated, 0 means no limit"));
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
-STATISTIC(GeneratedWorse, "Number of times worse layout were generated");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
@@ -400,7 +400,9 @@ class StackColoring {
// Use to make overlap queries faster
SmallVector<unsigned, 4> StartLiveness;
- uint64_t SlotPriority = 0;
+ int64_t SlotPriority = 0;
+
+ unsigned UseCount = 0;
unsigned Offset = InvalidIdx;
@@ -653,9 +655,11 @@ StackColoring::SlotInfo::dump(const StackColoring *State) const {
dbgs() << ":";
if (Offset != InvalidIdx)
dbgs() << " offset=" << Offset;
+ dbgs() << " uses=" << UseCount;
+ dbgs() << " prio=" << SlotPriority;
if (State) {
if (State->MFI->getObjectAllocation(Slot))
- dbgs() << " \"" << State->MFI->getObjectAllocation(Slot)->getName()
+ dbgs() << " alloca=\"" << State->MFI->getObjectAllocation(Slot)->getName()
<< "\"";
if (State->MFI->isSpillSlotObjectIndex(Slot))
dbgs() << " spill";
@@ -803,6 +807,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
int Slot = MO.getIndex();
if (Slot < 0)
continue;
+ Slot2Info[Slot].UseCount++;
if (!BetweenStartEnd.test(Slot)) {
ConservativeSlots.set(Slot);
}
@@ -1525,35 +1530,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
if (SlotStack.size() <= 1)
return InvalidIdx;
- // This Whole block is only used to try and order the stack, such that the
- // Slots are processed in an order that helps getting good packing
- {
- // Find how much usage of every livepoint there is.
- SmallVector<unsigned> CumulatedUsage;
- CumulatedUsage.resize(LivenessSize, 0);
-
- for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
- for (unsigned Pt : Info.Liveness.set_bits()) {
- CumulatedUsage[Pt] += Info.Size;
- }
- }
+ // This logic is optimized for x86_64; it probably needs to be adapted to
+ // other targets to get a good code-size/stack-size balance.
+ // It is inspired by X86FrameLowering::orderFrameObjects, but the weight also
+ // takes the alignment into account, which helps with stack size.
+ auto IsLower = [&](unsigned Lhs, unsigned Rhs) {
+ SlotInfo &L = Slot2Info[Lhs];
+ SlotInfo &R = Slot2Info[Rhs];
+ uint64_t DensityLScaled = static_cast<uint64_t>(L.UseCount) *
+ static_cast<uint64_t>(R.Size + Log2(R.Align));
+ uint64_t DensityRScaled = static_cast<uint64_t>(R.UseCount) *
+ static_cast<uint64_t>(L.Size + Log2(L.Align));
+ return DensityLScaled < DensityRScaled;
+ };
+ std::stable_sort(SlotStack.begin(), SlotStack.end(), IsLower);
- for (unsigned Idx = 0; Idx < SlotStack.size(); Idx++) {
- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
- for (unsigned Pt : Info.Liveness.set_bits()) {
- // Since the goal is to minimize the max usage, blocks that are in high
- // contention areas are given more priority
- Info.SlotPriority +=
- (uint64_t)CumulatedUsage[Pt] * (uint64_t)CumulatedUsage[Pt] +
- (uint64_t)Info.Size * (uint64_t)Info.Align.value();
- }
- }
- std::stable_sort(
- SlotStack.begin(), SlotStack.end(), [&](unsigned Lhs, unsigned Rhs) {
- return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority;
- });
- }
+ int Prio = 0;
+ for (int Slot : SlotStack)
+ Slot2Info[Slot].SlotPriority = Prio++;
SlotInfo *LastQueryLhs = nullptr;
SlotInfo *LastQueryRhs = nullptr;
@@ -1666,24 +1660,27 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
Offset = alignTo(Offset, Info.Align);
- LLVM_DEBUG(dbgs() << "fi#" << Candidates[K] << "@" << Offset << "->";
- if (PrevSlot == InvalidIdx) dbgs() << "bottom";
- else dbgs() << "fi#" << PrevSlot; dbgs() << ", ";);
+ LLVM_DEBUG({
+ dbgs() << "fi#" << Candidates[K] << "@" << Offset;
+ if (PrevSlot != InvalidIdx)
+ dbgs() << "->" << "fi#" << PrevSlot;
+ dbgs() << ", ";
+ });
bool IsBetter = [&] {
+ if (BestIdx == InvalidIdx)
+ return true;
+ SlotInfo &Best = Slot2Info[Candidates[BestIdx]];
if (BestOffset != Offset)
return BestOffset > Offset;
- SlotInfo &Other = Slot2Info[Candidates[K]];
- if (Other.Align != Info.Align)
- return Other.Align < Info.Align;
- if (Other.Size != Info.Size)
- return Other.Size < Info.Size;
- if (Other.SlotPriority != Info.SlotPriority)
- return Other.SlotPriority < Info.SlotPriority;
+ if (Best.SlotPriority != Info.SlotPriority)
+ return Best.SlotPriority < Info.SlotPriority;
+ if (Best.Align != Info.Align)
+ return Best.Align < Info.Align;
// Both are always stored in Slot2Info, so this is equivalent to
// FrameIndex comparison
- return &Other < &Info;
+ return &Best < &Info;
}();
if (IsBetter) {
@@ -1726,7 +1723,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
LLVM_DEBUG(dbgs() << "MergedSize=" << FinalSize << " OrigPesSize="
<< OrigPesSize << " OrigOptSize" << OrigOptSize << "\n");
if (FinalSize >= OrigPesSize) {
- GeneratedWorse++;
return InvalidIdx;
}
@@ -1774,6 +1770,7 @@ bool StackColoring::run(MachineFunction &Func) {
Intervals.reserve(NumSlots);
LiveStarts.resize(NumSlots);
+ Slot2Info.resize(NumSlots);
unsigned NumMarkers = collectMarkers(NumSlots);
unsigned TotalSize = 0;
@@ -1792,7 +1789,6 @@ bool StackColoring::run(MachineFunction &Func) {
return removeAllMarkers();
}
- Slot2Info.resize(NumSlots);
for (unsigned i = 0; i < NumSlots; ++i) {
std::unique_ptr<LiveRange> LI(new LiveRange());
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
More information about the llvm-commits
mailing list