[llvm] [CodeGen] Speed up ReachingDefAnalysis (NFC) (PR #100913)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 27 20:52:59 PDT 2024
https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/100913
This patch speeds up ReachingDefAnalysis without changing the
functionality.
Without this patch, ReachingDefAnalysis maintains a three-dimensional
vector to keep track of reaching definitions (i.e. instructions) for
each basic block and each reg unit. Quite literally:
MBBReachingDefs[MBBNumber][Unit]
is a vector of definitions, where the innermost vector is implemented
with TinyPtrVector. The problem is that for many pairs of MBBNumber
and Unit, we have more than one definition, causing TinyPtrVector to
allocate SmallVector, accounting for 1.91% of heap allocations during
the compilation of X86ISelLowering.cpp.ll, a .ll version of
X86ISelLowering.cpp.
This patch replaces the three-dimensional vector with a custom
multimap implemented on a single instance of
std::vector<std::pair<int, int>>. The vector contains a collection of
singly-linked lists of reaching definitions for all possible pairs of
MBBNumber and Unit.
With this patch, the instruction count and cycle count go down by
0.24% and 0.82%, respectively, for the compilation of
X86ISelLowering.cpp.ll on a stabilized x86 machine (the frequency
governor set to performance and hyperthreading disabled). The number
of heap allocations goes down by 2.20%.
>From c436460adf1fb278450faa8f435149176784a3a8 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Fri, 19 Jul 2024 20:16:27 -0700
Subject: [PATCH] [CodeGen] Speed up ReachingDefAnalysis (NFC)
This patch speeds up ReachingDefAnalysis without changing the
functionality.
Without this patch, ReachingDefAnalysis maintains a three-dimensional
vector to keep track of reaching definitions (i.e. instructions) for
each basic block and each reg unit. Quite literally:
MBBReachingDefs[MBBNumber][Unit]
is a vector of definitions, where the innermost vector is implemented
with TinyPtrVector. The problem is that for many pairs of MBBNumber
and Unit, we have more than one definition, causing TinyPtrVector to
allocate SmallVector, accounting for 1.91% of heap allocations during
the compilation of X86ISelLowering.cpp.ll, a .ll version of
X86ISelLowering.cpp.
This patch replaces the three-dimensional vector with a custom
multimap implemented on a single instance of
std::vector<std::pair<int, int>>. The vector contains a collection of
singly-linked lists of reaching definitions for all possible pairs of
MBBNumber and Unit.
With this patch, the instruction count and cycle count go down by
0.24% and 0.82%, respectively, for the compilation of
X86ISelLowering.cpp.ll on a stabilized x86 machine (the frequency
governor set to performance and hyperthreading disabled). The number
of heap allocations goes down by 2.20%.
---
.../llvm/CodeGen/ReachingDefAnalysis.h | 172 +++++++++++++++---
llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 42 ++---
2 files changed, 163 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index ec652f448f0f6..7aac0c8cd9e31 100644
--- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -23,7 +23,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/LoopTraversal.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -33,35 +32,156 @@ namespace llvm {
class MachineBasicBlock;
class MachineInstr;
-/// Thin wrapper around "int" used to store reaching definitions,
-/// using an encoding that makes it compatible with TinyPtrVector.
-/// The 0th LSB is forced zero (and will be used for pointer union tagging),
-/// The 1st LSB is forced one (to make sure the value is non-zero).
-class ReachingDef {
- uintptr_t Encoded;
- friend struct PointerLikeTypeTraits<ReachingDef>;
- explicit ReachingDef(uintptr_t Encoded) : Encoded(Encoded) {}
-
+// An implementation of multimap from (MBBNumber, Unit) to reaching definitions.
+//
+// This implementation only supports modification operations just enough
+// to serve our needs:
+//
+// - addDef
+// - prependDef
+// - replaceFront
+//
+// Internally, the multimap is implemented as a collection of singly linked
+// lists represented on top of a single array. Each singly-linked list
+// contains reaching definitions for a given pair of MBBNumber and Unit.
+//
+// This design has the following highlights:
+//
+// - Unlike SparseMultiset or other maps, we do not store keys as part of values
+// or anywhere else in the data structure.
+//
+// - The single array design minimizes malloc traffic.
+//
+// - Reaching definitions share one array. This means that if one pair of
+// (MBBNumber, Unit) has multiple reaching definitions while another pair of
+// (MBBNumber, Unit) has none, they cancel each other to some extent.
+class MBBReachingDefsInfo {
public:
- ReachingDef(std::nullptr_t) : Encoded(0) {}
- ReachingDef(int Instr) : Encoded(((uintptr_t) Instr << 2) | 2) {}
- operator int() const { return ((int) Encoded) >> 2; }
-};
+ MBBReachingDefsInfo() = default;
+ MBBReachingDefsInfo(const MBBReachingDefsInfo &) = delete;
+ MBBReachingDefsInfo &operator=(const MBBReachingDefsInfo &) = delete;
+
+ // Initialize the multimap with the number of basic blocks and the number of
+ // register units.
+ void init(unsigned BBs, unsigned Regs) {
+ assert(NumBlockIDs == 0 && "can initialize only once");
+ assert(NumRegUnits == 0 && "can initialize only once");
+ assert(Storage.empty() && "can initialize only once");
+ NumBlockIDs = BBs;
+ NumRegUnits = Regs;
+ unsigned NumIndexes = NumBlockIDs * NumRegUnits;
+ // Reserve space for reaching definitions. Note that the first NumIndexes
+ // elements are used for indexes to various chains. The second half
+ // accommodates up to one reaching def per (MBBNumber, Unit) pair on
+ // average.
+ Storage.reserve(NumIndexes * 2);
+ Storage.assign(NumIndexes, std::make_pair(0, 0));
+ }
-template<>
-struct PointerLikeTypeTraits<ReachingDef> {
- static constexpr int NumLowBitsAvailable = 1;
+ // Clear the entire data structure.
+ void clear() {
+ NumBlockIDs = 0;
+ NumRegUnits = 0;
+ Storage.clear();
+ }
+
+ // Add a reaching definition Def to the end of the singly-linked list of
+ // definitions for (MBBNumber, Unit).
+ void addDef(unsigned MBBNumber, unsigned Unit, int Def) {
+ unsigned Key = computeKey(MBBNumber, Unit);
+ unsigned NewIndex = Storage.size();
+ Storage.emplace_back(Def, 0);
+ if (Storage[Key].first == 0) {
+ // Update the index of the first element.
+ Storage[Key].first = NewIndex;
+ // Update the index of the last element.
+ Storage[Key].second = NewIndex;
+ } else {
+ unsigned OldLastPos = Storage[Key].second;
+ // The old last element now points to the new element.
+ Storage[OldLastPos].second = NewIndex;
+ // Update the index of the last element.
+ Storage[Key].second = NewIndex;
+ }
+ }
+
+ // Add a reaching definition Def to the beginning of the singly-linked list of
+ // definitions for (MBBNumber, Unit).
+ void prependDef(unsigned MBBNumber, unsigned Unit, int Def) {
+ unsigned Key = computeKey(MBBNumber, Unit);
+ unsigned NewIndex = Storage.size();
+ Storage.emplace_back(Def, 0);
+ if (Storage[Key].first == 0) {
+ // Update the index of the first element.
+ Storage[Key].first = NewIndex;
+ // Update the index of the last element.
+ Storage[Key].second = NewIndex;
+ } else {
+ // The new element now points to the old first element.
+ Storage[NewIndex].second = Storage[Key].first;
+ // Update the index of the first element.
+ Storage[Key].first = NewIndex;
+ }
+ }
- static inline void *getAsVoidPointer(const ReachingDef &RD) {
- return reinterpret_cast<void *>(RD.Encoded);
+ // Replace the definition at the beginning of the singly-linked list of
+ // definitions for (MBBNumber, Unit).
+ void replaceFront(unsigned MBBNumber, unsigned Unit, int Def) {
+ unsigned Key = computeKey(MBBNumber, Unit);
+ assert(Storage[Key].first != 0);
+ assert(Storage[Key].second != 0);
+ unsigned FirstPos = Storage[Key].first;
+ Storage[FirstPos].first = Def;
}
- static inline ReachingDef getFromVoidPointer(void *P) {
- return ReachingDef(reinterpret_cast<uintptr_t>(P));
+ class def_iterator {
+ ArrayRef<std::pair<int, int>> Storage;
+ unsigned Pos;
+
+ public:
+ def_iterator(ArrayRef<std::pair<int, int>> Storage, unsigned Pos)
+ : Storage(Storage), Pos(Pos) {}
+ int operator*() { return Storage[Pos].first; }
+ void operator++() { Pos = Storage[Pos].second; }
+ bool operator==(const def_iterator &RHS) const {
+ return Storage == RHS.Storage && Pos == RHS.Pos;
+ }
+ bool operator!=(const def_iterator &RHS) const { return !operator==(RHS); }
+ };
+
+ def_iterator def_begin(unsigned MBBNumber, unsigned Unit) const {
+ unsigned Key = computeKey(MBBNumber, Unit);
+ return {Storage, static_cast<unsigned>(Storage[Key].first)};
}
+ def_iterator def_end() const { return {Storage, 0}; }
+ iterator_range<def_iterator> defs(unsigned MBBNumber, unsigned Unit) const {
+ return llvm::make_range(def_begin(MBBNumber, Unit), def_end());
+ }
+
+private:
+ // The number of reg units.
+ unsigned NumRegUnits = 0;
- static inline ReachingDef getFromVoidPointer(const void *P) {
- return ReachingDef(reinterpret_cast<uintptr_t>(P));
+ // The number of basic blocks.
+ unsigned NumBlockIDs = 0;
+
+ // The storage for definitions and various indexes. The array has two parts:
+ //
+ // The first NumBlockIDs * NumRegUnits elements represent array indexes to
+ // reaching definitions for all possible pairs of MBBNumber and Unit. Each
+ // pair represents the first and last index of a corresponding chain. If the
+ // chain is empty, both values are zero.
+ //
+ // The subsequent elements represent reaching definitions and indexes to their
+ // next elements. In each pair, the first is the reaching def, and the second
+ // is the index to the next element. The index is zero for the last element
+ // of the chain.
+ std::vector<std::pair<int, int>> Storage;
+
+ unsigned computeKey(unsigned MBBNumber, unsigned Unit) const {
+ assert(MBBNumber < NumBlockIDs);
+ assert(Unit < NumRegUnits);
+ return MBBNumber * NumRegUnits + Unit;
}
};
@@ -72,6 +192,7 @@ class ReachingDefAnalysis : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
LoopTraversal::TraversalOrder TraversedMBBOrder;
unsigned NumRegUnits = 0;
+ unsigned NumBlockIDs = 0;
/// Instruction that defined each register, relative to the beginning of the
/// current basic block. When a LiveRegsDefInfo is used to represent a
/// live-out register, this value is relative to the end of the basic block,
@@ -93,12 +214,7 @@ class ReachingDefAnalysis : public MachineFunctionPass {
/// their basic blocks.
DenseMap<MachineInstr *, int> InstIds;
- /// All reaching defs of a given RegUnit for a given MBB.
- using MBBRegUnitDefs = TinyPtrVector<ReachingDef>;
- /// All reaching defs of all reg units for a given MBB
- using MBBDefsInfo = std::vector<MBBRegUnitDefs>;
/// All reaching defs of all reg units for a all MBBs
- using MBBReachingDefsInfo = SmallVector<MBBDefsInfo, 4>;
MBBReachingDefsInfo MBBReachingDefs;
/// Default values are 'nothing happened a long time ago'.
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 07fa92889d885..b2cb4ed53387e 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -50,9 +50,7 @@ static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
unsigned MBBNumber = MBB->getNumber();
- assert(MBBNumber < MBBReachingDefs.size() &&
- "Unexpected basic block number.");
- MBBReachingDefs[MBBNumber].resize(NumRegUnits);
+ assert(MBBNumber < NumBlockIDs && "Unexpected basic block number.");
// Reset instruction counter in each basic block.
CurInstr = 0;
@@ -71,7 +69,7 @@ void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
// before the call.
if (LiveRegs[Unit] != -1) {
LiveRegs[Unit] = -1;
- MBBReachingDefs[MBBNumber][Unit].push_back(-1);
+ MBBReachingDefs.addDef(MBBNumber, Unit, -1);
}
}
}
@@ -97,7 +95,7 @@ void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
// Insert the most recent reaching definition we found.
for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit)
if (LiveRegs[Unit] != ReachingDefDefaultVal)
- MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]);
+ MBBReachingDefs.addDef(MBBNumber, Unit, LiveRegs[Unit]);
}
void ReachingDefAnalysis::leaveBasicBlock(MachineBasicBlock *MBB) {
@@ -122,8 +120,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
assert(!MI->isDebugInstr() && "Won't process debug instructions");
unsigned MBBNumber = MI->getParent()->getNumber();
- assert(MBBNumber < MBBReachingDefs.size() &&
- "Unexpected basic block number.");
+ assert(MBBNumber < NumBlockIDs && "Unexpected basic block number.");
for (auto &MO : MI->operands()) {
if (!isValidRegDef(MO))
@@ -136,7 +133,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
// How many instructions since this reg unit was last written?
if (LiveRegs[Unit] != CurInstr) {
LiveRegs[Unit] = CurInstr;
- MBBReachingDefs[MBBNumber][Unit].push_back(CurInstr);
+ MBBReachingDefs.addDef(MBBNumber, Unit, CurInstr);
}
}
}
@@ -146,8 +143,7 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
unsigned MBBNumber = MBB->getNumber();
- assert(MBBNumber < MBBReachingDefs.size() &&
- "Unexpected basic block number.");
+ assert(MBBNumber < NumBlockIDs && "Unexpected basic block number.");
// Count number of non-debug instructions for end of block adjustment.
auto NonDbgInsts =
@@ -169,16 +165,16 @@ void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
if (Def == ReachingDefDefaultVal)
continue;
- auto Start = MBBReachingDefs[MBBNumber][Unit].begin();
- if (Start != MBBReachingDefs[MBBNumber][Unit].end() && *Start < 0) {
- if (*Start >= Def)
+ auto Defs = MBBReachingDefs.defs(MBBNumber, Unit);
+ if (!Defs.empty() && *Defs.begin() < 0) {
+ if (*Defs.begin() >= Def)
continue;
// Update existing reaching def from predecessor to a more recent one.
- *Start = Def;
+ MBBReachingDefs.replaceFront(MBBNumber, Unit, Def);
} else {
// Insert new reaching def from predecessor.
- MBBReachingDefs[MBBNumber][Unit].insert(Start, Def);
+ MBBReachingDefs.prependDef(MBBNumber, Unit, Def);
}
// Update reaching def at end of BB. Keep in mind that these are
@@ -234,7 +230,8 @@ void ReachingDefAnalysis::reset() {
void ReachingDefAnalysis::init() {
NumRegUnits = TRI->getNumRegUnits();
- MBBReachingDefs.resize(MF->getNumBlockIDs());
+ NumBlockIDs = MF->getNumBlockIDs();
+ MBBReachingDefs.init(NumBlockIDs, NumRegUnits);
// Initialize the MBBOutRegsInfos
MBBOutRegsInfos.resize(MF->getNumBlockIDs());
LoopTraversal Traversal;
@@ -247,10 +244,10 @@ void ReachingDefAnalysis::traverse() {
processBasicBlock(TraversedMBB);
#ifndef NDEBUG
// Make sure reaching defs are sorted and unique.
- for (MBBDefsInfo &MBBDefs : MBBReachingDefs) {
- for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) {
+ for (unsigned MBBNumber = 0; MBBNumber != NumBlockIDs; ++MBBNumber) {
+ for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) {
int LastDef = ReachingDefDefaultVal;
- for (int Def : RegUnitDefs) {
+ for (int Def : MBBReachingDefs.defs(MBBNumber, Unit)) {
assert(Def > LastDef && "Defs must be sorted and unique");
LastDef = Def;
}
@@ -265,11 +262,10 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
int InstId = InstIds.lookup(MI);
int DefRes = ReachingDefDefaultVal;
unsigned MBBNumber = MI->getParent()->getNumber();
- assert(MBBNumber < MBBReachingDefs.size() &&
- "Unexpected basic block number.");
+ assert(MBBNumber < NumBlockIDs && "Unexpected basic block number.");
int LatestDef = ReachingDefDefaultVal;
for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
- for (int Def : MBBReachingDefs[MBBNumber][Unit]) {
+ for (int Def : MBBReachingDefs.defs(MBBNumber, Unit)) {
if (Def >= InstId)
break;
DefRes = Def;
@@ -299,7 +295,7 @@ bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
int InstId) const {
- assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() &&
+ assert(static_cast<size_t>(MBB->getNumber()) < NumBlockIDs &&
"Unexpected basic block number.");
assert(InstId < static_cast<int>(MBB->size()) &&
"Unexpected instruction id.");
More information about the llvm-commits
mailing list