[llvm] 9c31155 - TableGen: Optimize super-register class computation (#134865)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 12:00:11 PDT 2025
Author: Nicolai Hähnle
Date: 2025-04-10T12:00:08-07:00
New Revision: 9c31155ead6e68450b8c2de38f5bec8f5c5db810
URL: https://github.com/llvm/llvm-project/commit/9c31155ead6e68450b8c2de38f5bec8f5c5db810
DIFF: https://github.com/llvm/llvm-project/commit/9c31155ead6e68450b8c2de38f5bec8f5c5db810.diff
LOG: TableGen: Optimize super-register class computation (#134865)
Inferring super-register classes naively requires checking every
register class against every other register class and sub-register
index.
Each of those checks is itself a non-trivial operation on register sets.
Culling as many (RC, RC, SubIdx) triples as possible is important for
the running time of TableGen for architectures with complex sub-register
relations.
Use transitivity to cull many (RC, RC, SubIdx) triples. This
unfortunately requires us to complete the transitive closure of
super-register classes explicitly, but it still cuts down the running
time on AMDGPU substantially -- in some upcoming work in the
backend by more than half (in very rough measurements).
This changes the names of some of the inferred register classes, since
the order in which they are inferred changes. The names of the inferred
register classes become shorter, which reduces the size of the generated
files.
Replacing some uses of SmallPtrSet by DenseSet shaves off a few more
percent; there are hundreds of register classes in AMDGPU.
Tweaking the topological signature check to skip registers without
super-registers further helps skip register classes that have "pseudo"
registers in them whose sub- and super-register structure is trivial.
Added:
Modified:
llvm/utils/TableGen/Common/CodeGenRegisters.cpp
llvm/utils/TableGen/Common/CodeGenRegisters.h
llvm/utils/TableGen/RegisterBankEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
index 7105ced26be1c..3a6e828a99f2d 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
@@ -16,6 +16,8 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -765,7 +767,8 @@ static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
const Record *R)
: TheDef(R), Name(std::string(R->getName())),
- TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), TSFlags(0) {
+ RegsWithSuperRegsTopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1),
+ TSFlags(0) {
GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
std::vector<const Record *> TypeList = R->getValueAsListOfDefs("RegTypes");
if (TypeList.empty())
@@ -791,7 +794,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]);
Members.push_back(Reg);
Artificial &= Reg->Artificial;
- TopoSigs.set(Reg->getTopoSig());
+ if (!Reg->getSuperRegs().empty())
+ RegsWithSuperRegsTopoSigs.set(Reg->getTopoSig());
}
sortAndUniqueRegisters(Members);
@@ -849,13 +853,14 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
StringRef Name, Key Props)
: Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)),
- TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI),
- CopyCost(0), Allocatable(true), AllocationPriority(0),
+ RegsWithSuperRegsTopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1),
+ RSI(Props.RSI), CopyCost(0), Allocatable(true), AllocationPriority(0),
GlobalPriority(false), TSFlags(0) {
Artificial = true;
GeneratePressureSet = false;
for (const auto R : Members) {
- TopoSigs.set(R->getTopoSig());
+ if (!R->getSuperRegs().empty())
+ RegsWithSuperRegsTopoSigs.set(R->getTopoSig());
Artificial &= R->Artificial;
}
}
@@ -1173,6 +1178,28 @@ void CodeGenRegisterClass::buildRegUnitSet(
std::back_inserter(RegUnits));
}
+// Combine our super classes of the given sub-register index with all of their
+// super classes in turn.
+void CodeGenRegisterClass::extendSuperRegClasses(CodeGenSubRegIndex *SubIdx) {
+ auto It = SuperRegClasses.find(SubIdx);
+ if (It == SuperRegClasses.end())
+ return;
+
+ SmallVector<CodeGenRegisterClass *> MidRCs;
+ MidRCs.insert(MidRCs.end(), It->second.begin(), It->second.end());
+
+ for (CodeGenRegisterClass *MidRC : MidRCs) {
+ for (auto &Pair : MidRC->SuperRegClasses) {
+ CodeGenSubRegIndex *ComposedSubIdx = Pair.first->compose(SubIdx);
+ if (!ComposedSubIdx)
+ continue;
+
+ for (CodeGenRegisterClass *SuperRC : Pair.second)
+ addSuperRegClass(ComposedSubIdx, SuperRC);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// CodeGenRegisterCategory
//===----------------------------------------------------------------------===//
@@ -1290,6 +1317,8 @@ CodeGenRegBank::CodeGenRegBank(const RecordKeeper &Records,
}
}
+ computeSubRegIndicesRPOT();
+
// Native register units are associated with a leaf register. They've all been
// discovered now.
NumNativeRegUnits = RegUnits.size();
@@ -1364,7 +1393,7 @@ void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) {
}
// Create a synthetic sub-class if it is missing.
-CodeGenRegisterClass *
+std::pair<CodeGenRegisterClass *, bool>
CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
const CodeGenRegister::Vec *Members,
StringRef Name) {
@@ -1372,12 +1401,12 @@ CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
CodeGenRegisterClass::Key K(Members, RC->RSI);
RCKeyMap::const_iterator FoundI = Key2RC.find(K);
if (FoundI != Key2RC.end())
- return FoundI->second;
+ return {FoundI->second, false};
// Sub-class doesn't exist, create a new one.
RegClasses.emplace_back(*this, Name, K);
addToMaps(&RegClasses.back());
- return &RegClasses.back();
+ return {&RegClasses.back(), true};
}
CodeGenRegisterClass *CodeGenRegBank::getRegClass(const Record *Def) const {
@@ -1694,6 +1723,81 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
namespace {
+// A directed graph on sub-register indices with a virtual source node that
+// has an arc to all other nodes, and an arc from A to B if sub-register index
+// B can be obtained by composing A with some other sub-register index.
+struct SubRegIndexCompositionGraph {
+ std::deque<CodeGenSubRegIndex> &SubRegIndices;
+ CodeGenSubRegIndex::CompMap EntryNode;
+
+ SubRegIndexCompositionGraph(std::deque<CodeGenSubRegIndex> &SubRegIndices)
+ : SubRegIndices(SubRegIndices) {
+ for (CodeGenSubRegIndex &Idx : SubRegIndices) {
+ EntryNode.try_emplace(&Idx, &Idx);
+ }
+ }
+};
+
+} // namespace
+
+template <> struct llvm::GraphTraits<SubRegIndexCompositionGraph> {
+ using NodeRef =
+ PointerUnion<CodeGenSubRegIndex *, const CodeGenSubRegIndex::CompMap *>;
+
+ // Using a reverse iterator causes sub-register indices to appear in their
+ // more natural order in RPOT.
+ using CompMapIt = CodeGenSubRegIndex::CompMap::const_reverse_iterator;
+ struct ChildIteratorType
+ : public iterator_adaptor_base<
+ ChildIteratorType, CompMapIt,
+ typename std::iterator_traits<CompMapIt>::iterator_category,
+ NodeRef> {
+ ChildIteratorType(CompMapIt I)
+ : ChildIteratorType::iterator_adaptor_base(I) {}
+
+ NodeRef operator*() const { return wrapped()->second; }
+ };
+
+ static NodeRef getEntryNode(const SubRegIndexCompositionGraph &G) {
+ return &G.EntryNode;
+ }
+
+ static const CodeGenSubRegIndex::CompMap *children(NodeRef N) {
+ if (auto *Idx = dyn_cast<CodeGenSubRegIndex *>(N))
+ return &Idx->getComposites();
+ return cast<const CodeGenSubRegIndex::CompMap *>(N);
+ }
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(children(N)->rbegin());
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(children(N)->rend());
+ }
+
+ static auto nodes_begin(SubRegIndexCompositionGraph *G) {
+ return G->SubRegIndices.begin();
+ }
+ static auto nodes_end(SubRegIndexCompositionGraph *G) {
+ return G->SubRegIndices.end();
+ }
+
+ static unsigned size(SubRegIndexCompositionGraph *G) {
+ return G->SubRegIndices.size();
+ }
+};
+
+void CodeGenRegBank::computeSubRegIndicesRPOT() {
+ SubRegIndexCompositionGraph G(SubRegIndices);
+ ReversePostOrderTraversal<SubRegIndexCompositionGraph> RPOT(G);
+ for (const auto N : RPOT) {
+ if (auto *Idx = dyn_cast<CodeGenSubRegIndex *>(N))
+ SubRegIndicesRPOT.push_back(Idx);
+ }
+}
+
+namespace {
+
// UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is
// the transitive closure of the union of overlapping register
// classes. Together, the UberRegSets form a partition of the registers. If we
@@ -2323,8 +2427,10 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
if (SubIdx.Artificial)
continue;
// This is a real subset. See if we have a matching class.
- CodeGenRegisterClass *SubRC = getOrCreateSubClass(
- RC, &I->second, RC->getName() + "_with_" + I->first->getName());
+ CodeGenRegisterClass *SubRC =
+ getOrCreateSubClass(RC, &I->second,
+ RC->getName() + "_with_" + I->first->getName())
+ .first;
RC->setSubClassWithSubReg(&SubIdx, SubRC);
}
}
@@ -2339,16 +2445,22 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
void CodeGenRegBank::inferMatchingSuperRegClass(
CodeGenRegisterClass *RC,
std::list<CodeGenRegisterClass>::iterator FirstSubRegRC) {
+ DenseSet<const CodeGenSubRegIndex *> ImpliedSubRegIndices;
std::vector<std::pair<const CodeGenRegister *, const CodeGenRegister *>>
SubToSuperRegs;
BitVector TopoSigs(getNumTopoSigs());
- // Iterate in SubRegIndex numerical order to visit synthetic indices last.
- for (auto &SubIdx : SubRegIndices) {
+ // Iterate subregister indices in topological order to visit larger indices
+ // first. This allows us to skip the smaller indices in many cases because
+ // their inferred super-register classes are implied.
+ for (auto *SubIdx : SubRegIndicesRPOT) {
// Skip indexes that aren't fully supported by RC's registers. This was
// computed by inferSubClassWithSubReg() above which should have been
// called first.
- if (RC->getSubClassWithSubReg(&SubIdx) != RC)
+ if (RC->getSubClassWithSubReg(SubIdx) != RC)
+ continue;
+
+ if (ImpliedSubRegIndices.count(SubIdx))
continue;
// Build list of (Sub, Super) pairs for this SubIdx, sorted by Sub. Note
@@ -2356,7 +2468,7 @@ void CodeGenRegBank::inferMatchingSuperRegClass(
SubToSuperRegs.clear();
TopoSigs.reset();
for (const auto Super : RC->getMembers()) {
- const CodeGenRegister *Sub = Super->getSubRegs().find(&SubIdx)->second;
+ const CodeGenRegister *Sub = Super->getSubRegs().find(SubIdx)->second;
assert(Sub && "Missing sub-register");
SubToSuperRegs.emplace_back(Sub, Super);
TopoSigs.set(Sub->getTopoSig());
@@ -2374,7 +2486,7 @@ void CodeGenRegBank::inferMatchingSuperRegClass(
if (SubRC.Artificial)
continue;
// Topological shortcut: SubRC members have the wrong shape.
- if (!TopoSigs.anyCommon(SubRC.getTopoSigs()))
+ if (!TopoSigs.anyCommon(SubRC.getRegsWithSuperRegsTopoSigs()))
continue;
// Compute the subset of RC that maps into SubRC with a single linear scan
// through SubToSuperRegs and the members of SubRC.
@@ -2395,15 +2507,54 @@ void CodeGenRegBank::inferMatchingSuperRegClass(
// RC injects completely into SubRC.
sortAndUniqueRegisters(SubSetVec);
if (SubSetVec.size() == RC->getMembers().size()) {
- SubRC.addSuperRegClass(&SubIdx, RC);
+ SubRC.addSuperRegClass(SubIdx, RC);
+
+ // We can skip checking subregister indices that can be composed from
+ // the current SubIdx.
+ //
+ // Proof sketch: Let SubRC' be another register class and SubSubIdx
+ // a subregister index that can be composed from SubIdx.
+ //
+ // Calling this function with SubRC in place of RC ensures the existence
+ // of a subclass X of SubRC with the registers that have subregisters in
+ // SubRC'.
+ //
+ // The set of registers in RC with SubSubIdx in SubRC' is equal to the
+ // set of registers in RC with SubIdx in X (because every register in
+ // RC has a corresponding subregister in SubRC), and so checking the
+ // pair (SubSubIdx, SubRC') is redundant with checking (SubIdx, X).
+ for (const auto &SubSubIdx : SubIdx->getComposites())
+ ImpliedSubRegIndices.insert(SubSubIdx.second);
+
continue;
}
// Only a subset of RC maps into SubRC. Make sure it is represented by a
// class.
- getOrCreateSubClass(RC, &SubSetVec,
- RC->getName() + "_with_" + SubIdx.getName() + "_in_" +
- SubRC.getName());
+ //
+ // The name of the inferred register class follows the template
+ // "<RC>_with_<SubIdx>_in_<SubRC>".
+ //
+ // When SubRC is already an inferred class, prefer a name of the form
+ // "<RC>_with_<CompositeSubIdx>_in_<SubSubRC>" over a chain of the form
+ // "<RC>_with_<SubIdx>_in_<OtherRc>_with_<SubSubIdx>_in_<SubSubRC>".
+ CodeGenSubRegIndex *CompositeSubIdx = SubIdx;
+ CodeGenRegisterClass *CompositeSubRC = &SubRC;
+ if (CodeGenSubRegIndex *SubSubIdx = SubRC.getInferredFromSubRegIdx()) {
+ auto It = SubIdx->getComposites().find(SubSubIdx);
+ if (It != SubIdx->getComposites().end()) {
+ CompositeSubIdx = It->second;
+ CompositeSubRC = SubRC.getInferredFromRC();
+ }
+ }
+
+ auto [SubSetRC, Inserted] = getOrCreateSubClass(
+ RC, &SubSetVec,
+ RC->getName() + "_with_" + CompositeSubIdx->getName() + "_in_" +
+ CompositeSubRC->getName());
+
+ if (Inserted)
+ SubSetRC->setInferredFrom(CompositeSubIdx, CompositeSubRC);
}
}
}
@@ -2438,7 +2589,7 @@ void CodeGenRegBank::computeInferredRegisterClasses() {
inferMatchingSuperRegClass(RC);
// New register classes are created while this loop is running, and we need
- // to visit all of them. I particular, inferMatchingSuperRegClass needs
+ // to visit all of them. In particular, inferMatchingSuperRegClass needs
// to match old super-register classes with sub-register classes created
// after inferMatchingSuperRegClass was called. At this point,
// inferMatchingSuperRegClass has checked SuperRC = [0..rci] with SubRC =
@@ -2451,6 +2602,17 @@ void CodeGenRegBank::computeInferredRegisterClasses() {
FirstNewRC = NextNewRC;
}
}
+
+ // Compute the transitive closure for super-register classes.
+ //
+ // By iterating over sub-register indices in topological order, we only ever
+ // add super-register classes for sub-register indices that have not already
+ // been visited. That allows computing the transitive closure in a single
+ // pass.
+ for (CodeGenSubRegIndex *SubIdx : SubRegIndicesRPOT) {
+ for (CodeGenRegisterClass &SubRC : RegClasses)
+ SubRC.extendSuperRegClasses(SubIdx);
+ }
}
/// getRegisterClassForRegister - Find the register class that contains the
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.h b/llvm/utils/TableGen/Common/CodeGenRegisters.h
index 5e2d1977545c1..f9a7904709830 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringMap.h"
@@ -337,12 +336,18 @@ class CodeGenRegisterClass {
//
// R:SubRegIndex in this RC for all R in SuperRC.
//
- DenseMap<const CodeGenSubRegIndex *, SmallPtrSet<CodeGenRegisterClass *, 8>>
+ DenseMap<CodeGenSubRegIndex *, DenseSet<CodeGenRegisterClass *>>
SuperRegClasses;
- // Bit vector of TopoSigs for the registers in this class. This will be
- // very sparse on regular architectures.
- BitVector TopoSigs;
+ // Bit vector of TopoSigs for the registers with super registers in this
+ // class. This will be very sparse on regular architectures.
+ BitVector RegsWithSuperRegsTopoSigs;
+
+ // If the register class was inferred for getMatchingSuperRegClass, this
+ // holds the subregister index and subregister class for which the register
+ // class was created.
+ CodeGenSubRegIndex *InferredFromSubRegIdx = nullptr;
+ CodeGenRegisterClass *InferredFromRC = nullptr;
public:
unsigned EnumValue;
@@ -438,6 +443,8 @@ class CodeGenRegisterClass {
SuperRegClasses[SubIdx].insert(SuperRC);
}
+ void extendSuperRegClasses(CodeGenSubRegIndex *SubIdx);
+
// getSubClasses - Returns a constant BitVector of subclasses indexed by
// EnumValue.
// The SubClasses vector includes an entry for this class.
@@ -463,8 +470,11 @@ class CodeGenRegisterClass {
// getOrder(0).
const CodeGenRegister::Vec &getMembers() const { return Members; }
- // Get a bit vector of TopoSigs present in this register class.
- const BitVector &getTopoSigs() const { return TopoSigs; }
+ // Get a bit vector of TopoSigs of registers with super registers in this
+ // register class.
+ const BitVector &getRegsWithSuperRegsTopoSigs() const {
+ return RegsWithSuperRegsTopoSigs;
+ }
// Get a weight of this register class.
unsigned getWeight(const CodeGenRegBank &) const;
@@ -505,6 +515,20 @@ class CodeGenRegisterClass {
return TheDef->getValueAsInt("BaseClassOrder");
return {};
}
+
+ void setInferredFrom(CodeGenSubRegIndex *Idx, CodeGenRegisterClass *RC) {
+ assert(Idx && RC);
+ assert(!InferredFromSubRegIdx);
+
+ InferredFromSubRegIdx = Idx;
+ InferredFromRC = RC;
+ }
+
+ CodeGenSubRegIndex *getInferredFromSubRegIdx() const {
+ return InferredFromSubRegIdx;
+ }
+
+ CodeGenRegisterClass *getInferredFromRC() const { return InferredFromRC; }
};
// Register categories are used when we need to deterine the category a
@@ -587,6 +611,9 @@ class CodeGenRegBank {
std::deque<CodeGenSubRegIndex> SubRegIndices;
DenseMap<const Record *, CodeGenSubRegIndex *> Def2SubRegIdx;
+ // Subregister indices sorted topologically by composition.
+ std::vector<CodeGenSubRegIndex *> SubRegIndicesRPOT;
+
CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace);
typedef std::map<SmallVector<CodeGenSubRegIndex *, 8>, CodeGenSubRegIndex *>
@@ -638,10 +665,10 @@ class CodeGenRegBank {
// Add RC to *2RC maps.
void addToMaps(CodeGenRegisterClass *);
- // Create a synthetic sub-class if it is missing.
- CodeGenRegisterClass *getOrCreateSubClass(const CodeGenRegisterClass *RC,
- const CodeGenRegister::Vec *Membs,
- StringRef Name);
+ // Create a synthetic sub-class if it is missing. Returns (RC, inserted).
+ std::pair<CodeGenRegisterClass *, bool>
+ getOrCreateSubClass(const CodeGenRegisterClass *RC,
+ const CodeGenRegister::Vec *Membs, StringRef Name);
// Infer missing register classes.
void computeInferredRegisterClasses();
@@ -671,6 +698,9 @@ class CodeGenRegBank {
// Compute a lane mask for each sub-register index.
void computeSubRegLaneMasks();
+ // Compute RPOT of subregister indices by composition.
+ void computeSubRegIndicesRPOT();
+
/// Computes a lane mask for each register unit enumerated by a physical
/// register.
void computeRegUnitLaneMasks();
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index a2fcf55e85132..e931000bb9c71 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -179,7 +179,7 @@ static void visitRegisterBankClasses(
const CodeGenRegBank &RegisterClassHierarchy,
const CodeGenRegisterClass *RC, const Twine &Kind,
std::function<void(const CodeGenRegisterClass *, StringRef)> VisitFn,
- SmallPtrSetImpl<const CodeGenRegisterClass *> &VisitedRCs) {
+ DenseSet<const CodeGenRegisterClass *> &VisitedRCs) {
// Make sure we only visit each class once to avoid infinite loops.
if (!VisitedRCs.insert(RC).second)
@@ -390,7 +390,7 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
Timer.startTimer("Analyze records");
std::vector<RegisterBank> Banks;
for (const auto &V : Records.getAllDerivedDefinitions("RegisterBank")) {
- SmallPtrSet<const CodeGenRegisterClass *, 8> VisitedRCs;
+ DenseSet<const CodeGenRegisterClass *> VisitedRCs;
RegisterBank Bank(*V, CGH.getNumModeIds());
for (const CodeGenRegisterClass *RC :
More information about the llvm-commits
mailing list