[llvm] 9f75270 - [IR] Add per-function numbers to basic blocks (#101052)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 11:25:21 PDT 2024
Author: Alexis Engelke
Date: 2024-07-30T20:25:18+02:00
New Revision: 9f75270ceb3a0a3fb9b97980031a59652b7d5473
URL: https://github.com/llvm/llvm-project/commit/9f75270ceb3a0a3fb9b97980031a59652b7d5473
DIFF: https://github.com/llvm/llvm-project/commit/9f75270ceb3a0a3fb9b97980031a59652b7d5473.diff
LOG: [IR] Add per-function numbers to basic blocks (#101052)
Every basic block that is linked into a function now has a unique
number, which can be queried using getNumber(). Numbers are densely
allocated, but not re-assigned on block removal for stability. Block
numbers are intended to be fairly stable and only be updated when
removing several basic blocks to make sure the numbering doesn't
become too sparse.
To reduce holes in the numbering, renumberBlocks() can be called to
re-assign numbers in block order.
Additionally, getMaxBlockNumber() returns a value larger than the
largest block number, intended to pre-allocate/resize vectors.
Furthermore, this introduces the concept of a "block number epoch" --
an integer that changes after every renumbering. This is useful for
identifying use of block numbers after renumbering: on initialization,
the current epoch is stored, and on all subsequent accesses, equality
with the current epoch can be asserted.
I added a validate method to catch cases where something goes wrong,
even if I can't really imagine how invalid numbers can occur. But I
think it's better to be safe and rule out this potential source of bugs
when more things depend on the numbering.
Previous discussion in:
https://discourse.llvm.org/t/rfc-add-auxiliary-field-for-per-pass-custom-data-to-basicblock/80229
Added:
Modified:
llvm/include/llvm/IR/BasicBlock.h
llvm/include/llvm/IR/Function.h
llvm/lib/IR/BasicBlock.cpp
llvm/lib/IR/Function.cpp
llvm/unittests/IR/FunctionTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 12571d957da60..c7913e60cea08 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -67,6 +67,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
bool IsNewDbgInfoFormat;
private:
+ // Allow Function to renumber blocks.
+ friend class Function;
+ /// Per-function unique number.
+ unsigned Number = -1u;
+
friend class BlockAddress;
friend class SymbolTableListTraits<BasicBlock>;
@@ -96,6 +101,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
void setIsNewDbgInfoFormat(bool NewFlag);
void setNewDbgInfoFormatFlag(bool NewFlag);
+ unsigned getNumber() const {
+ assert(getParent() && "only basic blocks in functions have valid numbers");
+ return Number;
+ }
+
/// Record that the collection of DbgRecords in \p M "trails" after the last
/// instruction of this block. These are equivalent to dbg.value intrinsics
/// that exist at the end of a basic block with no terminator (a transient
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fd7a6aa46eea0..4abf978687d9d 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -75,6 +75,13 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
private:
// Important things that make up a function!
BasicBlockListType BasicBlocks; ///< The basic blocks
+
+ // Basic blocks need to get their number when added to a function.
+ friend void BasicBlock::setParent(Function *);
+ unsigned NextBlockNum = 0;
+ /// Epoch of block numbers. (Could be shrinked to uint8_t if required.)
+ unsigned BlockNumEpoch = 0;
+
mutable Argument *Arguments = nullptr; ///< The formal arguments
size_t NumArgs;
std::unique_ptr<ValueSymbolTable>
@@ -810,6 +817,34 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
return SymTab.get();
}
+ //===--------------------------------------------------------------------===//
+ // Block number functions
+
+ /// Return a value larger than the largest block number. Intended to allocate
+ /// a vector that is sufficiently large to hold all blocks indexed by their
+ /// number.
+ unsigned getMaxBlockNumber() const { return NextBlockNum; }
+
+ /// Renumber basic blocks into a dense value range starting from 0. Be aware
+ /// that other data structures and analyses (e.g., DominatorTree) may depend
+ /// on the value numbers and need to be updated or invalidated.
+ void renumberBlocks();
+
+ /// Return the "epoch" of current block numbers. This will return a different
+ /// value after every renumbering. The intention is: if something (e.g., an
+ /// analysis) uses block numbers, it also stores the number epoch and then
+ /// can assert later on that the epoch didn't change (indicating that the
+ /// numbering is still valid). If the epoch changed, blocks might have been
+ /// assigned new numbers and previous uses of the numbers needs to be
+ /// invalidated. This is solely intended as a debugging feature.
+ unsigned getBlockNumberEpoch() const { return BlockNumEpoch; }
+
+private:
+ /// Assert that all blocks have unique numbers within 0..NextBlockNum. This
+ /// has O(n) runtime complexity.
+ void validateBlockNumbers() const;
+
+public:
//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
//
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index d9a6b6ba3791c..cf05b11c53963 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -240,6 +240,8 @@ BasicBlock::~BasicBlock() {
void BasicBlock::setParent(Function *parent) {
// Set Parent=parent, updating instruction symtab entries as appropriate.
+ if (Parent != parent)
+ Number = parent ? parent->NextBlockNum++ : -1u;
InstList.setSymTabObject(&Parent, parent);
}
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 9b0dd5fca7e0e..69520fdb03dc7 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Function.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -85,6 +86,27 @@ static cl::opt<int> NonGlobalValueMaxNameSize(
extern cl::opt<bool> UseNewDbgInfoFormat;
+void Function::renumberBlocks() {
+ validateBlockNumbers();
+
+ NextBlockNum = 0;
+ for (auto &BB : *this)
+ BB.Number = NextBlockNum++;
+ BlockNumEpoch++;
+}
+
+void Function::validateBlockNumbers() const {
+#ifndef NDEBUG
+ BitVector Numbers(NextBlockNum);
+ for (const auto &BB : *this) {
+ unsigned Num = BB.getNumber();
+ assert(Num < NextBlockNum && "out of range block number");
+ assert(!Numbers[Num] && "duplicate block numbers");
+ Numbers.set(Num);
+ }
+#endif
+}
+
void Function::convertToNewDbgValues() {
IsNewDbgInfoFormat = true;
for (auto &BB : *this) {
@@ -509,6 +531,8 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
}
Function::~Function() {
+ validateBlockNumbers();
+
dropAllReferences(); // After this it is safe to delete instructions.
// Delete all of the method arguments and unlink from symbol table...
diff --git a/llvm/unittests/IR/FunctionTest.cpp b/llvm/unittests/IR/FunctionTest.cpp
index 9aaff3ea33830..402667931fbc5 100644
--- a/llvm/unittests/IR/FunctionTest.cpp
+++ b/llvm/unittests/IR/FunctionTest.cpp
@@ -487,6 +487,98 @@ TEST(FunctionTest, EraseBBs) {
EXPECT_EQ(F->size(), 0u);
}
+TEST(FunctionTest, BasicBlockNumbers) {
+ LLVMContext Context;
+ Type *VoidType = Type::getVoidTy(Context);
+ FunctionType *FuncType = FunctionType::get(VoidType, false);
+ std::unique_ptr<Function> Func(
+ Function::Create(FuncType, GlobalValue::ExternalLinkage));
+
+ EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 0u);
+
+ BasicBlock *BB1 = BasicBlock::Create(Context, "bb1", Func.get());
+ EXPECT_EQ(BB1->getNumber(), 0u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 1u);
+ BasicBlock *BB2 = BasicBlock::Create(Context, "bb2", Func.get());
+ EXPECT_EQ(BB2->getNumber(), 1u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 2u);
+ BasicBlock *BB3 = BasicBlock::Create(Context, "bb3", Func.get());
+ EXPECT_EQ(BB3->getNumber(), 2u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
+
+ BB2->eraseFromParent();
+ // Erasing doesn't trigger renumbering
+ EXPECT_EQ(BB1->getNumber(), 0u);
+ EXPECT_EQ(BB3->getNumber(), 2u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
+ // ... and number are assigned monotonically increasing
+ BasicBlock *BB4 = BasicBlock::Create(Context, "bb4", Func.get());
+ EXPECT_EQ(BB4->getNumber(), 3u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+ // ... even if inserted not at the end
+ BasicBlock *BB5 = BasicBlock::Create(Context, "bb5", Func.get(), BB1);
+ EXPECT_EQ(BB5->getNumber(), 4u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
+
+ // Func is now: bb5, bb1, bb3, bb4
+ // Renumbering assigns numbers in their order in the function
+ EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
+ Func->renumberBlocks();
+ EXPECT_EQ(Func->getBlockNumberEpoch(), 1u);
+ EXPECT_EQ(BB5->getNumber(), 0u);
+ EXPECT_EQ(BB1->getNumber(), 1u);
+ EXPECT_EQ(BB3->getNumber(), 2u);
+ EXPECT_EQ(BB4->getNumber(), 3u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+
+ // Moving a block inside the function doesn't change numbers
+ BB1->moveBefore(BB5);
+ EXPECT_EQ(BB5->getNumber(), 0u);
+ EXPECT_EQ(BB1->getNumber(), 1u);
+ EXPECT_EQ(BB3->getNumber(), 2u);
+ EXPECT_EQ(BB4->getNumber(), 3u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+
+ // Removing a block and adding it back assigns a new number, because the
+ // block was temporarily without a parent.
+ BB4->removeFromParent();
+ BB4->insertInto(Func.get());
+ EXPECT_EQ(BB5->getNumber(), 0u);
+ EXPECT_EQ(BB1->getNumber(), 1u);
+ EXPECT_EQ(BB3->getNumber(), 2u);
+ EXPECT_EQ(BB4->getNumber(), 4u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
+
+ std::unique_ptr<Function> Func2(
+ Function::Create(FuncType, GlobalValue::ExternalLinkage));
+ BasicBlock *BB6 = BasicBlock::Create(Context, "bb6", Func2.get());
+ EXPECT_EQ(BB6->getNumber(), 0u);
+ EXPECT_EQ(Func2->getMaxBlockNumber(), 1u);
+ // Moving a block to a different function assigns a new number
+ BB3->removeFromParent();
+ BB3->insertInto(Func2.get(), BB6);
+ EXPECT_EQ(BB3->getParent(), Func2.get());
+ EXPECT_EQ(BB3->getNumber(), 1u);
+ EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
+
+ EXPECT_EQ(Func2->getBlockNumberEpoch(), 0u);
+ Func2->renumberBlocks();
+ EXPECT_EQ(Func2->getBlockNumberEpoch(), 1u);
+ EXPECT_EQ(BB3->getNumber(), 0u);
+ EXPECT_EQ(BB6->getNumber(), 1u);
+ EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
+
+ // splice works as expected and assigns new numbers
+ Func->splice(Func->end(), Func2.get());
+ EXPECT_EQ(BB5->getNumber(), 0u);
+ EXPECT_EQ(BB1->getNumber(), 1u);
+ EXPECT_EQ(BB4->getNumber(), 4u);
+ EXPECT_EQ(BB3->getNumber(), 5u);
+ EXPECT_EQ(BB6->getNumber(), 6u);
+ EXPECT_EQ(Func->getMaxBlockNumber(), 7u);
+}
+
TEST(FunctionTest, UWTable) {
LLVMContext Ctx;
std::unique_ptr<Module> M = parseIR(Ctx, R"(
More information about the llvm-commits
mailing list