[llvm] 9f75270 - [IR] Add per-function numbers to basic blocks (#101052)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 30 11:25:21 PDT 2024


Author: Alexis Engelke
Date: 2024-07-30T20:25:18+02:00
New Revision: 9f75270ceb3a0a3fb9b97980031a59652b7d5473

URL: https://github.com/llvm/llvm-project/commit/9f75270ceb3a0a3fb9b97980031a59652b7d5473
DIFF: https://github.com/llvm/llvm-project/commit/9f75270ceb3a0a3fb9b97980031a59652b7d5473.diff

LOG: [IR] Add per-function numbers to basic blocks (#101052)

Every basic block that is linked into a function now has a unique
number, which can be queried using getNumber(). Numbers are densely
allocated, but not re-assigned on block removal for stability. Block
numbers are intended to be fairly stable and only be updated when
removing several basic blocks to make sure the numbering doesn't
become too sparse.

To reduce holes in the numbering, renumberBlocks() can be called to
re-assign numbers in block order.

Additionally, getMaxBlockNumber() returns a value larger than the
largest block number, intended to pre-allocate/resize vectors.

Furthermore, this introduces the concept of a "block number epoch" --
an integer that changes after every renumbering. This is useful for
identifying use of block numbers after renumbering: on initialization,
the current epoch is stored, and on all subsequent accesses, equality
with the current epoch can be asserted.

I added a validate method to catch cases where something goes wrong,
even if I can't really imagine how invalid numbers can occur. But I
think it's better to be safe and rule out this potential source of bugs
when more things depend on the numbering.

Previous discussion in:
https://discourse.llvm.org/t/rfc-add-auxiliary-field-for-per-pass-custom-data-to-basicblock/80229

Added: 
    

Modified: 
    llvm/include/llvm/IR/BasicBlock.h
    llvm/include/llvm/IR/Function.h
    llvm/lib/IR/BasicBlock.cpp
    llvm/lib/IR/Function.cpp
    llvm/unittests/IR/FunctionTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 12571d957da60..c7913e60cea08 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -67,6 +67,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   bool IsNewDbgInfoFormat;
 
 private:
+  // Allow Function to renumber blocks.
+  friend class Function;
+  /// Per-function unique number.
+  unsigned Number = -1u;
+
   friend class BlockAddress;
   friend class SymbolTableListTraits<BasicBlock>;
 
@@ -96,6 +101,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   void setIsNewDbgInfoFormat(bool NewFlag);
   void setNewDbgInfoFormatFlag(bool NewFlag);
 
+  unsigned getNumber() const {
+    assert(getParent() && "only basic blocks in functions have valid numbers");
+    return Number;
+  }
+
   /// Record that the collection of DbgRecords in \p M "trails" after the last
   /// instruction of this block. These are equivalent to dbg.value intrinsics
   /// that exist at the end of a basic block with no terminator (a transient

diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fd7a6aa46eea0..4abf978687d9d 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -75,6 +75,13 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
 private:
   // Important things that make up a function!
   BasicBlockListType BasicBlocks;         ///< The basic blocks
+
+  // Basic blocks need to get their number when added to a function.
+  friend void BasicBlock::setParent(Function *);
+  unsigned NextBlockNum = 0;
+  /// Epoch of block numbers. (Could be shrunk to uint8_t if required.)
+  unsigned BlockNumEpoch = 0;
+
   mutable Argument *Arguments = nullptr;  ///< The formal arguments
   size_t NumArgs;
   std::unique_ptr<ValueSymbolTable>
@@ -810,6 +817,34 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
     return SymTab.get();
   }
 
+  //===--------------------------------------------------------------------===//
+  // Block number functions
+
+  /// Return a value larger than the largest block number. Intended to allocate
+  /// a vector that is sufficiently large to hold all blocks indexed by their
+  /// number.
+  unsigned getMaxBlockNumber() const { return NextBlockNum; }
+
+  /// Renumber basic blocks into a dense value range starting from 0. Be aware
+  /// that other data structures and analyses (e.g., DominatorTree) may depend
+  /// on the value numbers and need to be updated or invalidated.
+  void renumberBlocks();
+
+  /// Return the "epoch" of current block numbers. This will return a different
+  /// value after every renumbering. The intention is: if something (e.g., an
+  /// analysis) uses block numbers, it also stores the number epoch and then
+  /// can assert later on that the epoch didn't change (indicating that the
+  /// numbering is still valid). If the epoch changed, blocks might have been
+  /// assigned new numbers and previous uses of the numbers need to be
+  /// invalidated. This is solely intended as a debugging feature.
+  unsigned getBlockNumberEpoch() const { return BlockNumEpoch; }
+
+private:
+  /// Assert that all blocks have unique numbers within 0..NextBlockNum. This
+  /// has O(n) runtime complexity.
+  void validateBlockNumbers() const;
+
+public:
   //===--------------------------------------------------------------------===//
   // BasicBlock iterator forwarding functions
   //

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index d9a6b6ba3791c..cf05b11c53963 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -240,6 +240,8 @@ BasicBlock::~BasicBlock() {
 
 void BasicBlock::setParent(Function *parent) {
   // Set Parent=parent, updating instruction symtab entries as appropriate.
+  if (Parent != parent)
+    Number = parent ? parent->NextBlockNum++ : -1u;
   InstList.setSymTabObject(&Parent, parent);
 }
 

diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 9b0dd5fca7e0e..69520fdb03dc7 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -13,6 +13,7 @@
 #include "llvm/IR/Function.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
@@ -85,6 +86,27 @@ static cl::opt<int> NonGlobalValueMaxNameSize(
 
 extern cl::opt<bool> UseNewDbgInfoFormat;
 
+void Function::renumberBlocks() {
+  validateBlockNumbers();
+
+  NextBlockNum = 0;
+  for (auto &BB : *this)
+    BB.Number = NextBlockNum++;
+  BlockNumEpoch++;
+}
+
+void Function::validateBlockNumbers() const {
+#ifndef NDEBUG
+  BitVector Numbers(NextBlockNum);
+  for (const auto &BB : *this) {
+    unsigned Num = BB.getNumber();
+    assert(Num < NextBlockNum && "out of range block number");
+    assert(!Numbers[Num] && "duplicate block numbers");
+    Numbers.set(Num);
+  }
+#endif
+}
+
 void Function::convertToNewDbgValues() {
   IsNewDbgInfoFormat = true;
   for (auto &BB : *this) {
@@ -509,6 +531,8 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
 }
 
 Function::~Function() {
+  validateBlockNumbers();
+
   dropAllReferences();    // After this it is safe to delete instructions.
 
   // Delete all of the method arguments and unlink from symbol table...

diff --git a/llvm/unittests/IR/FunctionTest.cpp b/llvm/unittests/IR/FunctionTest.cpp
index 9aaff3ea33830..402667931fbc5 100644
--- a/llvm/unittests/IR/FunctionTest.cpp
+++ b/llvm/unittests/IR/FunctionTest.cpp
@@ -487,6 +487,98 @@ TEST(FunctionTest, EraseBBs) {
   EXPECT_EQ(F->size(), 0u);
 }
 
+TEST(FunctionTest, BasicBlockNumbers) {
+  LLVMContext Context;
+  Type *VoidType = Type::getVoidTy(Context);
+  FunctionType *FuncType = FunctionType::get(VoidType, false);
+  std::unique_ptr<Function> Func(
+      Function::Create(FuncType, GlobalValue::ExternalLinkage));
+
+  EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 0u);
+
+  BasicBlock *BB1 = BasicBlock::Create(Context, "bb1", Func.get());
+  EXPECT_EQ(BB1->getNumber(), 0u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 1u);
+  BasicBlock *BB2 = BasicBlock::Create(Context, "bb2", Func.get());
+  EXPECT_EQ(BB2->getNumber(), 1u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 2u);
+  BasicBlock *BB3 = BasicBlock::Create(Context, "bb3", Func.get());
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
+
+  BB2->eraseFromParent();
+  // Erasing doesn't trigger renumbering
+  EXPECT_EQ(BB1->getNumber(), 0u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
+  // ... and numbers are assigned monotonically increasing
+  BasicBlock *BB4 = BasicBlock::Create(Context, "bb4", Func.get());
+  EXPECT_EQ(BB4->getNumber(), 3u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+  // ... even if inserted not at the end
+  BasicBlock *BB5 = BasicBlock::Create(Context, "bb5", Func.get(), BB1);
+  EXPECT_EQ(BB5->getNumber(), 4u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
+
+  // Func is now: bb5, bb1, bb3, bb4
+  // Renumbering assigns numbers in their order in the function
+  EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
+  Func->renumberBlocks();
+  EXPECT_EQ(Func->getBlockNumberEpoch(), 1u);
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 3u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+
+  // Moving a block inside the function doesn't change numbers
+  BB1->moveBefore(BB5);
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 3u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
+
+  // Removing a block and adding it back assigns a new number, because the
+  // block was temporarily without a parent.
+  BB4->removeFromParent();
+  BB4->insertInto(Func.get());
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 4u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
+
+  std::unique_ptr<Function> Func2(
+      Function::Create(FuncType, GlobalValue::ExternalLinkage));
+  BasicBlock *BB6 = BasicBlock::Create(Context, "bb6", Func2.get());
+  EXPECT_EQ(BB6->getNumber(), 0u);
+  EXPECT_EQ(Func2->getMaxBlockNumber(), 1u);
+  // Moving a block to a different function assigns a new number
+  BB3->removeFromParent();
+  BB3->insertInto(Func2.get(), BB6);
+  EXPECT_EQ(BB3->getParent(), Func2.get());
+  EXPECT_EQ(BB3->getNumber(), 1u);
+  EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
+
+  EXPECT_EQ(Func2->getBlockNumberEpoch(), 0u);
+  Func2->renumberBlocks();
+  EXPECT_EQ(Func2->getBlockNumberEpoch(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 0u);
+  EXPECT_EQ(BB6->getNumber(), 1u);
+  EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
+
+  // splice works as expected and assigns new numbers
+  Func->splice(Func->end(), Func2.get());
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB4->getNumber(), 4u);
+  EXPECT_EQ(BB3->getNumber(), 5u);
+  EXPECT_EQ(BB6->getNumber(), 6u);
+  EXPECT_EQ(Func->getMaxBlockNumber(), 7u);
+}
+
 TEST(FunctionTest, UWTable) {
   LLVMContext Ctx;
   std::unique_ptr<Module> M = parseIR(Ctx, R"(


        


More information about the llvm-commits mailing list