[llvm] [IR] Add per-function numbers to basic blocks (PR #101052)

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 29 11:07:39 PDT 2024


https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/101052

Every basic block that is linked into a function now has a unique number, which can be queried using getNumber(). Numbers are densely allocated, but not re-assigned on block removal for stability. Block numbers are intended to be fairly stable and only be updated when removing several basic blocks to make sure the numbering doesn't become too sparse.

To reduce holes in the numbering, renumberBlocks() can be called to re-assign numbers in block order.

Previous discussion in: https://discourse.llvm.org/t/rfc-add-auxiliary-field-for-per-pass-custom-data-to-basicblock/80229

---

I added a validate method to catch cases where something goes wrong, even if I can't really imagine how invalid numbers can occur. But I think it's better to be safe and rule out this potential source of bugs when more things depend on the numbering.

Not sure if I should add a `getNumBlockIDs()` that returns NextBlockNum. So far, I haven't felt the need. I think we can always add it later if it turns out to be beneficial.


>From bff9e567fe51220de00e8049e72e3037794ab542 Mon Sep 17 00:00:00 2001
From: Alexis Engelke <engelke at in.tum.de>
Date: Mon, 29 Jul 2024 17:00:38 +0000
Subject: [PATCH] [IR] Add per-function numbers to basic blocks

Every basic block that is linked into a function now has a unique
number, which can be queried using getNumber(). Numbers are densely
allocated, but not re-assigned on block removal for stability. Block
numbers are intended to be fairly stable and only be updated when
removing several basic blocks to make sure the numbering doesn't
become too sparse.

To reduce holes in the numbering, renumberBlocks() can be called to
re-assign numbers in block order.
---
 llvm/include/llvm/IR/BasicBlock.h  | 10 +++++
 llvm/include/llvm/IR/Function.h    | 22 +++++++++
 llvm/lib/IR/BasicBlock.cpp         |  2 +
 llvm/lib/IR/Function.cpp           | 25 +++++++++++
 llvm/unittests/IR/FunctionTest.cpp | 72 ++++++++++++++++++++++++++++++
 5 files changed, 131 insertions(+)

diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 12571d957da60..c7913e60cea08 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -67,6 +67,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   bool IsNewDbgInfoFormat;
 
 private:
+  // Allow Function to renumber blocks.
+  friend class Function;
+  /// Per-function unique number.
+  unsigned Number = -1u;
+
   friend class BlockAddress;
   friend class SymbolTableListTraits<BasicBlock>;
 
@@ -96,6 +101,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   void setIsNewDbgInfoFormat(bool NewFlag);
   void setNewDbgInfoFormatFlag(bool NewFlag);
 
+  unsigned getNumber() const {
+    assert(getParent() && "only basic blocks in functions have valid numbers");
+    return Number;
+  }
+
   /// Record that the collection of DbgRecords in \p M "trails" after the last
   /// instruction of this block. These are equivalent to dbg.value intrinsics
   /// that exist at the end of a basic block with no terminator (a transient
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fd7a6aa46eea0..a2514bf5229dc 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -75,6 +75,22 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
 private:
   // Important things that make up a function!
   BasicBlockListType BasicBlocks;         ///< The basic blocks
+
+  // Basic blocks need to get their number when added to a function.
+  friend void BasicBlock::setParent(Function *);
+  unsigned NextBlockNum = 0;
+
+public:
+  /// Renumber basic blocks into a dense value range starting from 0. Be aware
+  /// that other data structures and analyses (e.g., DominatorTree) may depend
+  /// on the value numbers and need to be updated or invalidated.
+  void renumberBlocks();
+
+  /// Assert that all blocks have unique numbers within 0..NextBlockNum. This
+  /// has O(n) runtime complexity.
+  void validateBlockNumbers() const;
+
+private:
   mutable Argument *Arguments = nullptr;  ///< The formal arguments
   size_t NumArgs;
   std::unique_ptr<ValueSymbolTable>
@@ -996,6 +1012,12 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
   void setValueSubclassDataBit(unsigned Bit, bool On);
 };
 
+#ifdef NDEBUG
+/// In release builds, this is a no-op. For !NDEBUG builds, the checks are
+/// implemented in the .cpp file.
+inline void Function::validateBlockNumbers() const {}
+#endif
+
 /// Check whether null pointer dereferencing is considered undefined behavior
 /// for a given function or an address space.
 /// Null pointer access in non-zero address space is not considered undefined.
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index bf19934da047c..a797602c73f30 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -240,6 +240,8 @@ BasicBlock::~BasicBlock() {
 
 void BasicBlock::setParent(Function *parent) {
   // Set Parent=parent, updating instruction symtab entries as appropriate.
+  if (Parent != parent)
+    Number = parent ? parent->NextBlockNum++ : -1u;
   InstList.setSymTabObject(&Parent, parent);
 }
 
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 20871982afb06..bef2f935c4ba3 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -13,6 +13,7 @@
 #include "llvm/IR/Function.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
@@ -85,6 +86,28 @@ static cl::opt<int> NonGlobalValueMaxNameSize(
 
 extern cl::opt<bool> UseNewDbgInfoFormat;
 
+void Function::renumberBlocks() {
+  validateBlockNumbers();
+
+  NextBlockNum = 0;
+  for (auto &BB : *this)
+    BB.Number = NextBlockNum++;
+}
+
+#ifndef NDEBUG
+/// In asserts builds, check that every block number is in range and unique. In
+/// non-asserts builds, this is a no-op inline function defined in Function.h.
+void Function::validateBlockNumbers() const {
+  BitVector Numbers(NextBlockNum);
+  for (const auto &BB : *this) {
+    unsigned Num = BB.getNumber();
+    assert(Num < NextBlockNum && "out of range block number");
+    assert(!Numbers[Num] && "duplicate block numbers");
+    Numbers.set(Num);
+  }
+}
+#endif
+
 void Function::convertToNewDbgValues() {
   IsNewDbgInfoFormat = true;
   for (auto &BB : *this) {
@@ -509,6 +532,8 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
 }
 
 Function::~Function() {
+  validateBlockNumbers();
+
   dropAllReferences();    // After this it is safe to delete instructions.
 
   // Delete all of the method arguments and unlink from symbol table...
diff --git a/llvm/unittests/IR/FunctionTest.cpp b/llvm/unittests/IR/FunctionTest.cpp
index 9aaff3ea33830..18edbbaf76561 100644
--- a/llvm/unittests/IR/FunctionTest.cpp
+++ b/llvm/unittests/IR/FunctionTest.cpp
@@ -487,6 +487,78 @@ TEST(FunctionTest, EraseBBs) {
   EXPECT_EQ(F->size(), 0u);
 }
 
+TEST(FunctionTest, BasicBlockNumbers) {
+  LLVMContext Context;
+  Type *VoidType(Type::getVoidTy(Context));
+  FunctionType *FuncType(FunctionType::get(VoidType, false));
+  std::unique_ptr<Function> Func(
+      Function::Create(FuncType, GlobalValue::ExternalLinkage));
+
+  BasicBlock *BB1 = BasicBlock::Create(Context, "bb1", Func.get());
+  EXPECT_EQ(BB1->getNumber(), 0u);
+  BasicBlock *BB2 = BasicBlock::Create(Context, "bb2", Func.get());
+  EXPECT_EQ(BB2->getNumber(), 1u);
+  BasicBlock *BB3 = BasicBlock::Create(Context, "bb3", Func.get());
+  EXPECT_EQ(BB3->getNumber(), 2u);
+
+  BB2->eraseFromParent();
+  // Erasing doesn't trigger renumbering
+  EXPECT_EQ(BB1->getNumber(), 0u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  // ... and numbers are assigned in monotonically increasing order
+  BasicBlock *BB4 = BasicBlock::Create(Context, "bb4", Func.get());
+  EXPECT_EQ(BB4->getNumber(), 3u);
+  // ... even if the block is not inserted at the end
+  BasicBlock *BB5 = BasicBlock::Create(Context, "bb5", Func.get(), BB1);
+  EXPECT_EQ(BB5->getNumber(), 4u);
+
+  // Func is now: bb5, bb1, bb3, bb4
+  // Renumbering assigns numbers in their order in the function
+  Func->renumberBlocks();
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 3u);
+
+  // Moving a block inside the function doesn't change numbers
+  BB1->moveBefore(BB5);
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 3u);
+
+  // Removing a block and adding it back assigns a new number, because the
+  // block was temporarily without a parent.
+  BB4->removeFromParent();
+  BB4->insertInto(Func.get());
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB3->getNumber(), 2u);
+  EXPECT_EQ(BB4->getNumber(), 4u);
+
+  std::unique_ptr<Function> Func2(
+      Function::Create(FuncType, GlobalValue::ExternalLinkage));
+  BasicBlock *BB6 = BasicBlock::Create(Context, "bb6", Func2.get());
+  EXPECT_EQ(BB6->getNumber(), 0u);
+  // Moving a block to a different function assigns a new number
+  BB3->removeFromParent();
+  BB3->insertInto(Func2.get(), BB6);
+  EXPECT_EQ(BB3->getParent(), Func2.get());
+  EXPECT_EQ(BB3->getNumber(), 1u);
+
+  Func2->renumberBlocks();
+  EXPECT_EQ(BB3->getNumber(), 0u);
+  EXPECT_EQ(BB6->getNumber(), 1u);
+
+  // splice works as expected and assigns new numbers
+  Func->splice(Func->end(), Func2.get());
+  EXPECT_EQ(BB5->getNumber(), 0u);
+  EXPECT_EQ(BB1->getNumber(), 1u);
+  EXPECT_EQ(BB4->getNumber(), 4u);
+  EXPECT_EQ(BB3->getNumber(), 5u);
+  EXPECT_EQ(BB6->getNumber(), 6u);
+}
+
 TEST(FunctionTest, UWTable) {
   LLVMContext Ctx;
   std::unique_ptr<Module> M = parseIR(Ctx, R"(



More information about the llvm-commits mailing list