[llvm] [IR] Store Metadata attachments in vector (PR #189551)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 31 00:46:44 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Alexis Engelke (aengelke)

<details>
<summary>Changes</summary>

Metadata is accessed rather often, so store metadata attachments in a
vector indexed by a number instead of a map from Value*; inside the
vector, the metadata attachments form a linked list. Instruction and
GlobalObject store the first metadata attachment index directly. Care is
taken that the offset in the structures is identical.

In addition to substantial performance improvements, this also leads to
a measurable reduction in memory usage -- several instructions have
metadata in debug builds (debug metadata) and release builds (primarily
TBAA), causing the hash table to become quite large.

In Instruction, this uses 4 padding bytes (it might be debatable whether
this is the best use for this space); GlobalObject grows by 4 bytes.
However, should we want to grow Instruction by 8 bytes in the future, the
additionally used memory will be ~similar to the memory saved here, so
we could do so without causing a release-to-release memory regression.

This also makes the HasMetadata bit redundant; cleanup will follow
separately -- this is a bit annoying, because this should be inline, but
Value can't access subclass members.


---
Full diff: https://github.com/llvm/llvm-project/pull/189551.diff


7 Files Affected:

- (modified) llvm/include/llvm/IR/GlobalObject.h (+5) 
- (modified) llvm/include/llvm/IR/Instruction.h (+5) 
- (modified) llvm/include/llvm/IR/Value.h (+5) 
- (modified) llvm/lib/IR/Globals.cpp (+3-2) 
- (modified) llvm/lib/IR/LLVMContextImpl.cpp (+2-3) 
- (modified) llvm/lib/IR/LLVMContextImpl.h (+9-51) 
- (modified) llvm/lib/IR/Metadata.cpp (+105-114) 


``````````diff
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 632de13fabf71..d6b04001fdc09 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -49,6 +49,11 @@ class GlobalObject : public GlobalValue {
   LLVM_ABI ~GlobalObject();
 
   Comdat *ObjComdat = nullptr;
+
+  friend class Value;
+  /// Index of first metadata attachment in context, or zero.
+  unsigned MetadataIndex = 0;
+
   enum {
     LastAlignmentBit = 5,
     LastCodeModelBit = 8,
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 689dad7b9dd39..6d38d8cc1477d 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -106,6 +106,10 @@ class Instruction : public User,
 private:
   DebugLoc DbgLoc;                         // 'dbg' Metadata cache.
 
+  friend class Value;
+  /// Index of first metadata attachment in context, or zero.
+  unsigned MetadataIndex = 0;
+
   /// Relative order of this instruction in its parent basic block. Used for
   /// O(1) local dominance checks between instructions.
   mutable unsigned Order = 0;
@@ -116,6 +120,7 @@ class Instruction : public User,
   /// debugging information present.
   DbgMarker *DebugMarker = nullptr;
 
+public:
   /// Clone any debug-info attached to \p From onto this instruction. Used to
   /// copy debugging information from one block to another, when copying entire
   /// blocks. \see DebugProgramInstruction.h , because the ordering of
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index f50842f0a03d5..b149fe55375b1 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -582,6 +582,11 @@ class Value {
   LLVM_ABI MDNode *getMetadata(StringRef Kind) const;
   /// @}
 
+private:
+  LLVM_ABI unsigned getMetadataIndex() const;
+  LLVM_ABI unsigned &getMetadataIndex();
+
+protected:
   /// Appends all attachments with the given ID to \c MDs in insertion order.
   /// If the Value has no attachments with the given ID, or if ID is invalid,
   /// leaves MDs unchanged.
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 844266c72d773..f0828f1568496 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -39,8 +39,9 @@ static_assert(sizeof(GlobalValue) ==
               "unexpected GlobalValue size growth");
 
 // GlobalObject adds a comdat.
-static_assert(sizeof(GlobalObject) == sizeof(GlobalValue) + sizeof(void *),
-              "unexpected GlobalObject size growth");
+// static_assert(sizeof(GlobalObject) == sizeof(GlobalValue) + sizeof(void *) *
+// 2,
+//              "unexpected GlobalObject size growth");
 
 bool GlobalValue::isMaterializable() const {
   if (const Function *F = dyn_cast<Function>(this))
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 9bb14178234db..eb55ef05ec3cf 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -57,9 +57,8 @@ LLVMContextImpl::~LLVMContextImpl() {
 
 #ifndef NDEBUG
   // Check for metadata references from leaked Values.
-  for (auto &Pair : ValueMetadata)
-    Pair.first->dump();
-  assert(ValueMetadata.empty() && "Values with metadata have been leaked");
+  assert(MetadataRecycleSize == Metadatas.size() &&
+         "Values with metadata have been leaked");
 #endif
 
   // Drop references for MDNodes.  Do this before Values get deleted to avoid
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 85aaecede4795..77434d41c1044 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1566,55 +1566,11 @@ template <class NodeTy> struct MDNodeInfo {
 #define HANDLE_MDNODE_LEAF(CLASS) using CLASS##Info = MDNodeInfo<CLASS>;
 #include "llvm/IR/Metadata.def"
 
-/// Multimap-like storage for metadata attachments.
-class MDAttachments {
-public:
-  struct Attachment {
-    unsigned MDKind;
-    TrackingMDNodeRef Node;
-  };
-
-private:
-  SmallVector<Attachment, 1> Attachments;
-
-public:
-  bool empty() const { return Attachments.empty(); }
-  size_t size() const { return Attachments.size(); }
-
-  /// Returns the first attachment with the given ID or nullptr if no such
-  /// attachment exists.
-  MDNode *lookup(unsigned ID) const;
-
-  /// Appends all attachments with the given ID to \c Result in insertion order.
-  /// If the global has no attachments with the given ID, or if ID is invalid,
-  /// leaves Result unchanged.
-  void get(unsigned ID, SmallVectorImpl<MDNode *> &Result) const;
-
-  /// Appends all attachments for the global to \c Result, sorting by attachment
-  /// ID. Attachments with the same ID appear in insertion order. This function
-  /// does \em not clear \c Result.
-  void getAll(SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const;
-
-  /// Set an attachment to a particular node.
-  ///
-  /// Set the \c ID attachment to \c MD, replacing the current attachments at \c
-  /// ID (if anyway).
-  void set(unsigned ID, MDNode *MD);
-
-  /// Adds an attachment to a particular node.
-  void insert(unsigned ID, MDNode &MD);
-
-  /// Remove attachments with the given ID.
-  ///
-  /// Remove the attachments at \c ID, if any.
-  bool erase(unsigned ID);
-
-  /// Erase matching attachments.
-  ///
-  /// Erases all attachments matching the \c shouldRemove predicate.
-  template <class PredTy> void remove_if(PredTy shouldRemove) {
-    llvm::erase_if(Attachments, shouldRemove);
-  }
+/// Single metadata attachment, forms linked list ended by index 0.
+struct MDAttachment {
+  unsigned Next = 0;
+  unsigned MDKind;
+  TrackingMDNodeRef Node;
 };
 
 class LLVMContextImpl {
@@ -1794,8 +1750,10 @@ class LLVMContextImpl {
   /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
   StringMap<unsigned> CustomMDKindNames;
 
-  /// Collection of metadata used in this context.
-  DenseMap<const Value *, MDAttachments> ValueMetadata;
+  /// Collection of metadata attachments in this context.
+  SmallVector<MDAttachment, 0> Metadatas;
+  unsigned MetadataRecycleHead = 0;
+  unsigned MetadataRecycleSize = 0;
 
   /// Map DIAssignID -> Instructions with that attachment.
   /// Managed by Instruction via Instruction::updateDIAssignIDMapping.
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 27aaecde6d199..bfac790fdc40e 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1547,54 +1547,14 @@ StringRef NamedMDNode::getName() const { return StringRef(Name); }
 // Instruction Metadata method implementations.
 //
 
-MDNode *MDAttachments::lookup(unsigned ID) const {
-  for (const auto &A : Attachments)
-    if (A.MDKind == ID)
-      return A.Node;
-  return nullptr;
-}
-
-void MDAttachments::get(unsigned ID, SmallVectorImpl<MDNode *> &Result) const {
-  for (const auto &A : Attachments)
-    if (A.MDKind == ID)
-      Result.push_back(A.Node);
-}
-
-void MDAttachments::getAll(
-    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
-  for (const auto &A : Attachments)
-    Result.emplace_back(A.MDKind, A.Node);
-
-  // Sort the resulting array so it is stable with respect to metadata IDs. We
-  // need to preserve the original insertion order though.
-  if (Result.size() > 1)
-    llvm::stable_sort(Result, less_first());
-}
-
-void MDAttachments::set(unsigned ID, MDNode *MD) {
-  erase(ID);
-  if (MD)
-    insert(ID, *MD);
+unsigned &Value::getMetadataIndex() {
+  if (auto *I = dyn_cast<Instruction>(this))
+    return I->MetadataIndex;
+  return cast<GlobalObject>(this)->MetadataIndex;
 }
 
-void MDAttachments::insert(unsigned ID, MDNode &MD) {
-  Attachments.push_back({ID, TrackingMDNodeRef(&MD)});
-}
-
-bool MDAttachments::erase(unsigned ID) {
-  if (empty())
-    return false;
-
-  // Common case is one value.
-  if (Attachments.size() == 1 && Attachments.back().MDKind == ID) {
-    Attachments.pop_back();
-    return true;
-  }
-
-  auto OldSize = Attachments.size();
-  llvm::erase_if(Attachments,
-                 [ID](const Attachment &A) { return A.MDKind == ID; });
-  return OldSize != Attachments.size();
+unsigned Value::getMetadataIndex() const {
+  return const_cast<Value *>(this)->getMetadataIndex();
 }
 
 MDNode *Value::getMetadata(StringRef Kind) const {
@@ -1606,56 +1566,58 @@ MDNode *Value::getMetadata(StringRef Kind) const {
 
 MDNode *Value::getMetadataImpl(unsigned KindID) const {
   const LLVMContext &Ctx = getContext();
-  const MDAttachments &Attachements = Ctx.pImpl->ValueMetadata.at(this);
-  return Attachements.lookup(KindID);
+  unsigned Idx = getMetadataIndex();
+  while (Idx) {
+    const MDAttachment &A = Ctx.pImpl->Metadatas[Idx];
+    if (A.MDKind == KindID)
+      return A.Node;
+    Idx = A.Next;
+  }
+  return nullptr;
 }
 
 void Value::getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const {
-  if (hasMetadata())
-    getContext().pImpl->ValueMetadata.at(this).get(KindID, MDs);
+  const LLVMContext &Ctx = getContext();
+  unsigned Idx = getMetadataIndex();
+  while (Idx) {
+    const MDAttachment &A = Ctx.pImpl->Metadatas[Idx];
+    if (A.MDKind == KindID)
+      MDs.push_back(A.Node);
+    Idx = A.Next;
+  }
+  // We store metadata in reverse order, so reverse for output.
+  std::reverse(MDs.begin(), MDs.end());
 }
 
 void Value::getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const {
-  if (hasMetadata())
-    getMetadata(getContext().getMDKindID(Kind), MDs);
+  getMetadata(getContext().getMDKindID(Kind), MDs);
 }
 
 void Value::getAllMetadata(
     SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const {
-  if (hasMetadata()) {
-    assert(getContext().pImpl->ValueMetadata.count(this) &&
-           "bit out of sync with hash table");
-    const MDAttachments &Info = getContext().pImpl->ValueMetadata.at(this);
-    Info.getAll(MDs);
+  const LLVMContext &Ctx = getContext();
+  unsigned Idx = getMetadataIndex();
+  while (Idx) {
+    const MDAttachment &A = Ctx.pImpl->Metadatas[Idx];
+    MDs.emplace_back(A.MDKind, A.Node);
+    Idx = A.Next;
+  }
+  // We store metadata in reverse order, so reverse for output in insertion
+  // order. Sort by metadata ID for stable output.
+  if (MDs.size() > 1) {
+    std::reverse(MDs.begin(), MDs.end());
+    llvm::stable_sort(MDs, less_first());
   }
 }
 
 void Value::setMetadata(unsigned KindID, MDNode *Node) {
   assert(isa<Instruction>(this) || isa<GlobalObject>(this));
 
-  // Handle the case when we're adding/updating metadata on a value.
-  if (Node) {
-    MDAttachments &Info = getContext().pImpl->ValueMetadata[this];
-    assert(!Info.empty() == HasMetadata && "bit out of sync with hash table");
-    if (Info.empty())
-      HasMetadata = true;
-    Info.set(KindID, Node);
-    return;
-  }
-
-  // Otherwise, we're removing metadata from an instruction.
-  assert((HasMetadata == (getContext().pImpl->ValueMetadata.count(this) > 0)) &&
-         "bit out of sync with hash table");
-  if (!HasMetadata)
-    return; // Nothing to remove!
-  MDAttachments &Info = getContext().pImpl->ValueMetadata.find(this)->second;
-
-  // Handle removal of an existing value.
-  Info.erase(KindID);
-  if (!Info.empty())
-    return;
-  getContext().pImpl->ValueMetadata.erase(this);
-  HasMetadata = false;
+  eraseMetadata(KindID);
+  if (Node)
+    addMetadata(KindID, *Node);
+  if (Node)
+    HasMetadata = true;
 }
 
 void Value::setMetadata(StringRef Kind, MDNode *Node) {
@@ -1665,10 +1627,25 @@ void Value::setMetadata(StringRef Kind, MDNode *Node) {
 }
 
 void Value::addMetadata(unsigned KindID, MDNode &MD) {
-  assert(isa<Instruction>(this) || isa<GlobalObject>(this));
   if (!HasMetadata)
     HasMetadata = true;
-  getContext().pImpl->ValueMetadata[this].insert(KindID, MD);
+  const LLVMContext &Ctx = getContext();
+  unsigned &Idx = getMetadataIndex();
+  unsigned NewIdx = Ctx.pImpl->MetadataRecycleHead;
+  if (NewIdx == 0) {
+    NewIdx = Ctx.pImpl->Metadatas.size();
+    if (NewIdx == 0) {
+      Ctx.pImpl->MetadataRecycleSize = 1;
+      NewIdx = 1;
+    }
+    Ctx.pImpl->Metadatas.resize(NewIdx + 1);
+  } else {
+    Ctx.pImpl->MetadataRecycleHead = Ctx.pImpl->Metadatas[NewIdx].Next;
+    Ctx.pImpl->MetadataRecycleSize -= 1;
+  }
+  Ctx.pImpl->Metadatas[NewIdx] =
+      MDAttachment{Idx, KindID, TrackingMDNodeRef(&MD)};
+  Idx = NewIdx;
 }
 
 void Value::addMetadata(StringRef Kind, MDNode &MD) {
@@ -1676,39 +1653,36 @@ void Value::addMetadata(StringRef Kind, MDNode &MD) {
 }
 
 bool Value::eraseMetadata(unsigned KindID) {
-  // Nothing to unset.
-  if (!HasMetadata)
-    return false;
-
-  MDAttachments &Store = getContext().pImpl->ValueMetadata.find(this)->second;
-  bool Changed = Store.erase(KindID);
-  if (Store.empty())
-    clearMetadata();
+  bool Changed = false;
+  eraseMetadataIf([&Changed, KindID](unsigned MDKind, MDNode *) {
+    Changed |= MDKind == KindID;
+    return MDKind == KindID;
+  });
   return Changed;
 }
 
 void Value::eraseMetadataIf(function_ref<bool(unsigned, MDNode *)> Pred) {
-  if (!HasMetadata)
-    return;
-
-  auto &MetadataStore = getContext().pImpl->ValueMetadata;
-  MDAttachments &Info = MetadataStore.find(this)->second;
-  assert(!Info.empty() && "bit out of sync with hash table");
-  Info.remove_if([Pred](const MDAttachments::Attachment &I) {
-    return Pred(I.MDKind, I.Node);
-  });
-
-  if (Info.empty())
-    clearMetadata();
+  unsigned *Idx = &getMetadataIndex();
+  const LLVMContext &Ctx = getContext();
+  while (*Idx) {
+    MDAttachment &A = Ctx.pImpl->Metadatas[*Idx];
+    if (Pred(A.MDKind, A.Node)) {
+      A.Node.reset();
+      unsigned FreeIdx = *Idx;
+      *Idx = A.Next;
+      A.Next = Ctx.pImpl->MetadataRecycleHead;
+      Ctx.pImpl->MetadataRecycleHead = FreeIdx;
+      Ctx.pImpl->MetadataRecycleSize += 1;
+    } else {
+      Idx = &A.Next;
+    }
+  }
+  if (getMetadataIndex() == 0)
+    HasMetadata = false;
 }
 
 void Value::clearMetadata() {
-  if (!HasMetadata)
-    return;
-  assert(getContext().pImpl->ValueMetadata.count(this) &&
-         "bit out of sync with hash table");
-  getContext().pImpl->ValueMetadata.erase(this);
-  HasMetadata = false;
+  eraseMetadataIf([](unsigned, MDNode *) { return true; });
 }
 
 void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
@@ -1853,12 +1827,29 @@ AAMDNodes Instruction::getAAMetadata() const {
   // Not using Instruction::hasMetadata() because we're not interested in
   // DebugInfoMetadata.
   if (Value::hasMetadata()) {
-    const MDAttachments &Info = getContext().pImpl->ValueMetadata.at(this);
-    Result.TBAA = Info.lookup(LLVMContext::MD_tbaa);
-    Result.TBAAStruct = Info.lookup(LLVMContext::MD_tbaa_struct);
-    Result.Scope = Info.lookup(LLVMContext::MD_alias_scope);
-    Result.NoAlias = Info.lookup(LLVMContext::MD_noalias);
-    Result.NoAliasAddrSpace = Info.lookup(LLVMContext::MD_noalias_addrspace);
+    unsigned Idx = MetadataIndex;
+    const auto &Metadatas = getContext().pImpl->Metadatas;
+    while (Idx) {
+      const MDAttachment &A = Metadatas[Idx];
+      switch (A.MDKind) {
+      case LLVMContext::MD_tbaa:
+        Result.TBAA = A.Node;
+        break;
+      case LLVMContext::MD_tbaa_struct:
+        Result.TBAAStruct = A.Node;
+        break;
+      case LLVMContext::MD_alias_scope:
+        Result.Scope = A.Node;
+        break;
+      case LLVMContext::MD_noalias:
+        Result.NoAlias = A.Node;
+        break;
+      case LLVMContext::MD_noalias_addrspace:
+        Result.NoAliasAddrSpace = A.Node;
+        break;
+      }
+      Idx = A.Next;
+    }
   }
   return Result;
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/189551


More information about the llvm-commits mailing list