[clang] Reduce memory usage in AST parent map generation by lazily checking if nodes have been seen (PR #129934)

via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 5 14:58:48 PST 2025


https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934

>From 55e0d3aca6c4051ebd68a9082005984e2d40c552 Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Wed, 5 Mar 2025 15:49:06 -0500
Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily
 checking if nodes have been seen

---
 clang/lib/AST/ParentMapContext.cpp | 61 +++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..5c601d8716175 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -60,6 +60,29 @@ class ParentMapContext::ParentMap {
 
   template <typename, typename...> friend struct ::MatchParents;
 
+  template <class T> struct IndirectDenseMapInfo {
+    using Ptr = T *;
+    using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>;
+    static inline Ptr getEmptyKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey());
+    }
+    static inline Ptr getTombstoneKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey());
+    }
+    static unsigned getHashValue(Ptr Val) {
+      return Val == getEmptyKey() || Val == getTombstoneKey()
+                 ? 0
+                 : Base::getHashValue(*Val);
+    }
+    static bool isEqual(Ptr LHS, Ptr RHS) {
+      if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+          RHS == getEmptyKey() || RHS == getTombstoneKey()) {
+        return LHS == RHS;
+      }
+      return Base::isEqual(*LHS, *RHS);
+    }
+  };
+
   /// Contains parents of a node.
   class ParentVector {
   public:
@@ -70,16 +93,44 @@ class ParentMapContext::ParentMap {
         push_back(Value);
     }
     bool contains(const DynTypedNode &Value) {
-      return Seen.contains(Value);
+      assert(Value.getMemoizationData());
+      bool found = FragileLazySeenCache.contains(&Value);
+      while (!found && ItemsProcessed < Items.size()) {
+        found |= FragileLazySeenCache.insert(&Items[ItemsProcessed]).second;
+        ++ItemsProcessed;
+      }
+      return found;
     }
     void push_back(const DynTypedNode &Value) {
-      if (!Value.getMemoizationData() || Seen.insert(Value).second)
+      if (!Value.getMemoizationData() || !contains(Value)) {
+        const size_t OldCapacity = Items.capacity();
         Items.push_back(Value);
+        if (OldCapacity != Items.capacity()) {
+          // Pointers are invalidated; remove them.
+          ItemsProcessed = 0;
+          // Free memory to avoid doubling peak memory usage during rehashing
+          FragileLazySeenCache.clear();
+        }
+      }
+    }
+    llvm::ArrayRef<DynTypedNode> view() {
+      while (ItemsProcessed < Items.size()) {
+        FragileLazySeenCache.insert(&Items[ItemsProcessed]);
+        ++ItemsProcessed;
+      }
+      return Items;
     }
-    llvm::ArrayRef<DynTypedNode> view() const { return Items; }
   private:
+    // BE CAREFUL. Pointers into this container are stored in the
+    // `FragileLazySeenCache` set below.
     llvm::SmallVector<DynTypedNode, 2> Items;
-    llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+    // This cache is fragile because it contains pointers that are invalidated
+    // when the vector capacity changes.
+    llvm::SmallDenseSet<const DynTypedNode *, 2,
+                        IndirectDenseMapInfo<const DynTypedNode>>
+        FragileLazySeenCache;
+    // Lazily tracks which items have been processed for the cache.
+    size_t ItemsProcessed = 0;
   };
 
   /// Maps from a node to its parents. This is used for nodes that have
@@ -117,7 +168,7 @@ class ParentMapContext::ParentMap {
     if (I == Map.end()) {
       return llvm::ArrayRef<DynTypedNode>();
     }
-    if (const auto *V = dyn_cast<ParentVector *>(I->second)) {
+    if (auto *V = dyn_cast<ParentVector *>(I->second)) {
       return V->view();
     }
     return getSingleDynTypedNodeFromParentMap(I->second);



More information about the cfe-commits mailing list