[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)

via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 5 14:09:10 PST 2025


https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934

>From 9f12c5856691268177e748d2a98de24ee30bdfd1 Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Wed, 5 Mar 2025 15:49:06 -0500
Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily
 checking if nodes have been seen

---
 clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..b7156b892b01a 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -60,6 +60,29 @@ class ParentMapContext::ParentMap {
 
   template <typename, typename...> friend struct ::MatchParents;
 
+  /// DenseMapInfo for pointer keys that hashes and compares the pointees.
+  template <class T> struct IndirectDenseMapInfo {
+    using Ptr = T *;
+    using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>;
+    static Ptr getEmptyKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey());
+    }
+    static Ptr getTombstoneKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey());
+    }
+    /// True for the empty and tombstone keys, which are never dereferenced.
+    static bool isSentinel(Ptr P) {
+      return P == getEmptyKey() || P == getTombstoneKey();
+    }
+    static unsigned getHashValue(Ptr Val) {
+      return isSentinel(Val) ? 0 : Base::getHashValue(*Val);
+    }
+    static bool isEqual(Ptr LHS, Ptr RHS) {
+      return isSentinel(LHS) || isSentinel(RHS) ? LHS == RHS
+                                                : Base::isEqual(*LHS, *RHS);
+    }
+  };
+
   /// Contains parents of a node.
   class ParentVector {
   public:
@@ -70,16 +93,37 @@ class ParentMapContext::ParentMap {
         push_back(Value);
     }
     bool contains(const DynTypedNode &Value) {
-      return Seen.contains(Value);
+      assert(Value.getMemoizationData());
+      bool found = FragileLazySeenCache.contains(&Value);
+      while (!found && ItemsProcessed < Items.size()) {
+        found |= FragileLazySeenCache.insert(&Items[ItemsProcessed]).second;
+        ++ItemsProcessed;
+      }
+      return found;
     }
     void push_back(const DynTypedNode &Value) {
-      if (!Value.getMemoizationData() || Seen.insert(Value).second)
+      if (!Value.getMemoizationData() || !contains(Value)) {
+        const size_t OldCapacity = Items.capacity();
         Items.push_back(Value);
+        if (OldCapacity != Items.capacity()) {
+          // Cached pointers into Items are now invalid; restart lazy indexing.
+          ItemsProcessed = 0;
+          // Free memory to avoid doubling peak memory usage during rehashing.
+          FragileLazySeenCache.clear();
+        }
+      }
     }
     llvm::ArrayRef<DynTypedNode> view() const { return Items; }
   private:
+    // BE CAREFUL. FragileLazySeenCache stores pointers to these elements.
     llvm::SmallVector<DynTypedNode, 2> Items;
-    llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+    // Holds pointers into Items, keyed by the pointed-to node. They dangle once
+    // Items reallocates, so push_back() clears the cache on a capacity change.
+    llvm::SmallDenseSet<const DynTypedNode *, 2,
+                        IndirectDenseMapInfo<const DynTypedNode>>
+        FragileLazySeenCache;
+    // Number of leading Items entries contains() has already put in the cache.
+    size_t ItemsProcessed = 0;
   };
 
   /// Maps from a node to its parents. This is used for nodes that have



More information about the cfe-commits mailing list