[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)

via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 5 13:59:08 PST 2025


https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934

>From e99bb267a950f75ea4fc23454762a7422c776bca Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Wed, 5 Mar 2025 15:49:06 -0500
Subject: [PATCH] Reduce memory usage in AST parent map generation by partially
 reverting quadratic slowdown mitigation

The use of parent maps (hasParent(), hasAncestor(), etc.) in Clang AST matchers is no longer guaranteed to avoid quadratic slowdown; such slowdown should now occur only in pathological cases.
---
 clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..b7156b892b01a 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -60,6 +60,29 @@ class ParentMapContext::ParentMap {
 
   template <typename, typename...> friend struct ::MatchParents;
 
+  template <class T> struct IndirectDenseMapInfo { // Keys: T*, by pointee.
+    using Ptr = T *; // Key type stored in the container.
+    using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; // Pointee traits.
+    static inline Ptr getEmptyKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey());
+    }
+    static inline Ptr getTombstoneKey() {
+      return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey());
+    }
+    static unsigned getHashValue(Ptr Val) {
+      return Val == getEmptyKey() || Val == getTombstoneKey()
+                 ? 0 // Sentinel pointers are never dereferenced.
+                 : Base::getHashValue(*Val);
+    }
+    static bool isEqual(Ptr LHS, Ptr RHS) {
+      if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+          RHS == getEmptyKey() || RHS == getTombstoneKey()) {
+        return LHS == RHS; // Sentinel involved: compare addresses only.
+      }
+      return Base::isEqual(*LHS, *RHS); // Real keys: compare the pointees.
+    }
+  };
+
   /// Contains parents of a node.
   class ParentVector {
   public:
@@ -70,16 +93,37 @@ class ParentMapContext::ParentMap {
         push_back(Value);
     }
     bool contains(const DynTypedNode &Value) {
-      return Seen.contains(Value);
+      assert(Value.getMemoizationData());
+      bool found = FragileLazySeenCache.contains(&Value);
+      while (!found && ItemsProcessed < Items.size()) {
+        found |= FragileLazySeenCache.insert(&Items[ItemsProcessed]).second;
+        ++ItemsProcessed;
+      }
+      return found;
     }
     void push_back(const DynTypedNode &Value) {
-      if (!Value.getMemoizationData() || Seen.insert(Value).second)
+      if (!Value.getMemoizationData() || !contains(Value)) {
+        const size_t OldCapacity = Items.capacity();
         Items.push_back(Value);
+        if (OldCapacity != Items.capacity()) {
+          // Capacity changed, so cached pointers into Items are stale.
+          ItemsProcessed = 0;
+          // Drop the stale pointers; contains() re-indexes items lazily.
+          FragileLazySeenCache.clear();
+        }
+      }
     }
     llvm::ArrayRef<DynTypedNode> view() const { return Items; }
   private:
+    // BE CAREFUL: FragileLazySeenCache stores pointers to these elements.
     llvm::SmallVector<DynTypedNode, 2> Items;
-    llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+    // Set of pointers into Items, hashed and compared by pointee. Fragile:
+    // push_back() clears it whenever the vector capacity changes.
+    llvm::SmallDenseSet<const DynTypedNode *, 2,
+                        IndirectDenseMapInfo<const DynTypedNode>>
+        FragileLazySeenCache;
+    // Count of leading Items that contains() has already put in the cache.
+    size_t ItemsProcessed = 0;
   };
 
   /// Maps from a node to its parents. This is used for nodes that have



More information about the cfe-commits mailing list