[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)

via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 5 12:54:46 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (higher-performance)

<details>
<summary>Changes</summary>

This is a regression in #<!-- -->87824.

With this change, the use of parent maps (`hasParent()`, `hasAncestor()`, etc.) in Clang AST matchers is no longer _guaranteed_ to avoid quadratic slowdown, but in practice it should do so more frequently. I tested against a translation unit that had been slow in the past, and it worked fine on that. If we hit those pathological cases in the future, we can evaluate other approaches.

Fixes #<!-- -->129808

---
Full diff: https://github.com/llvm/llvm-project/pull/129934.diff


1 Files Affected:

- (modified) clang/lib/AST/ParentMapContext.cpp (+65-8) 


``````````diff
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..03e5236eeb1db 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -65,21 +65,78 @@ class ParentMapContext::ParentMap {
   public:
     ParentVector() = default;
     explicit ParentVector(size_t N, const DynTypedNode &Value) {
-      Items.reserve(N);
+      SortedAndUnsortedItems.reserve(N);
       for (; N > 0; --N)
         push_back(Value);
     }
     bool contains(const DynTypedNode &Value) {
-      return Seen.contains(Value);
+      const auto SortBoundary = SortedAndUnsortedItems.begin() + NumSorted;
+      bool Found = std::binary_search(SortedAndUnsortedItems.begin(),
+                                      SortBoundary, Value);
+      Budget += llvm::bit_width(
+          static_cast<size_t>(SortBoundary - SortedAndUnsortedItems.begin()));
+      if (!Found) {
+        auto FoundIt =
+            std::find(SortBoundary, SortedAndUnsortedItems.end(), Value);
+        Budget += FoundIt - SortBoundary;
+        Found |= FoundIt != SortedAndUnsortedItems.end();
+      }
+      SortIfWorthwhile();
+      return Found;
     }
     void push_back(const DynTypedNode &Value) {
-      if (!Value.getMemoizationData() || Seen.insert(Value).second)
-        Items.push_back(Value);
+      ++Budget;
+      if (!Value.getMemoizationData() || !contains(Value)) {
+        SortedAndUnsortedItems.push_back(Value);
+        if (SortedAndUnsortedItems.back() < SortedAndUnsortedItems[NumSorted]) {
+          // Keep the minimum element in the middle to quickly tell us if
+          // merging will be necessary
+          using std::swap;
+          swap(SortedAndUnsortedItems.back(),
+               SortedAndUnsortedItems[NumSorted]);
+        }
+      }
+      VerifyInvariant();
     }
-    llvm::ArrayRef<DynTypedNode> view() const { return Items; }
+    llvm::ArrayRef<DynTypedNode> view() {
+      ++Budget;
+      return SortedAndUnsortedItems;
+    }
+
   private:
-    llvm::SmallVector<DynTypedNode, 2> Items;
-    llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+    void SortIfWorthwhile() {
+      VerifyInvariant();
+      auto SortBoundary = SortedAndUnsortedItems.begin() + NumSorted;
+      if (SortBoundary != SortedAndUnsortedItems.end()) {
+        const size_t NumUnsorted = SortedAndUnsortedItems.end() - SortBoundary;
+        const size_t SortingCost = NumUnsorted * llvm::bit_width(NumUnsorted);
+        const bool NeedMerge = SortBoundary != SortedAndUnsortedItems.begin();
+        // Assume that the naive implementation would copy these elements.
+        // This is just an estimate; it's OK if it's wrong.
+        const size_t MergeCost = SortedAndUnsortedItems.size() + NumUnsorted;
+        if (Budget >= (NeedMerge ? MergeCost : 0) + SortingCost) {
+          std::sort(SortBoundary, SortedAndUnsortedItems.end());
+          if (NeedMerge) {
+            std::inplace_merge(SortedAndUnsortedItems.begin(), SortBoundary,
+                               SortedAndUnsortedItems.end());
+          }
+          Budget = 0;
+          NumSorted = SortedAndUnsortedItems.size();
+        }
+      }
+    }
+
+    void VerifyInvariant() const {
+      assert(
+          !(NumSorted < SortedAndUnsortedItems.size() &&
+            SortedAndUnsortedItems.back() <
+                SortedAndUnsortedItems[NumSorted]) &&
+          "the boundary item must always be the minimum of the unsorted items");
+    }
+
+    llvm::SmallVector<DynTypedNode, 2> SortedAndUnsortedItems;
+    size_t NumSorted = 0;
+    int64_t Budget = 0;
   };
 
   /// Maps from a node to its parents. This is used for nodes that have
@@ -117,7 +174,7 @@ class ParentMapContext::ParentMap {
     if (I == Map.end()) {
       return llvm::ArrayRef<DynTypedNode>();
     }
-    if (const auto *V = dyn_cast<ParentVector *>(I->second)) {
+    if (auto *V = dyn_cast<ParentVector *>(I->second)) {
       return V->view();
     }
     return getSingleDynTypedNodeFromParentMap(I->second);

``````````

</details>


https://github.com/llvm/llvm-project/pull/129934


More information about the cfe-commits mailing list