[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 5 12:54:46 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (higher-performance)
<details>
<summary>Changes</summary>
This is a regression in #<!-- -->87824.
With this change, the use of parent maps (`hasParent()`, `hasAncestor()`, etc.) in Clang AST matchers is no longer _guaranteed_ to avoid quadratic slowdown, but in practice it should do so more frequently. I tested against a translation unit that had been slow in the past, and it worked fine on that. If we hit those pathological cases in the future, we can evaluate other approaches.
Fixes #<!-- -->129808
---
Full diff: https://github.com/llvm/llvm-project/pull/129934.diff
1 Files Affected:
- (modified) clang/lib/AST/ParentMapContext.cpp (+65-8)
``````````diff
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..03e5236eeb1db 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -65,21 +65,78 @@ class ParentMapContext::ParentMap {
public:
ParentVector() = default;
explicit ParentVector(size_t N, const DynTypedNode &Value) {
- Items.reserve(N);
+ SortedAndUnsortedItems.reserve(N);
for (; N > 0; --N)
push_back(Value);
}
bool contains(const DynTypedNode &Value) {
- return Seen.contains(Value);
+ const auto SortBoundary = SortedAndUnsortedItems.begin() + NumSorted;
+ bool Found = std::binary_search(SortedAndUnsortedItems.begin(),
+ SortBoundary, Value);
+ Budget += llvm::bit_width(
+ static_cast<size_t>(SortBoundary - SortedAndUnsortedItems.begin()));
+ if (!Found) {
+ auto FoundIt =
+ std::find(SortBoundary, SortedAndUnsortedItems.end(), Value);
+ Budget += FoundIt - SortBoundary;
+ Found |= FoundIt != SortedAndUnsortedItems.end();
+ }
+ SortIfWorthwhile();
+ return Found;
}
void push_back(const DynTypedNode &Value) {
- if (!Value.getMemoizationData() || Seen.insert(Value).second)
- Items.push_back(Value);
+ ++Budget;
+ if (!Value.getMemoizationData() || !contains(Value)) {
+ SortedAndUnsortedItems.push_back(Value);
+ if (SortedAndUnsortedItems.back() < SortedAndUnsortedItems[NumSorted]) {
+ // Keep the minimum element in the middle to quickly tell us if
+ // merging will be necessary
+ using std::swap;
+ swap(SortedAndUnsortedItems.back(),
+ SortedAndUnsortedItems[NumSorted]);
+ }
+ }
+ VerifyInvariant();
}
- llvm::ArrayRef<DynTypedNode> view() const { return Items; }
+ llvm::ArrayRef<DynTypedNode> view() {
+ ++Budget;
+ return SortedAndUnsortedItems;
+ }
+
private:
- llvm::SmallVector<DynTypedNode, 2> Items;
- llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+ void SortIfWorthwhile() {
+ VerifyInvariant();
+ auto SortBoundary = SortedAndUnsortedItems.begin() + NumSorted;
+ if (SortBoundary != SortedAndUnsortedItems.end()) {
+ const size_t NumUnsorted = SortedAndUnsortedItems.end() - SortBoundary;
+ const size_t SortingCost = NumUnsorted * llvm::bit_width(NumUnsorted);
+ const bool NeedMerge = SortBoundary != SortedAndUnsortedItems.begin();
+ // Assume that the naive implementation would copy these elements.
+ // This is just an estimate; it's OK if it's wrong.
+ const size_t MergeCost = SortedAndUnsortedItems.size() + NumUnsorted;
+ if (Budget >= (NeedMerge ? MergeCost : 0) + SortingCost) {
+ std::sort(SortBoundary, SortedAndUnsortedItems.end());
+ if (NeedMerge) {
+ std::inplace_merge(SortedAndUnsortedItems.begin(), SortBoundary,
+ SortedAndUnsortedItems.end());
+ }
+ Budget = 0;
+ NumSorted = SortedAndUnsortedItems.size();
+ }
+ }
+ }
+
+ void VerifyInvariant() const {
+ assert(
+ !(NumSorted < SortedAndUnsortedItems.size() &&
+ SortedAndUnsortedItems.back() <
+ SortedAndUnsortedItems[NumSorted]) &&
+ "the boundary item must always be the minimum of the unsorted items");
+ }
+
+ llvm::SmallVector<DynTypedNode, 2> SortedAndUnsortedItems;
+ size_t NumSorted = 0;
+ int64_t Budget = 0;
};
/// Maps from a node to its parents. This is used for nodes that have
@@ -117,7 +174,7 @@ class ParentMapContext::ParentMap {
if (I == Map.end()) {
return llvm::ArrayRef<DynTypedNode>();
}
- if (const auto *V = dyn_cast<ParentVector *>(I->second)) {
+ if (auto *V = dyn_cast<ParentVector *>(I->second)) {
return V->view();
}
return getSingleDynTypedNodeFromParentMap(I->second);
``````````
</details>
https://github.com/llvm/llvm-project/pull/129934
More information about the cfe-commits
mailing list