[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 5 13:59:08 PST 2025
https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934
>From e99bb267a950f75ea4fc23454762a7422c776bca Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Wed, 5 Mar 2025 15:49:06 -0500
Subject: [PATCH] Reduce memory usage in AST parent map generation by partially
reverting quadratic slowdown mitigation
The use of parent maps (hasParent(), hasAncestor(), etc.) in Clang AST matchers is no longer guaranteed to avoid quadratic slowdown, but the slowdown will only occur in pathological cases.
---
clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++--
1 file changed, 47 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..b7156b892b01a 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -60,6 +60,29 @@ class ParentMapContext::ParentMap {
template <typename, typename...> friend struct ::MatchParents;
+ template <class T> struct IndirectDenseMapInfo { // Key traits for T* keys that hash and compare through the pointee.
+ using Ptr = T *;
+ using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; // Pointee traits that non-sentinel keys delegate to.
+ static inline Ptr getEmptyKey() { // Sentinel taken from the void* traits.
+ return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey());
+ }
+ static inline Ptr getTombstoneKey() { // Sentinel taken from the void* traits.
+ return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey());
+ }
+ static unsigned getHashValue(Ptr Val) { // Sentinels hash to 0 without being dereferenced.
+ return Val == getEmptyKey() || Val == getTombstoneKey()
+ ? 0
+ : Base::getHashValue(*Val);
+ }
+ static bool isEqual(Ptr LHS, Ptr RHS) { // Sentinels compare by address; other keys by pointee.
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey()) {
+ return LHS == RHS;
+ }
+ return Base::isEqual(*LHS, *RHS);
+ }
+ };
+
/// Contains parents of a node.
class ParentVector {
public:
@@ -70,16 +93,37 @@ class ParentMapContext::ParentMap {
push_back(Value);
}
bool contains(const DynTypedNode &Value) {
- return Seen.contains(Value);
+ assert(Value.getMemoizationData()); // Lookup is only defined for nodes with memoization data.
+ bool found = FragileLazySeenCache.contains(&Value);
+ while (!found && ItemsProcessed < Items.size()) { // Lazily cache items not yet indexed, stopping at a match.
+ found = IndirectDenseMapInfo<const DynTypedNode>::isEqual(&Items[ItemsProcessed], &Value); // A hit is an equal item, not a successful insert.
+ FragileLazySeenCache.insert(&Items[ItemsProcessed++]);
+ }
+ return found;
}
void push_back(const DynTypedNode &Value) {
- if (!Value.getMemoizationData() || Seen.insert(Value).second)
+ if (!Value.getMemoizationData() || !contains(Value)) { // NOTE(review): nodes without memoization data skip the duplicate check but are later cached by contains() — confirm they are hashable.
+ const size_t OldCapacity = Items.capacity();
Items.push_back(Value);
+ if (OldCapacity != Items.capacity()) {
+ // Capacity changed, so pointers into Items are invalidated; restart the lazy scan.
+ ItemsProcessed = 0;
+ // Free memory to avoid doubling peak memory usage during rehashing; contains() refills the cache on demand.
+ FragileLazySeenCache.clear();
+ }
+ }
}
llvm::ArrayRef<DynTypedNode> view() const { return Items; }
private:
+ // BE CAREFUL. Pointers into this container are stored in the cache.
llvm::SmallVector<DynTypedNode, 2> Items;
- llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+ // This cache is fragile because it contains pointers that are invalidated
+ // when the vector capacity changes.
+ llvm::SmallDenseSet<const DynTypedNode *, 2,
+ IndirectDenseMapInfo<const DynTypedNode>>
+ FragileLazySeenCache;
+ // Lazily tracks which items have been processed for the cache.
+ size_t ItemsProcessed = 0;
};
/// Maps from a node to its parents. This is used for nodes that have
More information about the cfe-commits
mailing list