[clang] Reduce memory usage in AST parent map generation by partially reverting quadratic slowdown mitigation (PR #129934)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 5 14:09:10 PST 2025
https://github.com/higher-performance updated https://github.com/llvm/llvm-project/pull/129934
>From 9f12c5856691268177e748d2a98de24ee30bdfd1 Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Wed, 5 Mar 2025 15:49:06 -0500
Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily
checking if nodes have been seen
---
clang/lib/AST/ParentMapContext.cpp | 50 ++++++++++++++++++++++++++++--
1 file changed, 47 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index e9387ec79c373..b7156b892b01a 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -60,6 +60,29 @@ class ParentMapContext::ParentMap {
template <typename, typename...> friend struct ::MatchParents;
+ template <class T> struct IndirectDenseMapInfo { // DenseMap key traits for T* keys that hash/compare the pointee, not the address
+ using Ptr = T *;
+ using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>; // traits of the pointee type with cv-qualifiers stripped
+ static inline Ptr getEmptyKey() {
+ return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey()); // borrows the void* empty sentinel
+ }
+ static inline Ptr getTombstoneKey() {
+ return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey()); // borrows the void* tombstone sentinel
+ }
+ static unsigned getHashValue(Ptr Val) {
+ return Val == getEmptyKey() || Val == getTombstoneKey() // sentinels are not dereferenced
+ ? 0
+ : Base::getHashValue(*Val); // real keys hash by pointee value
+ }
+ static bool isEqual(Ptr LHS, Ptr RHS) {
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey()) {
+ return LHS == RHS; // a sentinel equals only the identical sentinel pointer
+ }
+ return Base::isEqual(*LHS, *RHS); // two real keys are equal when their pointees are
+ }
+ };
+
/// Contains parents of a node.
class ParentVector {
public:
@@ -70,16 +93,37 @@ class ParentMapContext::ParentMap {
push_back(Value);
}
bool contains(const DynTypedNode &Value) {
- return Seen.contains(Value);
+ // Returns true iff an item equal to Value is already stored in Items.
+ // Items are added to the cache lazily, only as far as needed to answer.
+ assert(Value.getMemoizationData());
+ bool found = FragileLazySeenCache.contains(&Value);
+ while (!found && ItemsProcessed < Items.size()) {
+ const DynTypedNode *Next = &Items[ItemsProcessed];
+ // insert().second only reports that Next was new to the cache, not that
+ // it matches Value, so the match has to be checked explicitly.
+ found = FragileLazySeenCache.insert(Next).second &&
+ IndirectDenseMapInfo<const DynTypedNode>::isEqual(Next, &Value);
+ ++ItemsProcessed;
+ }
+ return found;
}
void push_back(const DynTypedNode &Value) {
- if (!Value.getMemoizationData() || Seen.insert(Value).second)
+ if (!Value.getMemoizationData() || !contains(Value)) { // nodes without memoization data are appended without a duplicate check
+ const size_t OldCapacity = Items.capacity(); // sampled before the push to detect a capacity change below
Items.push_back(Value);
+ if (OldCapacity != Items.capacity()) {
+ // Capacity changed: cached pointers into Items are treated as invalid, so restart lazy indexing.
+ ItemsProcessed = 0;
+ // Free memory to avoid doubling peak memory usage during rehashing
+ FragileLazySeenCache.clear();
+ }
+ }
}
llvm::ArrayRef<DynTypedNode> view() const { return Items; }
private:
+ // BE CAREFUL: FragileLazySeenCache stores pointers to elements of this vector.
llvm::SmallVector<DynTypedNode, 2> Items;
- llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+ // Pointers into Items, hashed and compared by pointee. Fragile: they are invalidated
+ // when the vector capacity changes, so push_back() clears the cache when that happens.
+ llvm::SmallDenseSet<const DynTypedNode *, 2,
+ IndirectDenseMapInfo<const DynTypedNode>>
+ FragileLazySeenCache;
+ // Number of leading entries of Items that contains() has already inserted into the cache.
+ size_t ItemsProcessed = 0;
};
/// Maps from a node to its parents. This is used for nodes that have
More information about the cfe-commits
mailing list