[llvm-branch-commits] [clang] release/20.x: Reduce memory usage in AST parent map generation by lazily checking if nodes have been seen (#129934) (PR #131209)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Mar 17 14:47:24 PDT 2025


https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/131209

>From 0b23d98dceaa9f965bfa196a6adfa38b1b8bda8e Mon Sep 17 00:00:00 2001
From: higher-performance <higher.performance.github at gmail.com>
Date: Thu, 13 Mar 2025 16:02:39 -0400
Subject: [PATCH] Reduce memory usage in AST parent map generation by lazily
 checking if nodes have been seen (#129934)

This mitigates a regression introduced in #87824.

The mitigation here is to store pointers the deduplicated AST nodes, rather than copies of the nodes themselves. This allows a pointer-optimized set to be used and saves a lot of memory because `clang::DynTypedNode` is ~5 times larger than a pointer.

Fixes #129808.

(cherry picked from commit 8c7f0eaa6ee3f84e3d8260535cced234bed4fa28)
---
 clang/lib/AST/ParentMapContext.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index 7ff492443031d..d8dd352c42d6b 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/ParentMapContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/TemplateBase.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 using namespace clang;
 
@@ -69,17 +70,21 @@ class ParentMapContext::ParentMap {
       for (; N > 0; --N)
         push_back(Value);
     }
-    bool contains(const DynTypedNode &Value) {
-      return Seen.contains(Value);
+    bool contains(const DynTypedNode &Value) const {
+      const void *Identity = Value.getMemoizationData();
+      assert(Identity);
+      return Dedup.contains(Identity);
     }
     void push_back(const DynTypedNode &Value) {
-      if (!Value.getMemoizationData() || Seen.insert(Value).second)
+      const void *Identity = Value.getMemoizationData();
+      if (!Identity || Dedup.insert(Identity).second) {
         Items.push_back(Value);
+      }
     }
     llvm::ArrayRef<DynTypedNode> view() const { return Items; }
   private:
-    llvm::SmallVector<DynTypedNode, 2> Items;
-    llvm::SmallDenseSet<DynTypedNode, 2> Seen;
+    llvm::SmallVector<DynTypedNode, 1> Items;
+    llvm::SmallPtrSet<const void *, 2> Dedup;
   };
 
   /// Maps from a node to its parents. This is used for nodes that have



More information about the llvm-branch-commits mailing list