[clang] 6257618 - [ASTMatchers] Avoid recursion in ancestor matching to save stack space.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 22 10:44:02 PDT 2020


Author: Sam McCall
Date: 2020-09-22T19:43:41+02:00
New Revision: 625761825620f19a44c7a1482ce05d678a1b0deb

URL: https://github.com/llvm/llvm-project/commit/625761825620f19a44c7a1482ce05d678a1b0deb
DIFF: https://github.com/llvm/llvm-project/commit/625761825620f19a44c7a1482ce05d678a1b0deb.diff

LOG: [ASTMatchers] Avoid recursion in ancestor matching to save stack space.

A recent change increased the stack size of memoizedMatchesAncestorOfRecursively
leading to stack overflows on real code involving large fold expressions.
It's not totally unreasonable to choke on very deep ASTs, but as common
infrastructure it's be nice if ASTMatchFinder is more robust.
(It already uses data recursion for the regular "downward" traversal.)

Differential Revision: https://reviews.llvm.org/D86964

Added: 
    

Modified: 
    clang/lib/ASTMatchers/ASTMatchFinder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index dc937dde88e3..b9b38923495a 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -544,8 +544,9 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
     // don't invalidate any iterators.
     if (ResultCache.size() > MaxMemoizationEntries)
       ResultCache.clear();
-    return memoizedMatchesAncestorOfRecursively(Node, Ctx, Matcher, Builder,
-                                                MatchMode);
+    if (MatchMode == AncestorMatchMode::AMM_ParentOnly)
+      return matchesParentOf(Node, Matcher, Builder);
+    return matchesAnyAncestorOf(Node, Ctx, Matcher, Builder);
   }
 
   // Matches all registered matchers on the given node and calls the
@@ -699,9 +700,23 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
   void matchDispatch(const void *) { /* Do nothing. */ }
   /// @}
 
+  // Returns whether a direct parent of \p Node matches \p Matcher.
+  // Unlike matchesAnyAncestorOf there's no memoization: it doesn't save much.
+  bool matchesParentOf(const DynTypedNode &Node, const DynTypedMatcher &Matcher,
+                       BoundNodesTreeBuilder *Builder) {
+    for (const auto &Parent : ActiveASTContext->getParents(Node)) {
+      BoundNodesTreeBuilder BuilderCopy = *Builder;
+      if (Matcher.matches(Parent, this, &BuilderCopy)) {
+        *Builder = std::move(BuilderCopy);
+        return true;
+      }
+    }
+    return false;
+  }
+
   // Returns whether an ancestor of \p Node matches \p Matcher.
   //
-  // The order of matching ((which can lead to 
diff erent nodes being bound in
+  // The order of matching (which can lead to 
diff erent nodes being bound in
   // case there are multiple matches) is breadth first search.
   //
   // To allow memoization in the very common case of having deeply nested
@@ -712,51 +727,64 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
   // Once there are multiple parents, the breadth first search order does not
   // allow simple memoization on the ancestors. Thus, we only memoize as long
   // as there is a single parent.
-  bool memoizedMatchesAncestorOfRecursively(const DynTypedNode &Node,
-                                            ASTContext &Ctx,
-                                            const DynTypedMatcher &Matcher,
-                                            BoundNodesTreeBuilder *Builder,
-                                            AncestorMatchMode MatchMode) {
-    // For AST-nodes that don't have an identity, we can't memoize.
-    // When doing a single-level match, we don't need to memoize because
-    // ParentMap (in ASTContext) already memoizes the result.
-    if (!Builder->isComparable() ||
-        MatchMode == AncestorMatchMode::AMM_ParentOnly)
-      return matchesAncestorOfRecursively(Node, Ctx, Matcher, Builder,
-                                          MatchMode);
-
-    MatchKey Key;
-    Key.MatcherID = Matcher.getID();
-    Key.Node = Node;
-    Key.BoundNodes = *Builder;
-    Key.Traversal = Ctx.getParentMapContext().getTraversalKind();
-    Key.Type = MatchType::Ancestors;
-
-    // Note that we cannot use insert and reuse the iterator, as recursive
-    // calls to match might invalidate the result cache iterators.
-    MemoizationMap::iterator I = ResultCache.find(Key);
-    if (I != ResultCache.end()) {
-      *Builder = I->second.Nodes;
-      return I->second.ResultOfMatch;
-    }
+  //
+  // We avoid a recursive implementation to prevent excessive stack use on
+  // very deep ASTs (similarly to RecursiveASTVisitor's data recursion).
+  bool matchesAnyAncestorOf(DynTypedNode Node, ASTContext &Ctx,
+                            const DynTypedMatcher &Matcher,
+                            BoundNodesTreeBuilder *Builder) {
 
-    MemoizedMatchResult Result;
-    Result.Nodes = *Builder;
-    Result.ResultOfMatch = matchesAncestorOfRecursively(
-        Node, Ctx, Matcher, &Result.Nodes, MatchMode);
+    // Memoization keys that can be updated with the result.
+    // These are the memoizable nodes in the chain of unique parents, which
+    // terminates when a node has multiple parents, or matches, or is the root.
+    std::vector<MatchKey> Keys;
+    // When returning, update the memoization cache.
+    auto Finish = [&](bool Matched) {
+      for (const auto &Key : Keys) {
+        MemoizedMatchResult &CachedResult = ResultCache[Key];
+        CachedResult.ResultOfMatch = Matched;
+        CachedResult.Nodes = *Builder;
+      }
+      return Matched;
+    };
+
+    // Loop while there's a single parent and we want to attempt memoization.
+    DynTypedNodeList Parents{ArrayRef<DynTypedNode>()}; // after loop: size != 1
+    for (;;) {
+      // A cache key only makes sense if memoization is possible.
+      if (Builder->isComparable()) {
+        Keys.emplace_back();
+        Keys.back().MatcherID = Matcher.getID();
+        Keys.back().Node = Node;
+        Keys.back().BoundNodes = *Builder;
+        Keys.back().Traversal = Ctx.getParentMapContext().getTraversalKind();
+        Keys.back().Type = MatchType::Ancestors;
+
+        // Check the cache.
+        MemoizationMap::iterator I = ResultCache.find(Keys.back());
+        if (I != ResultCache.end()) {
+          Keys.pop_back(); // Don't populate the cache for the matching node!
+          *Builder = I->second.Nodes;
+          return Finish(I->second.ResultOfMatch);
+        }
+      }
 
-    MemoizedMatchResult &CachedResult = ResultCache[Key];
-    CachedResult = std::move(Result);
+      Parents = ActiveASTContext->getParents(Node);
+      // Either no parents or multiple parents: leave chain+memoize mode and
+      // enter bfs+forgetful mode.
+      if (Parents.size() != 1)
+        break;
 
-    *Builder = CachedResult.Nodes;
-    return CachedResult.ResultOfMatch;
-  }
+      // Check the next parent.
+      Node = *Parents.begin();
+      BoundNodesTreeBuilder BuilderCopy = *Builder;
+      if (Matcher.matches(Node, this, &BuilderCopy)) {
+        *Builder = std::move(BuilderCopy);
+        return Finish(true);
+      }
+    }
+    // We reached the end of the chain.
 
-  bool matchesAncestorOfRecursively(const DynTypedNode &Node, ASTContext &Ctx,
-                                    const DynTypedMatcher &Matcher,
-                                    BoundNodesTreeBuilder *Builder,
-                                    AncestorMatchMode MatchMode) {
-    const auto &Parents = ActiveASTContext->getParents(Node);
     if (Parents.empty()) {
       // Nodes may have no parents if:
       //  a) the node is the TranslationUnitDecl
@@ -775,46 +803,30 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
         llvm_unreachable("Parent map should be complete!");
       }
 #endif
-      return false;
-    }
-    if (Parents.size() == 1) {
-      // Only one parent - do recursive memoization.
-      const DynTypedNode Parent = Parents[0];
-      BoundNodesTreeBuilder BuilderCopy = *Builder;
-      if (Matcher.matches(Parent, this, &BuilderCopy)) {
-        *Builder = std::move(BuilderCopy);
-        return true;
-      }
-      if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
-        return memoizedMatchesAncestorOfRecursively(Parent, Ctx, Matcher,
-                                                    Builder, MatchMode);
-        // Once we get back from the recursive call, the result will be the
-        // same as the parent's result.
-      }
     } else {
-      // Multiple parents - BFS over the rest of the nodes.
-      llvm::DenseSet<const void *> Visited;
+      assert(Parents.size() > 1);
+      // BFS starting from the parents not yet considered.
+      // Memoization of newly visited nodes is not possible (but we still update
+      // results for the elements in the chain we found above).
       std::deque<DynTypedNode> Queue(Parents.begin(), Parents.end());
+      llvm::DenseSet<const void *> Visited;
       while (!Queue.empty()) {
         BoundNodesTreeBuilder BuilderCopy = *Builder;
         if (Matcher.matches(Queue.front(), this, &BuilderCopy)) {
           *Builder = std::move(BuilderCopy);
-          return true;
+          return Finish(true);
         }
-        if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
-          for (const auto &Parent :
-               ActiveASTContext->getParents(Queue.front())) {
-            // Make sure we do not visit the same node twice.
-            // Otherwise, we'll visit the common ancestors as often as there
-            // are splits on the way down.
-            if (Visited.insert(Parent.getMemoizationData()).second)
-              Queue.push_back(Parent);
-          }
+        for (const auto &Parent : ActiveASTContext->getParents(Queue.front())) {
+          // Make sure we do not visit the same node twice.
+          // Otherwise, we'll visit the common ancestors as often as there
+          // are splits on the way down.
+          if (Visited.insert(Parent.getMemoizationData()).second)
+            Queue.push_back(Parent);
         }
         Queue.pop_front();
       }
     }
-    return false;
+    return Finish(false);
   }
 
   // Implements a BoundNodesTree::Visitor that calls a MatchCallback with


        


More information about the cfe-commits mailing list