[llvm] 776e456 - [llvm-cov gcov] Make recursive propagateCounts iterative. NFC

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 11 11:22:20 PDT 2023


Author: Fangrui Song
Date: 2023-10-11T11:22:13-07:00
New Revision: 776e456dd883f92c9753e34d8ea403088f06de30

URL: https://github.com/llvm/llvm-project/commit/776e456dd883f92c9753e34d8ea403088f06de30
DIFF: https://github.com/llvm/llvm-project/commit/776e456dd883f92c9753e34d8ea403088f06de30.diff

LOG: [llvm-cov gcov] Make recursive propagateCounts iterative. NFC

propagateCounts computes unmeasured arc counts (see
commit b9d086693b5baebc477793af0d86a447bae01b6f).

In an x86-64 build using -O3 -fno-omit-frame-pointer, propagateCounts uses 80
bytes per stack frame. If a function contains 1e5 basic blocks on a tree path
(Kirchhoff's circuit law optimization), the used stack space will be 8MB (default
ulimit -s in many configurations). (In a -O0 build, a stack frame costs 224
bytes.) 1e5 is ample for most configurations. However, for library users using
threads (e.g. in RPC handlers), a remaining thread stack of 64KiB allows just
819 stack frames, which is too limited.

Switch to an iterative form to avoid stack overflow issues. The iterative form
matches the other iterative functions in this file
(https://reviews.llvm.org/D93073).

Alternative to #68455

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/GCOV.h
    llvm/lib/ProfileData/GCOV.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index 674260c81fa6a24..c8af71dbf61ef1f 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -249,7 +249,7 @@ class GCOVFunction {
     return make_range(blocks.begin(), blocks.end());
   }
 
-  uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred);
+  void propagateCounts(const GCOVBlock &v, GCOVArc *pred);
   void print(raw_ostream &OS) const;
   void dump() const;
 

diff  --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 0a3330fde1d100f..5fce3dd5f7b7007 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -365,25 +365,60 @@ GCOVBlock &GCOVFunction::getExitBlock() const {
 // For each basic block, the sum of incoming edge counts equals the sum of
 // outgoing edge counts by Kirchoff's circuit law. If the unmeasured arcs form a
 // spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be
-// uniquely identified.
-uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
-  // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed; otherwise
-  // this prevents infinite recursion.
-  if (!visited.insert(&v).second)
-    return 0;
-
-  uint64_t excess = 0;
-  for (GCOVArc *e : v.srcs())
-    if (e != pred)
-      excess += e->onTree() ? propagateCounts(e->src, e) : e->count;
-  for (GCOVArc *e : v.dsts())
-    if (e != pred)
-      excess -= e->onTree() ? propagateCounts(e->dst, e) : e->count;
-  if (int64_t(excess) < 0)
-    excess = -excess;
-  if (pred)
-    pred->count = excess;
-  return excess;
+// uniquely identified. Use an iterative algorithm to decrease stack usage for
+// library users in threads. See the edge propagation algorithm in Optimally
+// Profiling and Tracing Programs, ACM Transactions on Programming Languages and
+// Systems, 1994.
+void GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) {
+  struct Elem {
+    const GCOVBlock &v;
+    GCOVArc *pred;
+    bool inDst;
+    size_t i = 0;
+    uint64_t excess = 0;
+  };
+
+  SmallVector<Elem, 0> stack;
+  stack.push_back({v, pred, false});
+  for (;;) {
+    Elem &u = stack.back();
+    // If GCOV_ARC_ON_TREE edges do form a tree, visited is not needed;
+    // otherwise, this prevents infinite recursion for bad input.
+    if (u.i == 0 && !visited.insert(&u.v).second) {
+      stack.pop_back();
+      if (stack.empty())
+        break;
+      continue;
+    }
+    if (u.i < u.v.pred.size()) {
+      GCOVArc *e = u.v.pred[u.i++];
+      if (e != u.pred) {
+        if (e->onTree())
+          stack.push_back({e->src, e, /*inDst=*/false});
+        else
+          u.excess += e->count;
+      }
+    } else if (u.i < u.v.pred.size() + u.v.succ.size()) {
+      GCOVArc *e = u.v.succ[u.i++ - u.v.pred.size()];
+      if (e != u.pred) {
+        if (e->onTree())
+          stack.push_back({e->dst, e, /*inDst=*/true});
+        else
+          u.excess -= e->count;
+      }
+    } else {
+      uint64_t excess = u.excess;
+      if (static_cast<int64_t>(excess) < 0)
+        excess = -excess;
+      if (u.pred)
+        u.pred->count = excess;
+      bool inDst = u.inDst;
+      stack.pop_back();
+      if (stack.empty())
+        break;
+      stack.back().excess += inDst ? -excess : excess;
+    }
+  }
 }
 
 void GCOVFunction::print(raw_ostream &OS) const {


        


More information about the llvm-commits mailing list