[llvm] bb4121e - [Coroutines] Add an O(n) algorithm for computing the cross suspend point

Chuanqi Xu via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 27 02:26:29 PDT 2023


Author: witstorm95
Date: 2023-07-27T17:25:32+08:00
New Revision: bb4121e65251275b5b16a63423c2bb2be79aeebb

URL: https://github.com/llvm/llvm-project/commit/bb4121e65251275b5b16a63423c2bb2be79aeebb
DIFF: https://github.com/llvm/llvm-project/commit/bb4121e65251275b5b16a63423c2bb2be79aeebb.diff

LOG: [Coroutines] Add an O(n) algorithm for computing the cross suspend point
information.

Fixed https://github.com/llvm/llvm-project/issues/62348

Propagate cross suspend point information by traversing the CFG.

At most two traversals are needed to collect all of the cross
suspend point information.

Before the patch:

```
n: 20000
4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata
552352maxresident)k
0inputs+8848outputs (0major+126254minor)pagefaults 0swaps

n: 40000
11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata
1788404maxresident)k
0inputs+17600outputs (0major+431105minor)pagefaults 0swaps

n: 60000
21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata
3809836maxresident)k
0inputs+26352outputs (0major+934749minor)pagefaults 0swaps

n: 80000
37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata
6602396maxresident)k
0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps

n: 100000
51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata
10210736maxresident)k
0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps

```
After the patch:

```
n: 20000
3.17user 0.16system 0:03.33elapsed 100%CPU (0avgtext+0avgdata
551736maxresident)k
0inputs+8848outputs (0major+126192minor)pagefaults 0swaps

n: 40000
6.10user 0.42system 0:06.54elapsed 99%CPU (0avgtext+0avgdata
1787848maxresident)k
0inputs+17600outputs (0major+432212minor)pagefaults 0swaps

n: 60000
9.13user 0.89system 0:10.03elapsed 99%CPU (0avgtext+0avgdata
3809108maxresident)k
0inputs+26352outputs (0major+931280minor)pagefaults 0swaps

n: 80000
12.44user 1.57system 0:14.02elapsed 99%CPU (0avgtext+0avgdata
6603432maxresident)k
0inputs+35096outputs (0major+1624635minor)pagefaults 0swaps

n: 100000
16.29user 2.28system 0:18.59elapsed 99%CPU (0avgtext+0avgdata
10212808maxresident)k
0inputs+43848outputs (0major+2522200minor)pagefaults 0swaps

```

Added: 
    

Modified: 
    llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f373270f951ba..85a61c430a11f6 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -98,7 +98,6 @@ class SuspendCrossingInfo {
     bool Suspend = false;
     bool End = false;
     bool KillLoop = false;
-    bool Changed = false;
   };
   SmallVector<BlockData, SmallVectorThreshold> Block;
 
@@ -106,16 +105,50 @@ class SuspendCrossingInfo {
     BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
     return llvm::predecessors(BB);
   }
+  size_t pred_size(BlockData const &BD) const {
+    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+    return llvm::pred_size(BB);
+  }
+  iterator_range<succ_iterator> successors(BlockData const &BD) const {
+    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+    return llvm::successors(BB);
+  }
 
   BlockData &getBlockData(BasicBlock *BB) {
     return Block[Mapping.blockToIndex(BB)];
   }
 
-  /// Compute the BlockData for the current function in one iteration.
-  /// Returns whether the BlockData changes in this iteration.
-  /// Initialize - Whether this is the first iteration, we can optimize
-  /// the initial case a little bit by manual loop switch.
-  template <bool Initialize = false> bool computeBlockData();
+  /// This algorithm is based on topological sorting. As we know, topological
+  /// sorting is typically used on Directed Acyclic Graph (DAG). However, a
+  /// Control Flow Graph (CFG) may not always be a DAG, as it can contain back
+  /// edges or loops. To handle this, we need to break the back edge when we
+  /// encounter it in order to ensure a valid topological sorting.
+  /// Why do we need an extra traversal when a CFG contains a back edge?
+  /// Firstly, we need to figure out how the Consumes information propagates
+  /// along the back edge. For example,
+  ///
+  ///    A -> B -> C -> D -> H
+  ///         ^         |
+  ///         |         v
+  ///         G <- F <- E
+  ///
+  /// Following the direction of the arrow, we can obtain the traversal
+  /// sequences: A, B, C, D, H, E, F, G or A, B, C, D, E, H, F, G. We know that
+  /// there is a path from C to G after the first traversal. However, we are
+  /// uncertain about the existence of a path from G to C, as the Consumes info
+  /// of G has not yet propagated to C (via B). Therefore, we need a second
+  /// traversal to propagate G's Consumes info to C (via B) and its successors.
+  /// The second traversal allows us to obtain the complete Consumes info, on
+  /// which the computation of the Kills info depends.
+
+  /// The parameter "EntryNo" represents the index associated with the entry
+  /// block.
+  /// The parameter "BlockPredecessorsNum" represents the number of predecessors
+  /// for each block.
+  /// Returns true if there exists back edges in CFG.
+  template <bool HasBackEdge = false>
+  bool collectConsumeKillInfo(size_t EntryNo,
+                              const SmallVector<size_t> &BlockPredecessorsNum);
 
 public:
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,70 +256,115 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
 }
 #endif
 
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
-  const size_t N = Mapping.size();
-  bool Changed = false;
-
-  for (size_t I = 0; I < N; ++I) {
-    auto &B = Block[I];
+template <bool HasBackEdge>
+bool SuspendCrossingInfo::collectConsumeKillInfo(
+    size_t EntryNo, const SmallVector<size_t> &BlockPredecessorsNum) {
+  bool FoundBackEdge = false;
+  SmallVector<size_t> UnvisitedBlockPredNum = BlockPredecessorsNum;
+  // BlockNo Queue with BlockPredNum[BlockNo] equal to zero.
+  std::queue<size_t> CandidateQueue;
+  // Blocks that may have a back edge.
+  DenseSet<size_t> MaybeBackEdgeSet;
+  // Visit BlockNo
+  auto visit = [&](size_t BlockNo) {
+    switch (UnvisitedBlockPredNum[BlockNo]) {
+    // Already visited; do not visit again.
+    case 0:
+      break;
+    // If predecessors number of BlockNo is 1, it means all predecessors of
+    // BlockNo have propagated its info to BlockNo. So add BlockNo to
+    // CandidateQueue.
+    case 1: {
+      CandidateQueue.push(BlockNo);
+      MaybeBackEdgeSet.erase(BlockNo);
+      UnvisitedBlockPredNum[BlockNo] = 0;
+      break;
+    }
+    // If the number of predecessors of BlockNo is bigger than 1, BlockNo has
+    // not collected full Consumes/Kills info yet. So decrease
+    // UnvisitedBlockPredNum[BlockNo] and insert BlockNo into MaybeBackEdgeSet.
+    default: {
+      UnvisitedBlockPredNum[BlockNo]--;
+      MaybeBackEdgeSet.insert(BlockNo);
+      break;
+    }
+    }
+  };
 
-    // We don't need to count the predecessors when initialization.
-    if constexpr (!Initialize)
-      // If all the predecessors of the current Block don't change,
-      // the BlockData for the current block must not change too.
-      if (all_of(predecessors(B), [this](BasicBlock *BB) {
-            return !Block[Mapping.blockToIndex(BB)].Changed;
-          })) {
-        B.Changed = false;
-        continue;
+  CandidateQueue.push(EntryNo);
+
+  // Topological sorting.
+  while (!CandidateQueue.empty()) {
+    auto &B = Block[CandidateQueue.front()];
+    CandidateQueue.pop();
+    for (BasicBlock *SI : successors(B)) {
+      auto SuccNo = Mapping.blockToIndex(SI);
+      auto &S = Block[SuccNo];
+
+      // Propagate Kills and Consumes from predecessors into S.
+      S.Consumes |= B.Consumes;
+      S.Kills |= B.Kills;
+
+      if (B.Suspend)
+        S.Kills |= B.Consumes;
+
+      if (S.Suspend) {
+        // If block S is a suspend block, it should kill all of the blocks
+        // it consumes.
+        S.Kills |= S.Consumes;
+      } else if (S.End) {
+        // If block S is an end block, it should not propagate kills as the
+        // blocks following coro.end() are reached during initial invocation
+        // of the coroutine while all the data are still available on the
+        // stack or in the registers.
+        S.Kills.reset();
+      } else {
+        // This is reached when block S is neither a Suspend nor a coro.end
+        // block, and it needs to make sure that it is not in its own kill set.
+        S.KillLoop |= S.Kills[SuccNo];
+        S.Kills.reset(SuccNo);
       }
-
-    // Saved Consumes and Kills bitsets so that it is easy to see
-    // if anything changed after propagation.
-    auto SavedConsumes = B.Consumes;
-    auto SavedKills = B.Kills;
-
-    for (BasicBlock *PI : predecessors(B)) {
-      auto PrevNo = Mapping.blockToIndex(PI);
-      auto &P = Block[PrevNo];
-
-      // Propagate Kills and Consumes from predecessors into B.
-      B.Consumes |= P.Consumes;
-      B.Kills |= P.Kills;
-
-      // If block P is a suspend block, it should propagate kills into block
-      // B for every block P consumes.
-      if (P.Suspend)
-        B.Kills |= P.Consumes;
+      // visit SuccNo.
+      visit(SuccNo);
     }
 
-    if (B.Suspend) {
-      // If block S is a suspend block, it should kill all of the blocks it
-      // consumes.
-      B.Kills |= B.Consumes;
-    } else if (B.End) {
-      // If block B is an end block, it should not propagate kills as the
-      // blocks following coro.end() are reached during initial invocation
-      // of the coroutine while all the data are still available on the
-      // stack or in the registers.
-      B.Kills.reset();
-    } else {
-      // This is reached when B block it not Suspend nor coro.end and it
-      // need to make sure that it is not in the kill set.
-      B.KillLoop |= B.Kills[I];
-      B.Kills.reset(I);
-    }
+    // If the CandidateQueue is empty but the MaybeBackEdgeSet is not, it
+    // indicates the presence of a back edge that needs to be addressed. In such
+    // cases, it is necessary to break the back edge.
+    if (CandidateQueue.empty() && !MaybeBackEdgeSet.empty()) {
+      FoundBackEdge = true;
+      size_t CandidateNo = -1;
+      if constexpr (HasBackEdge) {
+        auto IsCandidate = [this](size_t I) {
+          for (BasicBlock *PI : llvm::predecessors(Mapping.indexToBlock(I))) {
+            auto PredNo = Mapping.blockToIndex(PI);
+            auto &P = Block[PredNo];
+            // The node I can reach its predecessor. So we found a loop.
+            if (P.Consumes[I])
+              return true;
+          }
+
+          return false;
+        };
 
-    if constexpr (!Initialize) {
-      B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
-      Changed |= B.Changed;
+        for (auto I : MaybeBackEdgeSet) {
+          if (IsCandidate(I)) {
+            CandidateNo = I;
+            break;
+          }
+        }
+        assert(CandidateNo != size_t(-1) && "We collected the wrong backegdes");
+      } else
+        // When the value of HasBackEdge is false and we don't have any
+        // information about back edges, we can simply select one block from the
+        // MaybeBackEdgeSet.
+        CandidateNo = *(MaybeBackEdgeSet.begin());
+      CandidateQueue.push(CandidateNo);
+      MaybeBackEdgeSet.erase(CandidateNo);
+      UnvisitedBlockPredNum[CandidateNo] = 0;
     }
   }
-
-  if constexpr (Initialize)
-    return true;
-
-  return Changed;
+  return FoundBackEdge;
 }
 
 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
@@ -294,13 +372,16 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
   const size_t N = Mapping.size();
   Block.resize(N);
 
+  size_t EntryNo = Mapping.blockToIndex(&(F.getEntryBlock()));
+  SmallVector<size_t> BlockPredecessorsNum(N, 0);
+
   // Initialize every block so that it consumes itself
   for (size_t I = 0; I < N; ++I) {
     auto &B = Block[I];
     B.Consumes.resize(N);
     B.Kills.resize(N);
     B.Consumes.set(I);
-    B.Changed = true;
+    BlockPredecessorsNum[I] = pred_size(B);
   }
 
   // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
@@ -325,10 +406,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
       markSuspendBlock(Save);
   }
 
-  computeBlockData</*Initialize=*/true>();
-
-  while (computeBlockData())
-    ;
+  // We should collect the Consumes and Kills information initially. If there is
+  // a back edge present, it is necessary to perform the collection process
+  // again.
+  if (collectConsumeKillInfo(EntryNo, BlockPredecessorsNum))
+    collectConsumeKillInfo</*HasBackEdge*/ true>(EntryNo, BlockPredecessorsNum);
 
   LLVM_DEBUG(dump());
 }


        


More information about the llvm-commits mailing list