[llvm] 132bb5c - [NFC][Coroutines] Use a reverse post-order to guide the computation about cross suspend information to reach a fixed point faster.

Chuanqi Xu via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 8 20:23:30 PDT 2023


Author: witstorm95
Date: 2023-08-09T11:21:57+08:00
New Revision: 132bb5cc5fd5f91e6a25e8004ac5d6516518f31a

URL: https://github.com/llvm/llvm-project/commit/132bb5cc5fd5f91e6a25e8004ac5d6516518f31a
DIFF: https://github.com/llvm/llvm-project/commit/132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.diff

LOG: [NFC][Coroutines] Use a reverse post-order to guide the computation about cross suspend information to reach a fixed point faster.

Fixed https://github.com/llvm/llvm-project/issues/62348

Propagate cross suspend point information along reverse post-order.
It does not modify the original function, just selects a better
traversal order.

Before the patch:

```
n: 20000
4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata
552352maxresident)k
0inputs+8848outputs (0major+126254minor)pagefaults 0swaps

n: 40000
11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata
1788404maxresident)k
0inputs+17600outputs (0major+431105minor)pagefaults 0swaps

n: 60000
21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata
3809836maxresident)k
0inputs+26352outputs (0major+934749minor)pagefaults 0swaps

n: 80000
37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata
6602396maxresident)k
0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps

n: 100000
51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata
10210736maxresident)k
0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps

```
After the patch:

```
n: 20000
3.08user 0.12system 0:03.21elapsed 99%CPU (0avgtext+0avgdata
551012maxresident)k
0inputs+8848outputs (0major+129349minor)pagefaults 0swaps

n: 40000
5.88user 0.33system 0:06.22elapsed 99%CPU (0avgtext+0avgdata
1789248maxresident)k
0inputs+17600outputs (0major+435096minor)pagefaults 0swaps

n: 60000
8.84user 0.77system 0:09.63elapsed 99%CPU (0avgtext+0avgdata
3807800maxresident)k
0inputs+26352outputs (0major+939119minor)pagefaults 0swaps

n: 80000
11.64user 1.58system 0:13.23elapsed 99%CPU (0avgtext+0avgdata
6604708maxresident)k
0inputs+35096outputs (0major+1629566minor)pagefaults 0swaps

n: 100000
15.21user 2.56system 0:17.79elapsed 99%CPU (0avgtext+0avgdata
10208828maxresident)k
8inputs+43848outputs (0major+2526611minor)pagefaults 0swaps

```

Reviewed By: MatzeB, ChuanqiXu

Differential Revision: https://reviews.llvm.org/D156850

Added: 
    

Modified: 
    llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f373270f951ba..58115fafbdf8ea 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -63,7 +63,7 @@ class BlockToIndexMapping {
     llvm::sort(V);
   }
 
-  size_t blockToIndex(BasicBlock *BB) const {
+  size_t blockToIndex(BasicBlock const *BB) const {
     auto *I = llvm::lower_bound(V, BB);
     assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
     return I - V.begin();
@@ -112,10 +112,11 @@ class SuspendCrossingInfo {
   }
 
   /// Compute the BlockData for the current function in one iteration.
-  /// Returns whether the BlockData changes in this iteration.
   /// Initialize - Whether this is the first iteration, we can optimize
   /// the initial case a little bit by manual loop switch.
-  template <bool Initialize = false> bool computeBlockData();
+  /// Returns whether the BlockData changes in this iteration.
+  template <bool Initialize = false>
+  bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
 
 public:
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,12 +224,14 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
 }
 #endif
 
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
-  const size_t N = Mapping.size();
+template <bool Initialize>
+bool SuspendCrossingInfo::computeBlockData(
+    const ReversePostOrderTraversal<Function *> &RPOT) {
   bool Changed = false;
 
-  for (size_t I = 0; I < N; ++I) {
-    auto &B = Block[I];
+  for (const BasicBlock *BB : RPOT) {
+    auto BBNo = Mapping.blockToIndex(BB);
+    auto &B = Block[BBNo];
 
     // We don't need to count the predecessors when initialization.
     if constexpr (!Initialize)
@@ -261,7 +264,7 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
     }
 
     if (B.Suspend) {
-      // If block S is a suspend block, it should kill all of the blocks it
+      // If block B is a suspend block, it should kill all of the blocks it
       // consumes.
       B.Kills |= B.Consumes;
     } else if (B.End) {
@@ -273,8 +276,8 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
     } else {
       // This is reached when B block it not Suspend nor coro.end and it
       // need to make sure that it is not in the kill set.
-      B.KillLoop |= B.Kills[I];
-      B.Kills.reset(I);
+      B.KillLoop |= B.Kills[BBNo];
+      B.Kills.reset(BBNo);
     }
 
     if constexpr (!Initialize) {
@@ -283,9 +286,6 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
     }
   }
 
-  if constexpr (Initialize)
-    return true;
-
   return Changed;
 }
 
@@ -325,9 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
       markSuspendBlock(Save);
   }
 
-  computeBlockData</*Initialize=*/true>();
-
-  while (computeBlockData())
+  // It is considered to be faster to use RPO traversal for forward-edges
+  // dataflow analysis.
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+  computeBlockData</*Initialize=*/true>(RPOT);
+  while (computeBlockData</*Initialize*/ false>(RPOT))
     ;
 
   LLVM_DEBUG(dump());


        


More information about the llvm-commits mailing list