[llvm] 132bb5c - [NFC][Coroutines] Use a reverse post-order to guide the computation of cross suspend information to reach a fixed point faster.
Chuanqi Xu via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 8 20:23:30 PDT 2023
Author: witstorm95
Date: 2023-08-09T11:21:57+08:00
New Revision: 132bb5cc5fd5f91e6a25e8004ac5d6516518f31a
URL: https://github.com/llvm/llvm-project/commit/132bb5cc5fd5f91e6a25e8004ac5d6516518f31a
DIFF: https://github.com/llvm/llvm-project/commit/132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.diff
LOG: [NFC][Coroutines] Use a reverse post-order to guide the computation of cross suspend information to reach a fixed point faster.
Fixed https://github.com/llvm/llvm-project/issues/62348
Propagate cross suspend point information along reverse post-order.
It does not modify the original function, just selects a better
traversal order.
Before the patch:
```
n: 20000
4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata
552352maxresident)k
0inputs+8848outputs (0major+126254minor)pagefaults 0swaps
n: 40000
11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata
1788404maxresident)k
0inputs+17600outputs (0major+431105minor)pagefaults 0swaps
n: 60000
21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata
3809836maxresident)k
0inputs+26352outputs (0major+934749minor)pagefaults 0swaps
n: 80000
37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata
6602396maxresident)k
0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps
n: 100000
51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata
10210736maxresident)k
0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps
```
After the patch:
```
n: 20000
3.08user 0.12system 0:03.21elapsed 99%CPU (0avgtext+0avgdata
551012maxresident)k
0inputs+8848outputs (0major+129349minor)pagefaults 0swaps
n: 40000
5.88user 0.33system 0:06.22elapsed 99%CPU (0avgtext+0avgdata
1789248maxresident)k
0inputs+17600outputs (0major+435096minor)pagefaults 0swaps
n: 60000
8.84user 0.77system 0:09.63elapsed 99%CPU (0avgtext+0avgdata
3807800maxresident)k
0inputs+26352outputs (0major+939119minor)pagefaults 0swaps
n: 80000
11.64user 1.58system 0:13.23elapsed 99%CPU (0avgtext+0avgdata
6604708maxresident)k
0inputs+35096outputs (0major+1629566minor)pagefaults 0swaps
n: 100000
15.21user 2.56system 0:17.79elapsed 99%CPU (0avgtext+0avgdata
10208828maxresident)k
8inputs+43848outputs (0major+2526611minor)pagefaults 0swaps
```
Reviewed By: MatzeB, ChuanqiXu
Differential Revision: https://reviews.llvm.org/D156850
Added:
Modified:
llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f373270f951ba..58115fafbdf8ea 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -63,7 +63,7 @@ class BlockToIndexMapping {
llvm::sort(V);
}
- size_t blockToIndex(BasicBlock *BB) const {
+ size_t blockToIndex(BasicBlock const *BB) const {
auto *I = llvm::lower_bound(V, BB);
assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
return I - V.begin();
@@ -112,10 +112,11 @@ class SuspendCrossingInfo {
}
/// Compute the BlockData for the current function in one iteration.
- /// Returns whether the BlockData changes in this iteration.
/// Initialize - Whether this is the first iteration, we can optimize
/// the initial case a little bit by manual loop switch.
- template <bool Initialize = false> bool computeBlockData();
+ /// Returns whether the BlockData changes in this iteration.
+ template <bool Initialize = false>
+ bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,12 +224,14 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
#endif
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
- const size_t N = Mapping.size();
+template <bool Initialize>
+bool SuspendCrossingInfo::computeBlockData(
+ const ReversePostOrderTraversal<Function *> &RPOT) {
bool Changed = false;
- for (size_t I = 0; I < N; ++I) {
- auto &B = Block[I];
+ for (const BasicBlock *BB : RPOT) {
+ auto BBNo = Mapping.blockToIndex(BB);
+ auto &B = Block[BBNo];
// We don't need to count the predecessors when initialization.
if constexpr (!Initialize)
@@ -261,7 +264,7 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
if (B.Suspend) {
- // If block S is a suspend block, it should kill all of the blocks it
+ // If block B is a suspend block, it should kill all of the blocks it
// consumes.
B.Kills |= B.Consumes;
} else if (B.End) {
@@ -273,8 +276,8 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
} else {
// This is reached when B block it not Suspend nor coro.end and it
// need to make sure that it is not in the kill set.
- B.KillLoop |= B.Kills[I];
- B.Kills.reset(I);
+ B.KillLoop |= B.Kills[BBNo];
+ B.Kills.reset(BBNo);
}
if constexpr (!Initialize) {
@@ -283,9 +286,6 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
}
- if constexpr (Initialize)
- return true;
-
return Changed;
}
@@ -325,9 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
markSuspendBlock(Save);
}
- computeBlockData</*Initialize=*/true>();
-
- while (computeBlockData())
+ // It is considered to be faster to use RPO traversal for forward-edges
+ // dataflow analysis.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ computeBlockData</*Initialize=*/true>(RPOT);
+ while (computeBlockData</*Initialize*/ false>(RPOT))
;
LLVM_DEBUG(dump());
More information about the llvm-commits
mailing list