[llvm] 77ef88d - [Coroutines] Add an O(n) algorithm for computing the cross suspend point
Chuanqi Xu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 27 02:29:56 PDT 2023
Author: witstorm95
Date: 2023-07-27T17:28:51+08:00
New Revision: 77ef88d7ee32dc18a4d6e3f92a61393e759e3b83
URL: https://github.com/llvm/llvm-project/commit/77ef88d7ee32dc18a4d6e3f92a61393e759e3b83
DIFF: https://github.com/llvm/llvm-project/commit/77ef88d7ee32dc18a4d6e3f92a61393e759e3b83.diff
LOG: [Coroutines] Add an O(n) algorithm for computing the cross suspend point
information.
Fixed https://github.com/llvm/llvm-project/issues/62348
Propagate cross suspend point information by visiting CFG.
Just only go through two times at most, you can get all the cross
suspend point information.
Before the patch:
```
n: 20000
4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata
552352maxresident)k
0inputs+8848outputs (0major+126254minor)pagefaults 0swaps
n: 40000
11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata
1788404maxresident)k
0inputs+17600outputs (0major+431105minor)pagefaults 0swaps
n: 60000
21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata
3809836maxresident)k
0inputs+26352outputs (0major+934749minor)pagefaults 0swaps
n: 80000
37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata
6602396maxresident)k
0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps
n: 100000
51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata
10210736maxresident)k
0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps
```
After the patch:
```
n: 20000
3.17user 0.16system 0:03.33elapsed 100%CPU (0avgtext+0avgdata
551736maxresident)k
0inputs+8848outputs (0major+126192minor)pagefaults 0swaps
n: 40000
6.10user 0.42system 0:06.54elapsed 99%CPU (0avgtext+0avgdata
1787848maxresident)k
0inputs+17600outputs (0major+432212minor)pagefaults 0swaps
n: 60000
9.13user 0.89system 0:10.03elapsed 99%CPU (0avgtext+0avgdata
3809108maxresident)k
0inputs+26352outputs (0major+931280minor)pagefaults 0swaps
n: 80000
12.44user 1.57system 0:14.02elapsed 99%CPU (0avgtext+0avgdata
6603432maxresident)k
0inputs+35096outputs (0major+1624635minor)pagefaults 0swaps
n: 100000
16.29user 2.28system 0:18.59elapsed 99%CPU (0avgtext+0avgdata
10212808maxresident)k
0inputs+43848outputs (0major+2522200minor)pagefaults 0swaps
```
Reviewed By: ChuanqiXu
Differential Revision: https://reviews.llvm.org/D154695
Added:
Modified:
llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f373270f951ba..85a61c430a11f6 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -98,7 +98,6 @@ class SuspendCrossingInfo {
bool Suspend = false;
bool End = false;
bool KillLoop = false;
- bool Changed = false;
};
SmallVector<BlockData, SmallVectorThreshold> Block;
@@ -106,16 +105,50 @@ class SuspendCrossingInfo {
BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
return llvm::predecessors(BB);
}
+ size_t pred_size(BlockData const &BD) const {
+ BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+ return llvm::pred_size(BB);
+ }
+ iterator_range<succ_iterator> successors(BlockData const &BD) const {
+ BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+ return llvm::successors(BB);
+ }
BlockData &getBlockData(BasicBlock *BB) {
return Block[Mapping.blockToIndex(BB)];
}
- /// Compute the BlockData for the current function in one iteration.
- /// Returns whether the BlockData changes in this iteration.
- /// Initialize - Whether this is the first iteration, we can optimize
- /// the initial case a little bit by manual loop switch.
- template <bool Initialize = false> bool computeBlockData();
+ /// This algorithm is based on topological sorting. As we know, topological
+ /// sorting is typically used on Directed Acyclic Graph (DAG). However, a
+ /// Control Flow Graph (CFG) may not always be a DAG, as it can contain back
+ /// edges or loops. To handle this, we need to break the back edge when we
+ /// encounter it in order to ensure a valid topological sorting.
+ /// Why do we need an extra traversal when a CFG contains a back edge?
+ /// Firstly, we need to figure out how the Consumes information propagates
+ /// along the back edge. For example,
+ ///
+ /// A -> B -> C -> D -> H
+ /// ^ |
+ /// | v
+ /// G <- F <- E
+ ///
+ /// Following the direction of the arrow, we can obtain the traversal
+ /// sequences: A, B, C, D, H, E, F, G or A, B, C, D, E, H, F, G. We know that
+ /// there is a path from C to G after the first traversal. However, we are
+ /// uncertain about the existence of a path from G to C, as the Consumes info
+ /// of G has not yet propagated to C (via B). Therefore, we need a second
+ /// traversal to propagate G's Consumes info to C (via B) and its successors.
+ /// The second traversal allows us to obtain the complete Consumes info. Since
+ /// the computation of the Kills info depends on the Consumes info.
+
+ /// The parameter "EntryNo" represents the index associated with the entry
+ /// block.
+ /// The parameter "BlockPredecessorsNum" represents the number of predecessors
+ /// for each block.
+ /// Returns true if there exists back edges in CFG.
+ template <bool HasBackEdge = false>
+ bool collectConsumeKillInfo(size_t EntryNo,
+ const SmallVector<size_t> &BlockPredecessorsNum);
public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,70 +256,115 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
#endif
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
- const size_t N = Mapping.size();
- bool Changed = false;
-
- for (size_t I = 0; I < N; ++I) {
- auto &B = Block[I];
+template <bool HasBackEdge>
+bool SuspendCrossingInfo::collectConsumeKillInfo(
+ size_t EntryNo, const SmallVector<size_t> &BlockPredecessorsNum) {
+ bool FoundBackEdge = false;
+ SmallVector<size_t> UnvisitedBlockPredNum = BlockPredecessorsNum;
+ // BlockNo Queue with BlockPredNum[BlockNo] equal to zero.
+ std::queue<size_t> CandidateQueue;
+ // For blocks that maybe has a back edge.
+ DenseSet<size_t> MaybeBackEdgeSet;
+ // Visit BlockNo
+ auto visit = [&](size_t BlockNo) {
+ switch (UnvisitedBlockPredNum[BlockNo]) {
+ // Already visited, not visit again.
+ case 0:
+ break;
+ // If predecessors number of BlockNo is 1, it means all predecessors of
+ // BlockNo have propagated its info to BlockNo. So add BlockNo to
+ // CandidateQueue.
+ case 1: {
+ CandidateQueue.push(BlockNo);
+ MaybeBackEdgeSet.erase(BlockNo);
+ UnvisitedBlockPredNum[BlockNo] = 0;
+ break;
+ }
+ // If predecessors number of BlockNo bigger than 1, it means BlockNo not
+ // collect full Consumes/Kills info yet. So decrease
+ // UnvisitedBlockPredNum[BlockNo] and insert BlockNo into MaybeBackEdgeSet.
+ default: {
+ UnvisitedBlockPredNum[BlockNo]--;
+ MaybeBackEdgeSet.insert(BlockNo);
+ break;
+ }
+ }
+ };
- // We don't need to count the predecessors when initialization.
- if constexpr (!Initialize)
- // If all the predecessors of the current Block don't change,
- // the BlockData for the current block must not change too.
- if (all_of(predecessors(B), [this](BasicBlock *BB) {
- return !Block[Mapping.blockToIndex(BB)].Changed;
- })) {
- B.Changed = false;
- continue;
+ CandidateQueue.push(EntryNo);
+
+ // Topological sorting.
+ while (!CandidateQueue.empty()) {
+ auto &B = Block[CandidateQueue.front()];
+ CandidateQueue.pop();
+ for (BasicBlock *SI : successors(B)) {
+ auto SuccNo = Mapping.blockToIndex(SI);
+ auto &S = Block[SuccNo];
+
+ // Propagate Kills and Consumes from predecessors into S.
+ S.Consumes |= B.Consumes;
+ S.Kills |= B.Kills;
+
+ if (B.Suspend)
+ S.Kills |= B.Consumes;
+
+ if (S.Suspend) {
+ // If block S is a suspend block, it should kill all of the blocks
+ // it consumes.
+ S.Kills |= S.Consumes;
+ } else if (S.End) {
+ // If block S is an end block, it should not propagate kills as the
+ // blocks following coro.end() are reached during initial invocation
+ // of the coroutine while all the data are still available on the
+ // stack or in the registers.
+ S.Kills.reset();
+ } else {
+ // This is reached when S block it not Suspend nor coro.end and it
+ // need to make sure that it is not in the kill set.
+ S.KillLoop |= S.Kills[SuccNo];
+ S.Kills.reset(SuccNo);
}
-
- // Saved Consumes and Kills bitsets so that it is easy to see
- // if anything changed after propagation.
- auto SavedConsumes = B.Consumes;
- auto SavedKills = B.Kills;
-
- for (BasicBlock *PI : predecessors(B)) {
- auto PrevNo = Mapping.blockToIndex(PI);
- auto &P = Block[PrevNo];
-
- // Propagate Kills and Consumes from predecessors into B.
- B.Consumes |= P.Consumes;
- B.Kills |= P.Kills;
-
- // If block P is a suspend block, it should propagate kills into block
- // B for every block P consumes.
- if (P.Suspend)
- B.Kills |= P.Consumes;
+ // visit SuccNo.
+ visit(SuccNo);
}
- if (B.Suspend) {
- // If block S is a suspend block, it should kill all of the blocks it
- // consumes.
- B.Kills |= B.Consumes;
- } else if (B.End) {
- // If block B is an end block, it should not propagate kills as the
- // blocks following coro.end() are reached during initial invocation
- // of the coroutine while all the data are still available on the
- // stack or in the registers.
- B.Kills.reset();
- } else {
- // This is reached when B block it not Suspend nor coro.end and it
- // need to make sure that it is not in the kill set.
- B.KillLoop |= B.Kills[I];
- B.Kills.reset(I);
- }
+ // If the CandidateQueue is empty but the MaybeBackEdgeSet is not, it
+ // indicates the presence of a back edge that needs to be addressed. In such
+ // cases, it is necessary to break the back edge.
+ if (CandidateQueue.empty() && !MaybeBackEdgeSet.empty()) {
+ FoundBackEdge = true;
+ size_t CandidateNo = -1;
+ if constexpr (HasBackEdge) {
+ auto IsCandidate = [this](size_t I) {
+ for (BasicBlock *PI : llvm::predecessors(Mapping.indexToBlock(I))) {
+ auto PredNo = Mapping.blockToIndex(PI);
+ auto &P = Block[PredNo];
+ // The node I can reach its predecessor. So we found a loop.
+ if (P.Consumes[I])
+ return true;
+ }
+
+ return false;
+ };
- if constexpr (!Initialize) {
- B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
- Changed |= B.Changed;
+ for (auto I : MaybeBackEdgeSet) {
+ if (IsCandidate(I)) {
+ CandidateNo = I;
+ break;
+ }
+ }
+ assert(CandidateNo != size_t(-1) && "We collected the wrong backegdes");
+ } else
+ // When the value of HasBackEdge is false and we don't have any
+ // information about back edges, we can simply select one block from the
+ // MaybeBackEdgeSet.
+ CandidateNo = *(MaybeBackEdgeSet.begin());
+ CandidateQueue.push(CandidateNo);
+ MaybeBackEdgeSet.erase(CandidateNo);
+ UnvisitedBlockPredNum[CandidateNo] = 0;
}
}
-
- if constexpr (Initialize)
- return true;
-
- return Changed;
+ return FoundBackEdge;
}
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
@@ -294,13 +372,16 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
const size_t N = Mapping.size();
Block.resize(N);
+ size_t EntryNo = Mapping.blockToIndex(&(F.getEntryBlock()));
+ SmallVector<size_t> BlockPredecessorsNum(N, 0);
+
// Initialize every block so that it consumes itself
for (size_t I = 0; I < N; ++I) {
auto &B = Block[I];
B.Consumes.resize(N);
B.Kills.resize(N);
B.Consumes.set(I);
- B.Changed = true;
+ BlockPredecessorsNum[I] = pred_size(B);
}
// Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
@@ -325,10 +406,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
markSuspendBlock(Save);
}
- computeBlockData</*Initialize=*/true>();
-
- while (computeBlockData())
- ;
+ // We should collect the Consumes and Kills information initially. If there is
+ // a back edge present, it is necessary to perform the collection process
+ // again.
+ if (collectConsumeKillInfo(EntryNo, BlockPredecessorsNum))
+ collectConsumeKillInfo</*HasBackEdge*/ true>(EntryNo, BlockPredecessorsNum);
LLVM_DEBUG(dump());
}
More information about the llvm-commits
mailing list