[llvm] b10deb9 - [Coroutines] Optimized coroutine elision based on reachability
Jun Ma via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 22:44:05 PST 2020
Author: Jun Ma
Date: 2020-03-05T14:43:50+08:00
New Revision: b10deb9487e1ba2965e09c99d04ac5193e10f8ed
URL: https://github.com/llvm/llvm-project/commit/b10deb9487e1ba2965e09c99d04ac5193e10f8ed
DIFF: https://github.com/llvm/llvm-project/commit/b10deb9487e1ba2965e09c99d04ac5193e10f8ed.diff
LOG: [Coroutines] Optimized coroutine elision based on reachability
Differential Revision: https://reviews.llvm.org/D75440
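In essence, the change replaces the final-suspend heuristic with a conservative
reachability check: starting from the block of coro.begin, walk the CFG while
treating every block that contains a coro.destroy as already visited, and report
an escape if any non-exceptional terminator block is reached (or if a visit
budget runs out). Below is a minimal standalone sketch of that walk, using
simplified stand-in types and a fixed budget rather than the LLVM classes and
the size-scaled limit used in the diff; the special handling of coro.suspend
switches is omitted.

  #include <unordered_set>
  #include <vector>

  // Simplified stand-in for an LLVM BasicBlock: a node with successor edges.
  struct Block {
    std::vector<const Block *> Succs;
  };

  // Returns true if any block in Exits is reachable from Start without
  // passing through a block in DestroyBlocks. Once the visit budget is
  // exhausted we conservatively return true, i.e. assume an escape path.
  bool hasEscapePath(const Block *Start,
                     const std::unordered_set<const Block *> &Exits,
                     const std::unordered_set<const Block *> &DestroyBlocks,
                     unsigned Limit = 32) {
    std::unordered_set<const Block *> Visited(DestroyBlocks);
    std::vector<const Block *> Worklist{Start};
    while (!Worklist.empty()) {
      const Block *BB = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(BB).second)
        continue;                      // already seen, or holds a coro.destroy
      if (Exits.count(BB))
        return true;                   // reached a normal terminator: escape
      if (--Limit == 0)
        return true;                   // budget exhausted: assume an escape
      for (const Block *S : BB->Succs) // otherwise follow every successor
        Worklist.push_back(S);
    }
    // Every path from Start runs into a coro.destroy first.
    return false;
  }

The actual patch additionally special-cases the switch fed by llvm.coro.suspend,
following only its resume and cleanup successors, since the coroutine frame does
not change outside the coroutine body.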
Added:
Modified:
llvm/lib/Transforms/Coroutines/CoroElide.cpp
llvm/test/Transforms/Coroutines/coro-heap-elide.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 0eb5096c47ba..ecf5450a1861 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -30,7 +30,7 @@ struct Lowerer : coro::LowererBase {
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
SmallVector<CoroFreeInst *, 1> CoroFrees;
- CoroSuspendInst *CoroFinalSuspend;
+ SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
Lowerer(Module &M) : LowererBase(M) {}
@@ -38,6 +38,8 @@ struct Lowerer : coro::LowererBase {
bool shouldElide(Function *F, DominatorTree &DT) const;
void collectPostSplitCoroIds(Function *F);
bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
+ bool hasEscapePath(const CoroBeginInst *,
+ const SmallPtrSetImpl<BasicBlock *> &) const;
};
} // end anonymous namespace
@@ -142,6 +144,52 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
removeTailCallAttribute(Frame, AA);
}
+bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
+ const SmallPtrSetImpl<BasicBlock *> &TIs) const {
+ const auto &It = DestroyAddr.find(CB);
+ assert(It != DestroyAddr.end());
+
+ // Limit the number of blocks we visit.
+ unsigned Limit = 32 * (1 + It->second.size());
+
+ SmallVector<const BasicBlock *, 32> Worklist;
+ Worklist.push_back(CB->getParent());
+
+ SmallPtrSet<const BasicBlock *, 32> Visited;
+ // Treat blocks that contain a coro.destroy as already visited, so that we
+ // skip any path that passes through a coro.destroy.
+ for (auto *DA : It->second)
+ Visited.insert(DA->getParent());
+
+ do {
+ const auto *BB = Worklist.pop_back_val();
+ if (!Visited.insert(BB).second)
+ continue;
+ if (TIs.count(BB))
+ return true;
+
+ // Conservatively say that there is potentially a path.
+ if (!--Limit)
+ return true;
+
+ auto TI = BB->getTerminator();
+ // Although the default destination of a coro.suspend switch is the suspend
+ // block, which would otherwise look like an escape path to a normal
+ // terminator, it is reasonable to skip it since the coroutine frame does
+ // not change outside the coroutine body.
+ if (isa<SwitchInst>(TI) &&
+ CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
+ Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(1));
+ Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(2));
+ } else
+ Worklist.append(succ_begin(BB), succ_end(BB));
+
+ } while (!Worklist.empty());
+
+ // We have exhausted all possible paths and are certain that coro.begin
+ // cannot reach any of the terminators.
+ return false;
+}
+
bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// If no CoroAllocs, we cannot suppress allocation, so elision is not
// possible.
@@ -154,61 +202,34 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
// If the value escaped, then coro.destroy would have been referencing a
// memory location storing that value and not the virtual register.
- SmallPtrSet<Instruction *, 8> Terminators;
- bool HasMultiPred = false;
+ SmallPtrSet<BasicBlock *, 8> Terminators;
// First gather all of the non-exceptional terminators for the function.
// Consider the final coro.suspend as the real terminator when the current
// function is a coroutine.
- if (CoroFinalSuspend) {
- // If block of final coro.suspend has more than one predecessor,
- // then there is one resume path and the others are exceptional paths,
- // consider these predecessors as terminators.
- BasicBlock *FinalBB = CoroFinalSuspend->getParent();
- if (FinalBB->hasNPredecessorsOrMore(2)) {
- HasMultiPred = true;
- for (auto *B : predecessors(FinalBB))
- Terminators.insert(B->getTerminator());
- } else
- Terminators.insert(CoroFinalSuspend);
- } else {
for (BasicBlock &B : *F) {
auto *TI = B.getTerminator();
if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
!isa<UnreachableInst>(TI))
- Terminators.insert(TI);
+ Terminators.insert(&B);
}
- }
// Filter out the coro.destroy that lie along exceptional paths.
- SmallPtrSet<CoroSubFnInst *, 4> DAs;
- SmallPtrSet<Instruction *, 2> TIs;
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
for (auto &It : DestroyAddr) {
- for (CoroSubFnInst *DA : It.second) {
- for (Instruction *TI : Terminators) {
- if (DT.dominates(DA, TI)) {
- if (HasMultiPred)
- TIs.insert(TI);
- else
- DAs.insert(DA);
+ for (Instruction *DA : It.second) {
+ for (BasicBlock *TI : Terminators) {
+ if (DT.dominates(DA, TI->getTerminator())) {
+ ReferencedCoroBegins.insert(It.first);
break;
}
}
}
- // If all the predecessors dominate coro.destroys that reference same
- // coro.begin, record the coro.begin
- if (TIs.size() == Terminators.size()) {
- ReferencedCoroBegins.insert(It.first);
- TIs.clear();
- }
- }
- // Find all the coro.begin referenced by coro.destroy along happy paths.
- for (CoroSubFnInst *DA : DAs) {
- if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
- ReferencedCoroBegins.insert(CB);
- else
- return false;
+ // Check whether there is any path from coro.begin to the terminators that
+ // does not pass through any of the coro.destroys.
+ if (!ReferencedCoroBegins.count(It.first) &&
+ !hasEscapePath(It.first, Terminators))
+ ReferencedCoroBegins.insert(It.first);
}
// If size of the set is the same as total number of coro.begin, that means we
@@ -219,7 +240,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
void Lowerer::collectPostSplitCoroIds(Function *F) {
CoroIds.clear();
- CoroFinalSuspend = nullptr;
+ CoroSuspendSwitches.clear();
for (auto &I : instructions(F)) {
if (auto *CII = dyn_cast<CoroIdInst>(&I))
if (CII->getInfo().isPostSplit())
@@ -227,12 +248,16 @@ void Lowerer::collectPostSplitCoroIds(Function *F) {
if (CII->getCoroutine() != CII->getFunction())
CoroIds.push_back(CII);
+ // Consider a case like:
+ // %0 = call i8 @llvm.coro.suspend(...)
+ // switch i8 %0, label %suspend [i8 0, label %resume
+ // i8 1, label %cleanup]
+ // and collect the SwitchInsts that the escape analysis uses later.
if (auto *CSI = dyn_cast<CoroSuspendInst>(&I))
- if (CSI->isFinal()) {
- if (!CoroFinalSuspend)
- CoroFinalSuspend = CSI;
- else
- report_fatal_error("Only one suspend point can be marked as final");
+ if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
+ SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
+ if (SWI->getNumCases() == 2)
+ CoroSuspendSwitches.insert(SWI);
}
}
}
diff --git a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll
index 8598f44a898b..fdf5d109554e 100644
--- a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll
+++ b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll
@@ -196,6 +196,58 @@ coro.ret:
ret void
}
+; CHECK-LABEL: @callResume_with_coro_suspend_3(
+define void @callResume_with_coro_suspend_3(i8 %cond) {
+entry:
+; CHECK: alloca %f.frame
+ switch i8 %cond, label %coro.ret [
+ i8 0, label %init.suspend
+ i8 1, label %coro.ret
+ ]
+
+init.suspend:
+; CHECK-NOT: llvm.coro.begin
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+ %hdl = call i8* @f()
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
+ %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+ %1 = bitcast i8* %0 to void (i8*)*
+ call fastcc void %1(i8* %hdl)
+ %2 = call token @llvm.coro.save(i8* %hdl)
+ %3 = call i8 @llvm.coro.suspend(token %2, i1 false)
+ switch i8 %3, label %coro.ret [
+ i8 0, label %final.suspend
+ i8 1, label %cleanups
+ ]
+
+; CHECK-LABEL: final.suspend:
+final.suspend:
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
+ %4 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+ %5 = bitcast i8* %4 to void (i8*)*
+ call fastcc void %5(i8* %hdl)
+ %6 = call token @llvm.coro.save(i8* %hdl)
+ %7 = call i8 @llvm.coro.suspend(token %6, i1 true)
+ switch i8 %7, label %coro.ret [
+ i8 0, label %coro.ret
+ i8 1, label %cleanups
+ ]
+
+; CHECK-LABEL: cleanups:
+cleanups:
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
+ %8 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+ %9 = bitcast i8* %8 to void (i8*)*
+ call fastcc void %9(i8* %hdl)
+ br label %coro.ret
+
+; CHECK-LABEL: coro.ret:
+coro.ret:
+; CHECK-NEXT: ret void
+ ret void
+}
+
; CHECK-LABEL: @callResume_PR34897_no_elision(
@@ -231,6 +283,41 @@ return:
ret void
}
+; CHECK-LABEL: @callResume_PR34897_elision(
+define void @callResume_PR34897_elision(i1 %cond) {
+; CHECK-LABEL: entry:
+entry:
+; CHECK: alloca %f.frame
+; CHECK: tail call void @bar(
+ tail call void @bar(i8* null)
+ br i1 %cond, label %if.then, label %if.else
+
+if.then:
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+ %hdl = call i8* @f()
+; CHECK: call void @bar(
+ tail call void @bar(i8* %hdl)
+; CHECK: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+ %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+ %1 = bitcast i8* %0 to void (i8*)*
+ call fastcc void %1(i8* %hdl)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8*
+ %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+ %3 = bitcast i8* %2 to void (i8*)*
+ call fastcc void %3(i8* %hdl)
+ br label %return
+
+if.else:
+ br label %return
+
+; CHECK-LABEL: return:
+return:
+; CHECK: ret void
+ ret void
+}
+
+
; a coroutine start function (cannot elide heap alloc, due to second argument to
; coro.begin not pointing to coro.alloc)
define i8* @f_no_elision() personality i8* null {