[clang] [llvm] [Clang] CGCoroutine: Skip moving parameters if the allocation decision is false (PR #81195)
Yuxuan Chen via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 6 16:03:41 PST 2024
https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/81195
>From 1e03c2ec24c7bc6a303266a7023a56d6449a46e5 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen <ych at meta.com>
Date: Wed, 7 Feb 2024 16:05:42 -0800
Subject: [PATCH] Skip moving parameters if the allocation decision is false
---
clang/lib/CodeGen/CGCoroutine.cpp | 120 ++++++++---
clang/test/CodeGenCoroutines/coro-gro.cpp | 6 +-
clang/test/CodeGenCoroutines/coro-params.cpp | 48 +++--
llvm/lib/Transforms/Coroutines/CoroElide.cpp | 211 ++++++++++++++++---
4 files changed, 311 insertions(+), 74 deletions(-)
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 888d30bfb3e1d6..b2933650367c12 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -389,25 +389,12 @@ namespace {
ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap)
: LocalDeclMap(LocalDeclMap) {}
- void addCopy(DeclStmt const *PM) {
- // Figure out what param it refers to.
-
- assert(PM->isSingleDecl());
- VarDecl const*VD = static_cast<VarDecl const*>(PM->getSingleDecl());
- Expr const *InitExpr = VD->getInit();
- GetParamRef Visitor;
- Visitor.Visit(const_cast<Expr*>(InitExpr));
- assert(Visitor.Expr);
- DeclRefExpr *DREOrig = Visitor.Expr;
- auto *PD = DREOrig->getDecl();
-
- auto it = LocalDeclMap.find(PD);
- assert(it != LocalDeclMap.end() && "parameter is not found");
- SavedLocals.insert({ PD, it->second });
-
- auto copyIt = LocalDeclMap.find(VD);
- assert(copyIt != LocalDeclMap.end() && "parameter copy is not found");
- it->second = copyIt->getSecond();
+ void substAddress(ValueDecl *D, Address Addr) {
+ auto it = LocalDeclMap.find(D);
+ assert(it != LocalDeclMap.end() && "original decl is not found");
+ SavedLocals.insert({D, it->second});
+
+ it->second = Addr;
}
~ParamReferenceReplacerRAII() {
@@ -629,6 +616,63 @@ struct GetReturnObjectManager {
Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
}
};
+
+static ValueDecl *getOriginalParamDeclForParamMove(VarDecl const *VD) {
+ Expr const *InitExpr = VD->getInit();
+ GetParamRef Visitor;
+ Visitor.Visit(const_cast<Expr *>(InitExpr));
+ assert(Visitor.Expr);
+ return Visitor.Expr->getDecl();
+}
+
+struct ParamMoveManager {
+ ParamMoveManager(CodeGenFunction &CGF,
+ llvm::ArrayRef<const Stmt *> ParamMoves)
+ : CGF(CGF), ParamMovesVarDecls() {
+ ParamMovesVarDecls.reserve(ParamMoves.size());
+ for (auto *S : ParamMoves) {
+ auto *PMStmt = cast<DeclStmt>(S);
+ assert(PMStmt->isSingleDecl());
+ auto *ParamMoveVD = static_cast<VarDecl const *>(PMStmt->getSingleDecl());
+ ParamMovesVarDecls.push_back(ParamMoveVD);
+ }
+ }
+
+ // Because we wrap param moves in the coro.alloc block. It's not always
+ // necessary to run the corresponding cleanups in the branches.
+ // We would need to know when to (conditionally) clean them up.
+ void EmitMovesWithCleanup(Address PMCleanupActiveFlag) {
+ // Create parameter copies. We do it before creating a promise, since an
+ // evolution of coroutine TS may allow promise constructor to observe
+ // parameter copies.
+ for (auto *VD : ParamMovesVarDecls) {
+ auto Emission = CGF.EmitAutoVarAlloca(*VD);
+ CGF.EmitAutoVarInit(Emission);
+ auto OldTop = CGF.EHStack.stable_begin();
+ CGF.EmitAutoVarCleanups(Emission);
+ auto Top = CGF.EHStack.stable_begin();
+
+ for (auto I = CGF.EHStack.find(Top), E = CGF.EHStack.find(OldTop); I != E;
+ I++) {
+ if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*I)) {
+ assert(!Cleanup->hasActiveFlag() &&
+ "cleanup already has active flag?");
+ Cleanup->setActiveFlag(PMCleanupActiveFlag);
+ Cleanup->setTestFlagInEHCleanup();
+ Cleanup->setTestFlagInNormalCleanup();
+ }
+ }
+ }
+ }
+
+ llvm::ArrayRef<const VarDecl *> GetParamMovesVarDecls() {
+ return ParamMovesVarDecls;
+ }
+
+private:
+ CodeGenFunction &CGF;
+ SmallVector<const VarDecl *> ParamMovesVarDecls;
+};
} // namespace
static void emitBodyAndFallthrough(CodeGenFunction &CGF,
@@ -648,6 +692,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
auto *EntryBB = Builder.GetInsertBlock();
auto *AllocBB = createBasicBlock("coro.alloc");
auto *InitBB = createBasicBlock("coro.init");
+ auto *ParamMoveBB = createBasicBlock("coro.param.move");
+ auto *AfterParamMoveBB = createBasicBlock("coro.after.param.move");
auto *FinalBB = createBasicBlock("coro.final");
auto *RetBB = createBasicBlock("coro.ret");
@@ -664,6 +710,9 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
auto *CoroAlloc = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId});
+ auto PMCleanupActiveFlag = CreateTempAlloca(
+ Builder.getInt1Ty(), CharUnits::One(), "param.move.cleanup.active");
+ Builder.CreateStore(CoroAlloc, PMCleanupActiveFlag);
Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB);
EmitBlock(AllocBB);
@@ -695,6 +744,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
auto *Phi = Builder.CreatePHI(VoidPtrTy, 2);
Phi->addIncoming(NullPtr, EntryBB);
Phi->addIncoming(AllocateCall, AllocOrInvokeContBB);
+
auto *CoroBegin = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
CurCoro.Data->CoroBegin = CoroBegin;
@@ -719,15 +769,29 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
DI->getCoroutineParameterMappings().insert(
{std::get<0>(Pair), std::get<1>(Pair)});
- // Create parameter copies. We do it before creating a promise, since an
- // evolution of coroutine TS may allow promise constructor to observe
- // parameter copies.
- for (auto *PM : S.getParamMoves()) {
- EmitStmt(PM);
- ParamReplacer.addCopy(cast<DeclStmt>(PM));
- // TODO: if(CoroParam(...)) need to surround ctor and dtor
- // for the copy, so that llvm can elide it if the copy is
- // not needed.
+ Builder.CreateCondBr(CoroAlloc, ParamMoveBB, AfterParamMoveBB);
+
+ EmitBlock(ParamMoveBB);
+
+ ParamMoveManager Mover{*this, ParamMoves};
+ Mover.EmitMovesWithCleanup(PMCleanupActiveFlag);
+
+ Builder.CreateBr(AfterParamMoveBB);
+
+ EmitBlock(AfterParamMoveBB);
+
+ for (auto *VD : Mover.GetParamMovesVarDecls()) {
+ auto NewAddr = LocalDeclMap.find(VD)->getSecond();
+ auto *OrigVD = getOriginalParamDeclForParamMove(VD);
+ auto OldAddr = LocalDeclMap.find(OrigVD)->getSecond();
+
+ auto *ParamPhi = Builder.CreatePHI(VoidPtrTy, 2);
+ ParamPhi->addIncoming(NewAddr.getPointer(), ParamMoveBB);
+ ParamPhi->addIncoming(OldAddr.getPointer(), InitBB);
+
+ ParamReplacer.substAddress(
+ OrigVD,
+ Address(ParamPhi, OldAddr.getElementType(), OldAddr.getAlignment()));
}
EmitStmt(S.getPromiseDeclStmt());
diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp
index d4c3ff589e340a..fb42c7e089b363 100644
--- a/clang/test/CodeGenCoroutines/coro-gro.cpp
+++ b/clang/test/CodeGenCoroutines/coro-gro.cpp
@@ -29,8 +29,11 @@ void doSomething() noexcept;
// CHECK: define{{.*}} i32 @_Z1fv(
int f() {
// CHECK: %[[RetVal:.+]] = alloca i32
+ // CHECK: %[[ParamMoveCleanupActive:.+]] = alloca i1
// CHECK: %[[GroActive:.+]] = alloca i1
// CHECK: %[[CoroGro:.+]] = alloca %struct.GroType, {{.*}} !coro.outside.frame ![[OutFrameMetadata:.+]]
+ // CHECK: %[[CoroAlloc:.+]] = call i1 @llvm.coro.alloc
+ // CHECK-NEXT: store i1 %[[CoroAlloc:.+]], ptr %[[ParamMoveCleanupActive:.+]]
// CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
// CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef %[[Size]])
@@ -94,6 +97,7 @@ class invoker {
// CHECK: define{{.*}} void @_Z1gv({{.*}} %[[AggRes:.+]])
invoker g() {
// CHECK: %[[ResultPtr:.+]] = alloca ptr
+ // CHECK-NEXT: %[[ParamMoveCleanupActive:.+]] = alloca i1
// CHECK-NEXT: %[[Promise:.+]] = alloca %"class.invoker::invoker_promise"
// CHECK: store ptr %[[AggRes]], ptr %[[ResultPtr]]
@@ -105,4 +109,4 @@ invoker g() {
// CHECK: call void @_ZN7invoker15invoker_promise17get_return_objectEv({{.*}} %[[AggRes]]
co_return;
}
-// CHECK: ![[OutFrameMetadata]] = !{}
\ No newline at end of file
+// CHECK: ![[OutFrameMetadata]] = !{}
diff --git a/clang/test/CodeGenCoroutines/coro-params.cpp b/clang/test/CodeGenCoroutines/coro-params.cpp
index c5a61a53cb46ed..873936ac28aba4 100644
--- a/clang/test/CodeGenCoroutines/coro-params.cpp
+++ b/clang/test/CodeGenCoroutines/coro-params.cpp
@@ -64,22 +64,36 @@ void consume(int,int,int) noexcept;
// TODO: Add support for CopyOnly params
// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy(i32 noundef %val, ptr noundef %[[MoParam:.+]], ptr noundef %[[McParam:.+]]) #0 personality ptr @__gxx_personality_v0
void f(int val, MoveOnly moParam, MoveAndCopy mcParam) {
+ // CHECK: %[[ValCopy:.+]] = alloca i32,
// CHECK: %[[MoCopy:.+]] = alloca %struct.MoveOnly,
// CHECK: %[[McCopy:.+]] = alloca %struct.MoveAndCopy,
// CHECK: store i32 %val, ptr %[[ValAddr:.+]]
+ // CHECK: %[[AllocFlag:.+]] = call i1 @llvm.coro.alloc
+ // CHECK-NEXT: store i1 %[[AllocFlag:.+]] %[[CleanupActiveMem:.+]]
// CHECK: call ptr @llvm.coro.begin(
- // CHECK: call void @_ZN8MoveOnlyC1EOS_(ptr {{[^,]*}} %[[MoCopy]], ptr noundef nonnull align 4 dereferenceable(4) %[[MoParam]])
- // CHECK-NEXT: call void @llvm.lifetime.start.p0(
- // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(ptr {{[^,]*}} %[[McCopy]], ptr noundef nonnull align 4 dereferenceable(4) %[[McParam]]) #
- // CHECK-NEXT: call void @llvm.lifetime.start.p0(
- // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev(
-
+ // CHECK: br i1 %[[CoroAlloc:.+]], label %coro.param.move, label %coro.after.param.move
+ // CHECK: coro.param.move:
+ // CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %[[ValCopy:.+]])
+ // CHECK-NEXT: %[[Temp:.+]] = load i32, ptr %val.addr, align 4
+ // CHECK-NEXT: store i32 %[[Temp:.+]], ptr %val1, align 4
+ // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %[[MoCopy:.+]]) #2
+ // CHECK-NEXT: call void @_ZN8MoveOnlyC1EOS_(ptr noundef nonnull align 4 dereferenceable(4) %[[MoCopy:.+]], ptr noundef nonnull align 4 dereferenceable(4) %[[MoParam:.+]]) #2
+ // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %[[McCopy:.+]]) #2
+ // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(ptr noundef nonnull align 4 dereferenceable(4) %[[McCopy:.+]], ptr noundef nonnull align 4 dereferenceable(4) %[[McParam:.+]]) #2
+ // CHECK-NEXT: br label %coro.after.param.move
+ //
+ // CHECK: %[[ValPhi:.+]] = phi ptr [ %[[ValCopy:.+]], %coro.param.move ], [ %[[ValAddr:.+]], %coro.init ]
+ // CHECK-NEXT: %[[MoPhi:.+]] = phi ptr [ %[[MoCopy:.+]], %coro.param.move ], [ %[[MoParam:.+]], %coro.init ]
+ // CHECK-NEXT: %[[McPhi:.+]] = phi ptr [ %[[McCopy:.+]], %coro.param.move ], [ %[[McParam:.+]], %coro.init ]
+ // CHECK: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev(
+
+ // CHECK: init.ready
// CHECK: call void @_ZN14suspend_always12await_resumeEv(
// CHECK: %[[IntParam:.+]] = load i32, ptr %{{.*}}
- // CHECK: %[[MoGep:.+]] = getelementptr inbounds %struct.MoveOnly, ptr %[[MoCopy]], i32 0, i32 0
+ // CHECK: %[[MoGep:.+]] = getelementptr inbounds %struct.MoveOnly, ptr %[[MoPhi]], i32 0, i32 0
// CHECK: %[[MoVal:.+]] = load i32, ptr %[[MoGep]]
- // CHECK: %[[McGep:.+]] = getelementptr inbounds %struct.MoveAndCopy, ptr %[[McCopy]], i32 0, i32 0
+ // CHECK: %[[McGep:.+]] = getelementptr inbounds %struct.MoveAndCopy, ptr %[[McPhi]], i32 0, i32 0
// CHECK: %[[McVal:.+]] = load i32, ptr %[[McGep]]
// CHECK: call void @_Z7consumeiii(i32 noundef %[[IntParam]], i32 noundef %[[MoVal]], i32 noundef %[[McVal]])
@@ -90,13 +104,17 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) {
// CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv(
// CHECK: call void @_ZN14suspend_always12await_resumeEv(
- // Destroy promise, then parameter copies:
+ // Destroy promise, then test the cleanup flag for parameter copies (if exist):
// CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(ptr {{[^,]*}} %__promise)
// CHECK-NEXT: call void @llvm.lifetime.end.p0(
- // CHECK-NEXT: call void @_ZN11MoveAndCopyD1Ev(ptr {{[^,]*}} %[[McCopy]])
- // CHECK-NEXT: call void @llvm.lifetime.end.p0(
- // CHECK-NEXT: call void @_ZN8MoveOnlyD1Ev(ptr {{[^,]*}} %[[MoCopy]]
- // CHECK-NEXT: call void @llvm.lifetime.end.p0(
+ // CHECK-NEXT: %[[TempCleanupActive:.+]] = load i1, ptr %[[CleanupActiveMem:.+]]
+ // CHECK-NEXT: br i1 %[[TempCleanupActive]]
+
+ // The next two may be in different cleanup blocks:
+ // CHECK: call void @_ZN11MoveAndCopyD1Ev(ptr {{[^,]*}} %[[McCopy]])
+ // CHECK: call void @_ZN8MoveOnlyD1Ev(ptr {{[^,]*}} %[[MoCopy]]
+
+ // CHECK: call void @llvm.lifetime.end.p0(
// CHECK-NEXT: call void @llvm.lifetime.end.p0(
// CHECK-NEXT: call ptr @llvm.coro.free(
}
@@ -109,13 +127,13 @@ void dependent_params(T x, U, U y) {
// CHECK-NEXT: %[[y_copy:.+]] = alloca %struct.B
// CHECK: call ptr @llvm.coro.begin
- // CHECK-NEXT: call void @llvm.lifetime.start.p0(
+ // CHECK: call void @llvm.lifetime.start.p0(
// CHECK-NEXT: call void @_ZN1AC1EOS_(ptr {{[^,]*}} %[[x_copy]], ptr noundef nonnull align 4 dereferenceable(512) %x)
// CHECK-NEXT: call void @llvm.lifetime.start.p0(
// CHECK-NEXT: call void @_ZN1BC1EOS_(ptr {{[^,]*}} %[[unnamed_copy]], ptr noundef nonnull align 4 dereferenceable(512) %0)
// CHECK-NEXT: call void @llvm.lifetime.start.p0(
// CHECK-NEXT: call void @_ZN1BC1EOS_(ptr {{[^,]*}} %[[y_copy]], ptr noundef nonnull align 4 dereferenceable(512) %y)
- // CHECK-NEXT: call void @llvm.lifetime.start.p0(
+ // CHECK: call void @llvm.lifetime.start.p0(
// CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJv1A1BS1_EE12promise_typeC1Ev(
co_return;
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index d356a6d2e57594..51c155767c6c09 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -32,13 +35,15 @@ static cl::opt<std::string> CoroElideInfoOutputFilename(
#endif
namespace {
+
// Created on demand if the coro-elide pass has work to do.
struct Lowerer : coro::LowererBase {
SmallVector<CoroIdInst *, 4> CoroIds;
SmallVector<CoroBeginInst *, 1> CoroBegins;
SmallVector<CoroAllocInst *, 1> CoroAllocs;
- SmallVector<CoroSubFnInst *, 4> ResumeAddr;
- DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
+ SmallSet<CoroSubFnInst *, 4> ResumeAddr;
+ EquivalenceClasses<CoroBeginInst *> CoroBeginClasses;
+ DenseMap<CoroBeginInst *, SmallSet<CoroSubFnInst *, 4>> DestroyAddr;
SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
Lowerer(Module &M) : LowererBase(M) {}
@@ -47,24 +52,71 @@ struct Lowerer : coro::LowererBase {
AAResults &AA);
bool shouldElide(Function *F, DominatorTree &DT) const;
void collectPostSplitCoroIds(Function *F);
- bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT,
+ bool processCoroId(Function &F, CoroIdInst *, AAResults &AA, DominatorTree &DT,
OptimizationRemarkEmitter &ORE);
- bool hasEscapePath(const CoroBeginInst *,
+ bool hasEscapePath(Function &F,
+ const CoroBeginInst *,
const SmallPtrSetImpl<BasicBlock *> &) const;
};
+
+struct StackAliases {
+ StackAliases(Function &F) {
+ for (Instruction &I : instructions(F)) {
+ if (auto *Load = dyn_cast<LoadInst>(&I)) {
+ auto Ptr = Load->getPointerOperand();
+ Loads[Ptr].insert(Load);
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(&I)) {
+ auto Ptr = Store->getPointerOperand();
+ Stores[Ptr].insert(Store);
+ }
+ }
+ }
+
+ bool valueHasSingleStore(llvm::Value *V) const {
+ auto It = Stores.find(V);
+ return It != Stores.end() && It->second.size() == 1;
+ }
+
+ const SmallPtrSetImpl<LoadInst *>& getLoadsByStore(const StoreInst *Store) const {
+ const static SmallPtrSet<LoadInst *, 4> EmptyStores;
+ auto Ptr = Store->getPointerOperand();
+ if (Loads.contains(Ptr)) {
+ return Loads.at(Ptr);
+ }
+
+ return EmptyStores;
+ }
+
+ const SmallPtrSetImpl<StoreInst *>& getStoresByLoad(const LoadInst *Load) const {
+ const static SmallPtrSet<StoreInst *, 4> EmptyLoads;
+ auto Ptr = Load->getPointerOperand();
+ if (Stores.contains(Ptr)) {
+ return Stores.at(Ptr);
+ }
+
+ return EmptyLoads;
+ }
+
+private:
+ DenseMap<llvm::Value *, SmallPtrSet<StoreInst *, 4>> Stores;
+ DenseMap<llvm::Value *, SmallPtrSet<LoadInst *, 4>> Loads;
+};
} // end anonymous namespace
// Go through the list of coro.subfn.addr intrinsics and replace them with the
// provided constant.
+template <typename Range>
static void replaceWithConstant(Constant *Value,
- SmallVectorImpl<CoroSubFnInst *> &Users) {
+ Range &Users) {
if (Users.empty())
return;
// See if we need to bitcast the constant to match the type of the intrinsic
// being replaced. Note: All coro.subfn.addr intrinsics return the same type,
// so we only need to examine the type of the first one in the list.
- Type *IntrTy = Users.front()->getType();
+ Type *IntrTy = (*Users.begin())->getType();
Type *ValueTy = Value->getType();
if (ValueTy != IntrTy) {
// May need to tweak the function type to match the type expected at the
@@ -74,8 +126,11 @@ static void replaceWithConstant(Constant *Value,
}
// Now the value type matches the type of the intrinsic. Replace them all!
- for (CoroSubFnInst *I : Users)
+ for (CoroSubFnInst *I : Users) {
+ llvm::dbgs() << "CSFI: ";
+ I->dump();
replaceAndRecursivelySimplify(I, Value);
+ }
}
// See if any operand of the call instruction references the coroutine frame.
@@ -178,11 +233,15 @@ void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
removeTailCallAttribute(Frame, AA);
}
-bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
- const SmallPtrSetImpl<BasicBlock *> &TIs) const {
+bool Lowerer::hasEscapePath(
+ Function &F,
+ const CoroBeginInst *CB,
+ const SmallPtrSetImpl<BasicBlock *> &TIs) const {
+
const auto &It = DestroyAddr.find(CB);
assert(It != DestroyAddr.end());
+ StackAliases SA{F};
// Limit the number of blocks we visit.
unsigned Limit = 32 * (1 + It->second.size());
@@ -196,11 +255,20 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
Visited.insert(DA->getParent());
SmallPtrSet<const BasicBlock *, 32> EscapingBBs;
+ SmallPtrSet<const LoadInst *, 1> AliasUsers;
for (auto *U : CB->users()) {
// The use from coroutine intrinsics are not a problem.
if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
continue;
+ if (auto *SI = dyn_cast<StoreInst>(U);
+ SI && SI->getPointerOperand() == U) {
+ for (const auto *Load : SA.getLoadsByStore(SI)) {
+ AliasUsers.insert(Load);
+ }
+ continue;
+ }
+
// Think all other usages may be an escaping candidate conservatively.
//
// Note that the major user of switch ABI coroutine (the C++) will store
@@ -215,6 +283,15 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
EscapingBBs.insert(cast<Instruction>(U)->getParent());
}
+ for (auto *Load : AliasUsers) {
+ for (auto *U : Load->users()) {
+ // The use from coroutine intrinsics are not a problem.
+ if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
+ continue;
+ EscapingBBs.insert(cast<Instruction>(U)->getParent());
+ }
+ }
+
bool PotentiallyEscaped = false;
do {
@@ -285,10 +362,17 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
Terminators.insert(&B);
}
+ SmallPtrSet<CoroBeginInst *, 8> CoroBeginsToTest;
+
+ for (const auto& Class : CoroBeginClasses) {
+ if (Class.isLeader()) {
+ CoroBeginsToTest.insert(Class.getData());
+ }
+ }
// Filter out the coro.destroy that lie along exceptional paths.
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
- for (const auto &It : DestroyAddr) {
+ for (auto *CB : CoroBeginsToTest) {
// If every terminators is dominated by coro.destroy, we could know the
// corresponding coro.begin wouldn't escape.
//
@@ -298,20 +382,35 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
//
// hasEscapePath is relatively slow, so we avoid to run it as much as
// possible.
+ auto It = DestroyAddr.find(CB);
+ if (It == DestroyAddr.end()) {
+ llvm::dbgs() << "has no destroyaddr!\n";
+ continue;
+ }
+
if (llvm::all_of(Terminators,
[&](auto *TI) {
- return llvm::any_of(It.second, [&](auto *DA) {
+ return llvm::any_of(It->second, [&](auto *DA) {
return DT.dominates(DA, TI->getTerminator());
});
}) ||
- !hasEscapePath(It.first, Terminators))
- ReferencedCoroBegins.insert(It.first);
+ !hasEscapePath(*F, CB, Terminators))
+ {
+ ReferencedCoroBegins.insert(CB);
+ } else {
+ llvm::dbgs() << "not referenced\n";
+ }
}
// If size of the set is the same as total number of coro.begin, that means we
// found a coro.free or coro.destroy referencing each coro.begin, so we can
// perform heap elision.
- return ReferencedCoroBegins.size() == CoroBegins.size();
+ llvm::dbgs() << "DestroyAddr: " << DestroyAddr.size()
+ << " ReferencedCoroBegins: " << ReferencedCoroBegins.size()
+ << " CoroBeginsToTest: " << CoroBeginsToTest.size()
+ << "\n";
+
+ return ReferencedCoroBegins.size() == CoroBeginsToTest.size();
}
void Lowerer::collectPostSplitCoroIds(Function *F) {
@@ -338,38 +437,89 @@ void Lowerer::collectPostSplitCoroIds(Function *F) {
}
}
-bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
+static void findReachableCoroSubFnForValue(
+ llvm::Value *V,
+ const StackAliases &SA,
+ SmallSet<CoroSubFnInst *, 4> &Res) {
+
+ if (auto *CSFI = dyn_cast<CoroSubFnInst>(V)) {
+ Res.insert(CSFI);
+ return;
+ }
+
+ if (auto *CB = dyn_cast<CoroBeginInst>(V)) {
+ for (auto *U : CB->users()) {
+ findReachableCoroSubFnForValue(U, SA, Res);
+ }
+ }
+
+ if (auto *Phi = dyn_cast<PHINode>(V)) {
+ for (auto *U : Phi->users()) {
+ findReachableCoroSubFnForValue(U, SA, Res);
+ }
+ }
+
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ for (auto *Load : SA.getLoadsByStore(SI)) {
+ findReachableCoroSubFnForValue(Load, SA, Res);
+ }
+ }
+}
+
+bool Lowerer::processCoroId(Function &F, CoroIdInst *CoroId, AAResults &AA,
DominatorTree &DT, OptimizationRemarkEmitter &ORE) {
CoroBegins.clear();
CoroAllocs.clear();
ResumeAddr.clear();
DestroyAddr.clear();
+ CoroBeginClasses = EquivalenceClasses<CoroBeginInst *>{}; // No clear function.
// Collect all coro.begin and coro.allocs associated with this coro.id.
for (User *U : CoroId->users()) {
- if (auto *CB = dyn_cast<CoroBeginInst>(U))
+ if (auto *CB = dyn_cast<CoroBeginInst>(U)) {
CoroBegins.push_back(CB);
- else if (auto *CA = dyn_cast<CoroAllocInst>(U))
+ CoroBeginClasses.insert(CB);
+ } else if (auto *CA = dyn_cast<CoroAllocInst>(U))
CoroAllocs.push_back(CA);
}
- // Collect all coro.subfn.addrs associated with coro.begin.
- // Note, we only devirtualize the calls if their coro.subfn.addr refers to
- // coro.begin directly. If we run into cases where this check is too
- // conservative, we can consider relaxing the check.
+ // Create Equivalent Classes for CoroBegins, so that multiple begins going to the same Phi Node can be count as one.
for (CoroBeginInst *CB : CoroBegins) {
- for (User *U : CB->users())
- if (auto *II = dyn_cast<CoroSubFnInst>(U))
- switch (II->getIndex()) {
- case CoroSubFnInst::ResumeIndex:
- ResumeAddr.push_back(II);
+ CoroBeginClasses.insert(CB);
+ for (User *U : CB->users()) {
+ if (auto *Phi = dyn_cast<PHINode>(U)) {
+ auto Values = Phi->incoming_values();
+ auto First = cast<CoroBeginInst>(Values.begin());
+ for (auto I = Values.begin(), E = Values.end(); I != E; I++) {
+ // unionSets inserts First if not exist as well.
+ CoroBeginClasses.unionSets(First, cast<CoroBeginInst>(I));
+ }
+ }
+ }
+ }
+
+ StackAliases SA{F};
+
+ // Collect all coro.subfn.addrs associated with coro.begin.
+ for (const auto& Class : CoroBeginClasses) {
+ if (!Class.isLeader()) {
+ continue;
+ }
+ auto Leader = Class.getData();
+ SmallSet<CoroSubFnInst *, 4> Res;
+ findReachableCoroSubFnForValue(Leader, SA, Res);
+ for (auto *CSFI : Res) {
+ switch (CSFI->getIndex()) {
+ case llvm::CoroSubFnInst::ResumeIndex:
+ ResumeAddr.insert(CSFI);
break;
- case CoroSubFnInst::DestroyIndex:
- DestroyAddr[CB].push_back(II);
+ case llvm::CoroSubFnInst::DestroyIndex:
+ DestroyAddr[Leader].insert(CSFI);
break;
default:
llvm_unreachable("unexpected coro.subfn.addr constant");
- }
+ }
+ }
}
// PostSplit coro.id refers to an array of subfunctions in its Info
@@ -383,6 +533,7 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
bool ShouldElide = shouldElide(CoroId->getFunction(), DT);
+ llvm::dbgs() << "ShouldElide " << CoroId->getCoroutine()->getName() << ": " << ShouldElide << "\n";
if (!ShouldElide)
ORE.emit([&]() {
if (auto FrameSizeAndAlign =
@@ -466,7 +617,7 @@ PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
bool Changed = false;
for (auto *CII : L.CoroIds)
- Changed |= L.processCoroId(CII, AA, DT, ORE);
+ Changed |= L.processCoroId(F, CII, AA, DT, ORE);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
More information about the cfe-commits
mailing list