[llvm-branch-commits] [llvm] [Coro] Prebuild a global debug info set and share it between all coroutine clones (PR #118628)
Artem Pianykh via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 4 05:16:33 PST 2024
https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118628
>From aa6401c9daa6645456dc26a214aed1253ad98f17 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Tue, 19 Nov 2024 17:19:27 -0700
Subject: [PATCH] [Coro] Prebuild a global debug info set and share it between
all coroutine clones
Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building
a list of global debug info metadata. For programs compiled with full debug info this gets very
expensive.
This diff builds the data once and shares it between all clones.
Anecdotal timings for a sample C++ source file compiled with full debug info:
| | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------+----------+----------------+--------------------------|
| CoroSplitPass | 306ms | 221ms | 68ms |
| CoroCloner | 101ms | 72ms | 0.5ms |
| CollectGlobalDI | - | - | 63ms |
|-----------------+----------+----------------+--------------------------|
| Speed up | 1x | 1.4x | 4.5x |
Note that CollectGlobalDI happens once *per coroutine* rather than per clone.
Test Plan:
ninja check-llvm-unit
ninja check-llvm
Compiled a sample internal source file, checked time trace output for scope timings.
stack-info: PR: https://github.com/llvm/llvm-project/pull/118628, branch: users/artempyanykh/fast-coro-upstream/9
---
llvm/lib/Transforms/Coroutines/CoroCloner.h | 29 ++++++-----
llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 51 +++++++++++++++++---
2 files changed, 62 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index d1887980fb3bcb..e7121d26bd08f3 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,6 +48,7 @@ class BaseCloner {
CloneKind FKind;
IRBuilder<> Builder;
TargetTransformInfo &TTI;
+ const MetadataSetTy &GlobalDebugInfo;
ValueToValueMapTy VMap;
Function *NewF = nullptr;
@@ -60,12 +61,12 @@ class BaseCloner {
/// Create a cloner for a continuation lowering.
BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI)
+ TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo)
: OrigF(OrigF), Suffix(Suffix), Shape(Shape),
FKind(Shape.ABI == ABI::Async ? CloneKind::Async
: CloneKind::Continuation),
- Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
- ActiveSuspend(ActiveSuspend) {
+ Builder(OrigF.getContext()), TTI(TTI), GlobalDebugInfo(GlobalDebugInfo),
+ NewF(NewF), ActiveSuspend(ActiveSuspend) {
assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
assert(NewF && "need existing function for continuation");
@@ -74,9 +75,11 @@ class BaseCloner {
public:
BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI)
+ CloneKind FKind, TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo)
: OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
- Builder(OrigF.getContext()), TTI(TTI) {}
+ Builder(OrigF.getContext()), TTI(TTI),
+ GlobalDebugInfo(GlobalDebugInfo) {}
virtual ~BaseCloner() {}
@@ -84,12 +87,14 @@ class BaseCloner {
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, Function *NewF,
AnyCoroSuspendInst *ActiveSuspend,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo) {
assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
Shape.ABI == ABI::Async);
TimeTraceScope FunctionScope("BaseCloner");
- BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+ BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
+ GlobalDebugInfo);
Cloner.create();
return Cloner.getFunction();
}
@@ -129,8 +134,9 @@ class SwitchCloner : public BaseCloner {
protected:
/// Create a cloner for a switch lowering.
SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
- CloneKind FKind, TargetTransformInfo &TTI)
- : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
+ CloneKind FKind, TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo)
+ : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo) {}
void create() override;
@@ -138,11 +144,12 @@ class SwitchCloner : public BaseCloner {
/// Create a clone for a switch lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
coro::Shape &Shape, CloneKind FKind,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ const MetadataSetTy &GlobalDebugInfo) {
assert(Shape.ABI == ABI::Switch);
TimeTraceScope FunctionScope("SwitchCloner");
- SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
+ SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo);
Cloner.create();
return Cloner.getFunction();
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 3808147fc26009..2803b340bd22e0 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
@@ -77,6 +78,25 @@ using namespace llvm;
#define DEBUG_TYPE "coro-split"
+namespace {
+/// Collect (a known) subset of global debug info metadata potentially used by
+/// the function \p F.
+///
+/// This metadata set can be used to avoid cloning debug info not owned by \p F
+/// and is shared among all potential clones of \p F.
+void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
+ TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
+
+ DebugInfoFinder DIFinder;
+ DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
+ F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
+
+ FindDebugInfoToIdentityMap(GlobalDebugInfo,
+ CloneFunctionChangeType::LocalChangesOnly,
+ DIFinder, SPClonedWithinModule);
+}
+} // end anonymous namespace
+
// FIXME:
// Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape
// and it is known that other transformations, for example, sanitizers
@@ -891,8 +911,11 @@ void coro::BaseCloner::create() {
auto savedLinkage = NewF->getLinkage();
NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
- CloneFunctionInto(NewF, &OrigF, VMap,
- CloneFunctionChangeType::LocalChangesOnly, Returns);
+ CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
+ CloneFunctionMetadataInto(NewF, &OrigF, VMap, RF_None, nullptr, nullptr,
+ &GlobalDebugInfo);
+ CloneFunctionBodyInto(NewF, &OrigF, VMap, RF_None, Returns, "", nullptr,
+ nullptr, nullptr, &GlobalDebugInfo);
auto &Context = NewF->getContext();
@@ -1374,16 +1397,22 @@ struct SwitchCoroutineSplitter {
TargetTransformInfo &TTI) {
assert(Shape.ABI == coro::ABI::Switch);
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
// Create a resume clone by cloning the body of the original function,
// setting new entry block and replacing coro.suspend an appropriate value
// to force resume or cleanup pass for every suspend point.
createResumeEntryBlock(F, Shape);
auto *ResumeClone = coro::SwitchCloner::createClone(
- F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI);
+ F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI,
+ GlobalDebugInfo);
auto *DestroyClone = coro::SwitchCloner::createClone(
- F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI);
+ F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI,
+ GlobalDebugInfo);
auto *CleanupClone = coro::SwitchCloner::createClone(
- F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI);
+ F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI,
+ GlobalDebugInfo);
postSplitCleanup(*ResumeClone);
postSplitCleanup(*DestroyClone);
@@ -1768,12 +1797,16 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
}
assert(Clones.size() == Shape.CoroSuspends.size());
+
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
auto *Suspend = CS;
auto *Clone = Clones[Idx];
coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
- Suspend, TTI);
+ Suspend, TTI, GlobalDebugInfo);
}
}
@@ -1899,12 +1932,16 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
}
assert(Clones.size() == Shape.CoroSuspends.size());
+
+ MetadataSetTy GlobalDebugInfo;
+ collectGlobalDebugInfo(F, GlobalDebugInfo);
+
for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
auto Suspend = CS;
auto Clone = Clones[Idx];
coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
- Suspend, TTI);
+ Suspend, TTI, GlobalDebugInfo);
}
}
More information about the llvm-branch-commits
mailing list