[llvm-branch-commits] [llvm] [Coro] Prebuild a global debug info set and share it between all coroutine clones (PR #118628)

Artem Pianykh via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 4 05:15:56 PST 2024


https://github.com/artempyanykh created https://github.com/llvm/llvm-project/pull/118628

[Coro] Prebuild a global debug info set and share it between all coroutine clones

Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building
a list of global debug info metadata. For programs compiled with full debug info this gets very
expensive.

This diff builds the data once and shares it between all clones.

Anecdata for a sample cpp source file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------+----------+----------------+--------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms                     |
| CoroCloner      | 101ms    | 72ms           | 0.5ms                    |
| CollectGlobalDI | -        | -              | 63ms                     |
|-----------------+----------+----------------+--------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x                     |

Note that CollectGlobalDI happens once *per coroutine* rather than per clone.

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample internal source file, checked time trace output for scope timings.

>From 77d348fbc36ce20f95350c46f9b417e0457ec3c8 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Tue, 19 Nov 2024 17:19:27 -0700
Subject: [PATCH] [Coro] Prebuild a global debug info set and share it between
 all coroutine clones

Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building
a list of global debug info metadata. For programs compiled with full debug info this gets very
expensive.

This diff builds the data once and shares it between all clones.

Anecdata for a sample cpp source file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------+----------+----------------+--------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms                     |
| CoroCloner      | 101ms    | 72ms           | 0.5ms                    |
| CollectGlobalDI | -        | -              | 63ms                     |
|-----------------+----------+----------------+--------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x                     |

Note that CollectGlobalDI happens once *per coroutine* rather than per clone.

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample internal source file, checked time trace output for scope timings.
---
 llvm/lib/Transforms/Coroutines/CoroCloner.h  | 29 ++++++-----
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 51 +++++++++++++++++---
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index d1887980fb3bcb..e7121d26bd08f3 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -48,6 +48,7 @@ class BaseCloner {
   CloneKind FKind;
   IRBuilder<> Builder;
   TargetTransformInfo &TTI;
+  const MetadataSetTy &GlobalDebugInfo;
 
   ValueToValueMapTy VMap;
   Function *NewF = nullptr;
@@ -60,12 +61,12 @@ class BaseCloner {
   /// Create a cloner for a continuation lowering.
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
              Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
-             TargetTransformInfo &TTI)
+             TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo)
       : OrigF(OrigF), Suffix(Suffix), Shape(Shape),
         FKind(Shape.ABI == ABI::Async ? CloneKind::Async
                                       : CloneKind::Continuation),
-        Builder(OrigF.getContext()), TTI(TTI), NewF(NewF),
-        ActiveSuspend(ActiveSuspend) {
+        Builder(OrigF.getContext()), TTI(TTI), GlobalDebugInfo(GlobalDebugInfo),
+        NewF(NewF), ActiveSuspend(ActiveSuspend) {
     assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
            Shape.ABI == ABI::Async);
     assert(NewF && "need existing function for continuation");
@@ -74,9 +75,11 @@ class BaseCloner {
 
 public:
   BaseCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             CloneKind FKind, TargetTransformInfo &TTI)
+             CloneKind FKind, TargetTransformInfo &TTI,
+             const MetadataSetTy &GlobalDebugInfo)
       : OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
-        Builder(OrigF.getContext()), TTI(TTI) {}
+        Builder(OrigF.getContext()), TTI(TTI),
+        GlobalDebugInfo(GlobalDebugInfo) {}
 
   virtual ~BaseCloner() {}
 
@@ -84,12 +87,14 @@ class BaseCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
                                coro::Shape &Shape, Function *NewF,
                                AnyCoroSuspendInst *ActiveSuspend,
-                               TargetTransformInfo &TTI) {
+                               TargetTransformInfo &TTI,
+                               const MetadataSetTy &GlobalDebugInfo) {
     assert(Shape.ABI == ABI::Retcon || Shape.ABI == ABI::RetconOnce ||
            Shape.ABI == ABI::Async);
     TimeTraceScope FunctionScope("BaseCloner");
 
-    BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+    BaseCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
+                      GlobalDebugInfo);
     Cloner.create();
     return Cloner.getFunction();
   }
@@ -129,8 +134,9 @@ class SwitchCloner : public BaseCloner {
 protected:
   /// Create a cloner for a switch lowering.
   SwitchCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-               CloneKind FKind, TargetTransformInfo &TTI)
-      : BaseCloner(OrigF, Suffix, Shape, FKind, TTI) {}
+               CloneKind FKind, TargetTransformInfo &TTI,
+               const MetadataSetTy &GlobalDebugInfo)
+      : BaseCloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo) {}
 
   void create() override;
 
@@ -138,11 +144,12 @@ class SwitchCloner : public BaseCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,
                                coro::Shape &Shape, CloneKind FKind,
-                               TargetTransformInfo &TTI) {
+                               TargetTransformInfo &TTI,
+                               const MetadataSetTy &GlobalDebugInfo) {
     assert(Shape.ABI == ABI::Switch);
     TimeTraceScope FunctionScope("SwitchCloner");
 
-    SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
+    SwitchCloner Cloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo);
     Cloner.create();
     return Cloner.getFunction();
   }
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 3808147fc26009..2803b340bd22e0 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalValue.h"
@@ -77,6 +78,25 @@ using namespace llvm;
 
 #define DEBUG_TYPE "coro-split"
 
+namespace {
+/// Collect (a known) subset of global debug info metadata potentially used by
+/// the function \p F.
+///
+/// This metadata set can be used to avoid cloning debug info not owned by \p F
+/// and is shared among all potential clones \p F.
+void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
+  TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
+
+  DebugInfoFinder DIFinder;
+  DISubprogram *SPClonedWithinModule = CollectDebugInfoForCloning(
+      F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
+
+  FindDebugInfoToIdentityMap(GlobalDebugInfo,
+                             CloneFunctionChangeType::LocalChangesOnly,
+                             DIFinder, SPClonedWithinModule);
+}
+} // end anonymous namespace
+
 // FIXME:
 // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape
 // and it is known that other transformations, for example, sanitizers
@@ -891,8 +911,11 @@ void coro::BaseCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  CloneFunctionInto(NewF, &OrigF, VMap,
-                    CloneFunctionChangeType::LocalChangesOnly, Returns);
+  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
+  CloneFunctionMetadataInto(NewF, &OrigF, VMap, RF_None, nullptr, nullptr,
+                            &GlobalDebugInfo);
+  CloneFunctionBodyInto(NewF, &OrigF, VMap, RF_None, Returns, "", nullptr,
+                        nullptr, nullptr, &GlobalDebugInfo);
 
   auto &Context = NewF->getContext();
 
@@ -1374,16 +1397,22 @@ struct SwitchCoroutineSplitter {
                     TargetTransformInfo &TTI) {
     assert(Shape.ABI == coro::ABI::Switch);
 
+    MetadataSetTy GlobalDebugInfo;
+    collectGlobalDebugInfo(F, GlobalDebugInfo);
+
     // Create a resume clone by cloning the body of the original function,
     // setting new entry block and replacing coro.suspend an appropriate value
     // to force resume or cleanup pass for every suspend point.
     createResumeEntryBlock(F, Shape);
     auto *ResumeClone = coro::SwitchCloner::createClone(
-        F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI);
+        F, ".resume", Shape, coro::CloneKind::SwitchResume, TTI,
+        GlobalDebugInfo);
     auto *DestroyClone = coro::SwitchCloner::createClone(
-        F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI);
+        F, ".destroy", Shape, coro::CloneKind::SwitchUnwind, TTI,
+        GlobalDebugInfo);
     auto *CleanupClone = coro::SwitchCloner::createClone(
-        F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI);
+        F, ".cleanup", Shape, coro::CloneKind::SwitchCleanup, TTI,
+        GlobalDebugInfo);
 
     postSplitCleanup(*ResumeClone);
     postSplitCleanup(*DestroyClone);
@@ -1768,12 +1797,16 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape,
   }
 
   assert(Clones.size() == Shape.CoroSuspends.size());
+
+  MetadataSetTy GlobalDebugInfo;
+  collectGlobalDebugInfo(F, GlobalDebugInfo);
+
   for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
     auto *Suspend = CS;
     auto *Clone = Clones[Idx];
 
     coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
-                                  Suspend, TTI);
+                                  Suspend, TTI, GlobalDebugInfo);
   }
 }
 
@@ -1899,12 +1932,16 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape,
   }
 
   assert(Clones.size() == Shape.CoroSuspends.size());
+
+  MetadataSetTy GlobalDebugInfo;
+  collectGlobalDebugInfo(F, GlobalDebugInfo);
+
   for (auto [Idx, CS] : llvm::enumerate(Shape.CoroSuspends)) {
     auto Suspend = CS;
     auto Clone = Clones[Idx];
 
     coro::BaseCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone,
-                                  Suspend, TTI);
+                                  Suspend, TTI, GlobalDebugInfo);
   }
 }
 



More information about the llvm-branch-commits mailing list