[llvm] [Coro] Amortize debug info processing cost in CoroSplit (PR #109032)

Artem Pianykh via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 12:20:59 PDT 2024


https://github.com/artempyanykh created https://github.com/llvm/llvm-project/pull/109032

More details about this stack are [in this topic on discourse](https://discourse.llvm.org/t/rfc-amortizing-debug-info-processing-cost-in-corosplit/81275). 

TL;DR: In large modules with a bunch of coroutines the cost of debug info metadata processing can get very high (e.g. adding over a minute to the compile time). This stack does some refactoring around function cloning and caches some module-level calculations to speed up the pass.

Anecdata for a sample C++ source file:
||Baseline / 0|[IdentityMD set](https://github.com/llvm/llvm-project/commit/d7f598cc0f86e62838bd02fe5217d6eb58152956) |[Prebuilt GlobalDI](https://github.com/llvm/llvm-project/commit/e14a46f5224f0d974ad7dc5237cd4b9f82bb991b) |[Cached CU DIFinder](https://github.com/llvm/llvm-project/commit/4c76540db1990a49b6126de83c7ccd12e28e8cb0) |
| --- | --- | --- | --- | --- |
|CoroSplitPass|306ms|221ms|68ms|17ms|
|CoroCloner|101ms|72ms|63ms|0.5ms|
|CollectGlobalDI|-|-|63ms|13ms|
|Overall speed up |1x|1.4x|4.5x|18x| 

Each commit is individually buildable and reviewable. I can extract them into separate PRs if the overall direction makes sense.  

>From a98768822650b2f3a076d48a463e9cfb543197db Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 05:30:29 -0700
Subject: [PATCH 01/14] [NFC][Coro] Add helpers for coro cloning with a
 TimeTraceScope

Summary:
Add a helper (2 overloads) that consolidates CoroCloner creation and the actual cloning.
The helpers create a TimeTraceScope to make it easier to see how long the cloning takes.

Test Plan:
ninja check-llvm-unit
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 58 +++++++++++++-------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 382bdfff1926f7..387b1e6942f186 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -61,6 +61,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -116,7 +117,6 @@ class CoroCloner {
 
   TargetTransformInfo &TTI;
 
-public:
   /// Create a cloner for a switch lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
              Kind FKind, TargetTransformInfo &TTI)
@@ -138,6 +138,30 @@ class CoroCloner {
     assert(ActiveSuspend && "need active suspend point for continuation");
   }
 
+public:
+  /// Create a clone for a switch lowering.
+  static Function *createClone(Function &OrigF, const Twine &Suffix,
+                               coro::Shape &Shape, Kind FKind,
+                               TargetTransformInfo &TTI) {
+    TimeTraceScope FunctionScope("CoroCloner");
+
+    CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
+    Cloner.create();
+    return Cloner.getFunction();
+  }
+
+  /// Create a clone for a continuation lowering.
+  static Function *createClone(Function &OrigF, const Twine &Suffix,
+                               coro::Shape &Shape, Function *NewF,
+                               AnyCoroSuspendInst *ActiveSuspend,
+                               TargetTransformInfo &TTI) {
+    TimeTraceScope FunctionScope("CoroCloner");
+
+    CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+    Cloner.create();
+    return Cloner.getFunction();
+  }
+
   Function *getFunction() const {
     assert(NewF != nullptr && "declaration not yet set");
     return NewF;
@@ -1464,13 +1488,16 @@ struct SwitchCoroutineSplitter {
                     TargetTransformInfo &TTI) {
     assert(Shape.ABI == coro::ABI::Switch);
 
+    // Create a resume clone by cloning the body of the original function,
+    // setting a new entry block and replacing coro.suspend with an appropriate
+    // value to force resume or cleanup pass for every suspend point.
     createResumeEntryBlock(F, Shape);
-    auto *ResumeClone =
-        createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
-    auto *DestroyClone =
-        createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
-    auto *CleanupClone =
-        createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
+    auto *ResumeClone = CoroCloner::createClone(
+        F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
+    auto *DestroyClone = CoroCloner::createClone(
+        F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
+    auto *CleanupClone = CoroCloner::createClone(
+        F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
 
     postSplitCleanup(*ResumeClone);
     postSplitCleanup(*DestroyClone);
@@ -1560,17 +1587,6 @@ struct SwitchCoroutineSplitter {
   }
 
 private:
-  // Create a resume clone by cloning the body of the original function, setting
-  // new entry block and replacing coro.suspend an appropriate value to force
-  // resume or cleanup pass for every suspend point.
-  static Function *createClone(Function &F, const Twine &Suffix,
-                               coro::Shape &Shape, CoroCloner::Kind FKind,
-                               TargetTransformInfo &TTI) {
-    CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
-    Cloner.create();
-    return Cloner.getFunction();
-  }
-
   // Create an entry block for a resume function with a switch that will jump to
   // suspend points.
   static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
@@ -1870,7 +1886,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
     auto *Suspend = Shape.CoroSuspends[Idx];
     auto *Clone = Clones[Idx];
 
-    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
+    CoroCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, Suspend,
+                            TTI);
   }
 }
 
@@ -1999,7 +2016,8 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
     auto Suspend = Shape.CoroSuspends[i];
     auto Clone = Clones[i];
 
-    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
+    CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend,
+                            TTI);
   }
 }
 

>From 3cb662c59a2043f2ba3bcf28089fd5b78a413f30 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 09:22:34 -0700
Subject: [PATCH 02/14] [NFC][Utils] Extract CloneFunctionAttributesInto from
 CloneFunctionInto

---
 llvm/include/llvm/Transforms/Utils/Cloning.h |  6 +++
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 53 ++++++++++++--------
 2 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index a4be24e32c5279..6145fcea8351b8 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -175,6 +175,12 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                        ValueMapTypeRemapper *TypeMapper = nullptr,
                        ValueMaterializer *Materializer = nullptr);
 
+void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
+                                 ValueToValueMapTy &VMap,
+                                 bool ModuleLevelChanges,
+                                 ValueMapTypeRemapper *TypeMapper = nullptr,
+                                 ValueMaterializer *Materializer = nullptr);
+
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                const Instruction *StartingInst,
                                ValueToValueMapTy &VMap, bool ModuleLevelChanges,
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index dc9ca1423f3e79..f986164d03c2bb 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -86,28 +86,14 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
   return NewBB;
 }
 
-// Clone OldFunc into NewFunc, transforming the old arguments into references to
-// VMap values.
-//
-void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
-                             ValueToValueMapTy &VMap,
-                             CloneFunctionChangeType Changes,
-                             SmallVectorImpl<ReturnInst *> &Returns,
-                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
-                             ValueMapTypeRemapper *TypeMapper,
-                             ValueMaterializer *Materializer) {
-  NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
-  assert(NameSuffix && "NameSuffix cannot be null!");
-
-#ifndef NDEBUG
-  for (const Argument &I : OldFunc->args())
-    assert(VMap.count(&I) && "No mapping from source argument specified!");
-#endif
-
-  bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
-
-  // Copy all attributes other than those stored in the AttributeList.  We need
-  // to remap the parameter indices of the AttributeList.
+// Copy all attributes other than those stored in the AttributeList.  We need
+// to remap the parameter indices of the AttributeList.
+void llvm::CloneFunctionAttributesInto(Function *NewFunc,
+                                       const Function *OldFunc,
+                                       ValueToValueMapTy &VMap,
+                                       bool ModuleLevelChanges,
+                                       ValueMapTypeRemapper *TypeMapper,
+                                       ValueMaterializer *Materializer) {
   AttributeList NewAttrs = NewFunc->getAttributes();
   NewFunc->copyAttributesFrom(OldFunc);
   NewFunc->setAttributes(NewAttrs);
@@ -147,6 +133,29 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   NewFunc->setAttributes(
       AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(),
                          OldAttrs.getRetAttrs(), NewArgAttrs));
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// VMap values.
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                             ValueToValueMapTy &VMap,
+                             CloneFunctionChangeType Changes,
+                             SmallVectorImpl<ReturnInst *> &Returns,
+                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+                             ValueMapTypeRemapper *TypeMapper,
+                             ValueMaterializer *Materializer) {
+  NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat);
+  assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+  for (const Argument &I : OldFunc->args())
+    assert(VMap.count(&I) && "No mapping from source argument specified!");
+#endif
+
+  bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+
+  CloneFunctionAttributesInto(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+                              TypeMapper, Materializer);
 
   // Everything else beyond this point deals with function instructions,
   // so if we are dealing with a function declaration, we're done.

>From 7b6c39f8c342f27200f466f46d8ddadf286c8abc Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 14:55:28 -0700
Subject: [PATCH 03/14] [Utils] Extract ProcessSubprogramAttachment from
 CloneFunctionInto

Summary:
Consolidate the logic in a single function. We do an extra pass over Instructions but this is
necessary to untangle things and extract metadata cloning in a future diff.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/Transforms/Utils/Cloning.h |  5 ++
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 58 +++++++++++++-------
 2 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 6145fcea8351b8..1a3e99241c2215 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -205,6 +205,11 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                const char *NameSuffix = "",
                                ClonedCodeInfo *CodeInfo = nullptr);
 
+/// Process debug information from function's subprogram attachment.
+DISubprogram *ProcessSubprogramAttachment(const Function &F,
+                                          CloneFunctionChangeType Changes,
+                                          DebugInfoFinder &DIFinder);
+
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f986164d03c2bb..4c99c26e52fe17 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -135,6 +135,29 @@ void llvm::CloneFunctionAttributesInto(Function *NewFunc,
                          OldAttrs.getRetAttrs(), NewArgAttrs));
 }
 
+DISubprogram *llvm::ProcessSubprogramAttachment(const Function &F,
+                                                CloneFunctionChangeType Changes,
+                                                DebugInfoFinder &DIFinder) {
+  DISubprogram *SPClonedWithinModule = nullptr;
+  if (Changes < CloneFunctionChangeType::DifferentModule) {
+    SPClonedWithinModule = F.getSubprogram();
+  }
+  if (SPClonedWithinModule)
+    DIFinder.processSubprogram(SPClonedWithinModule);
+
+  const Module *M = F.getParent();
+  if (Changes != CloneFunctionChangeType::ClonedModule && M) {
+    // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
+    for (const auto &BB : F) {
+      for (const auto &I : BB) {
+        DIFinder.processInstruction(*M, I);
+      }
+    }
+  }
+
+  return SPClonedWithinModule;
+}
+
 // Clone OldFunc into NewFunc, transforming the old arguments into references to
 // VMap values.
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
@@ -167,23 +190,19 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // duplicate instructions and then freeze them in the MD map. We also record
   // information about dbg.value and dbg.declare to avoid duplicating the
   // types.
-  std::optional<DebugInfoFinder> DIFinder;
+  DebugInfoFinder DIFinder;
 
   // Track the subprogram attachment that needs to be cloned to fine-tune the
   // mapping within the same module.
-  DISubprogram *SPClonedWithinModule = nullptr;
   if (Changes < CloneFunctionChangeType::DifferentModule) {
+    // Need to find subprograms, types, and compile units.
+
     assert((NewFunc->getParent() == nullptr ||
             NewFunc->getParent() == OldFunc->getParent()) &&
            "Expected NewFunc to have the same parent, or no parent");
-
-    // Need to find subprograms, types, and compile units.
-    DIFinder.emplace();
-
-    SPClonedWithinModule = OldFunc->getSubprogram();
-    if (SPClonedWithinModule)
-      DIFinder->processSubprogram(SPClonedWithinModule);
   } else {
+    // Need to find all the compile units.
+
     assert((NewFunc->getParent() == nullptr ||
             NewFunc->getParent() != OldFunc->getParent()) &&
            "Expected NewFunc to have different parents, or no parent");
@@ -192,19 +211,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
       assert(NewFunc->getParent() &&
              "Need parent of new function to maintain debug info invariants");
 
-      // Need to find all the compile units.
-      DIFinder.emplace();
     }
   }
 
+  DISubprogram *SPClonedWithinModule =
+      ProcessSubprogramAttachment(*OldFunc, Changes, DIFinder);
+
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of
   // recursive functions into themselves.
   for (const BasicBlock &BB : *OldFunc) {
 
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
-                                      DIFinder ? &*DIFinder : nullptr);
+    BasicBlock *CBB =
+        CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, nullptr);
 
     // Add basic block mapping.
     VMap[&BB] = CBB;
@@ -227,7 +247,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   }
 
   if (Changes < CloneFunctionChangeType::DifferentModule &&
-      DIFinder->subprogram_count() > 0) {
+      DIFinder.subprogram_count() > 0) {
     // Turn on module-level changes, since we need to clone (some of) the
     // debug info metadata.
     //
@@ -242,7 +262,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
     // Avoid cloning types, compile units, and (other) subprograms.
     SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
-    for (DISubprogram *ISP : DIFinder->subprograms()) {
+    for (DISubprogram *ISP : DIFinder.subprograms()) {
       if (ISP != SPClonedWithinModule) {
         mapToSelfIfNew(ISP);
         MappedToSelfSPs.insert(ISP);
@@ -250,16 +270,16 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     }
 
     // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-    for (DIScope *S : DIFinder->scopes()) {
+    for (DIScope *S : DIFinder.scopes()) {
       auto *LScope = dyn_cast<DILocalScope>(S);
       if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
         mapToSelfIfNew(S);
     }
 
-    for (DICompileUnit *CU : DIFinder->compile_units())
+    for (DICompileUnit *CU : DIFinder.compile_units())
       mapToSelfIfNew(CU);
 
-    for (DIType *Type : DIFinder->types())
+    for (DIType *Type : DIFinder.types())
       mapToSelfIfNew(Type);
   } else {
     assert(!SPClonedWithinModule &&
@@ -313,7 +333,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   SmallPtrSet<const void *, 8> Visited;
   for (auto *Operand : NMD->operands())
     Visited.insert(Operand);
-  for (auto *Unit : DIFinder->compile_units()) {
+  for (auto *Unit : DIFinder.compile_units()) {
     MDNode *MappedUnit =
         MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
     if (Visited.insert(MappedUnit).second)

>From a611c5e591f887a83f8be392d553554da646e1ee Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 15:01:05 -0700
Subject: [PATCH 04/14] [NFC][Utils] Remove DebugInfoFinder parameter from
 CloneBasicBlock

Summary:
There was a single usage of CloneBasicBlock with non-default DebugInfoFinder inside
CloneFunctionInto which has been refactored in the previous diff.

Let's remove the parameter to keep the scope of the function more focused.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/Transforms/Utils/Cloning.h |  3 +--
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 10 ++--------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 1a3e99241c2215..4a869ea56dd4ad 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -119,8 +119,7 @@ struct ClonedCodeInfo {
 /// parameter.
 BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
                             const Twine &NameSuffix = "", Function *F = nullptr,
-                            ClonedCodeInfo *CodeInfo = nullptr,
-                            DebugInfoFinder *DIFinder = nullptr);
+                            ClonedCodeInfo *CodeInfo = nullptr);
 
 /// Return a copy of the specified function and add it to that
 /// function's module.  Also, any references specified in the VMap are changed
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 4c99c26e52fe17..76b3412915200f 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -43,21 +43,16 @@ using namespace llvm;
 /// See comments in Cloning.h.
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
                                   const Twine &NameSuffix, Function *F,
-                                  ClonedCodeInfo *CodeInfo,
-                                  DebugInfoFinder *DIFinder) {
+                                  ClonedCodeInfo *CodeInfo) {
   BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
   NewBB->IsNewDbgInfoFormat = BB->IsNewDbgInfoFormat;
   if (BB->hasName())
     NewBB->setName(BB->getName() + NameSuffix);
 
   bool hasCalls = false, hasDynamicAllocas = false, hasMemProfMetadata = false;
-  Module *TheModule = F ? F->getParent() : nullptr;
 
   // Loop over all instructions, and copy them over.
   for (const Instruction &I : *BB) {
-    if (DIFinder && TheModule)
-      DIFinder->processInstruction(*TheModule, I);
-
     Instruction *NewInst = I.clone();
     if (I.hasName())
       NewInst->setName(I.getName() + NameSuffix);
@@ -223,8 +218,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   for (const BasicBlock &BB : *OldFunc) {
 
     // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB =
-        CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, nullptr);
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
 
     // Add basic block mapping.
     VMap[&BB] = CBB;

>From 06c10b12d346e596dfdaecefccbf89baee541240 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 15:09:44 -0700
Subject: [PATCH 05/14] [NFC][Utils] Clone basic blocks after we're done with
 metadata in CloneFunctionInto

Summary:
This makes the flow of the function a bit more straightforward and makes it easier to extract more
into helper functions.

Test Plan:
ninja check-llvm-unit
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 56 ++++++++++-----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 76b3412915200f..08f85bd54fd588 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -212,34 +212,6 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   DISubprogram *SPClonedWithinModule =
       ProcessSubprogramAttachment(*OldFunc, Changes, DIFinder);
 
-  // Loop over all of the basic blocks in the function, cloning them as
-  // appropriate.  Note that we save BE this way in order to handle cloning of
-  // recursive functions into themselves.
-  for (const BasicBlock &BB : *OldFunc) {
-
-    // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
-
-    // Add basic block mapping.
-    VMap[&BB] = CBB;
-
-    // It is only legal to clone a function if a block address within that
-    // function is never referenced outside of the function.  Given that, we
-    // want to map block addresses from the old function to block addresses in
-    // the clone. (This is different from the generic ValueMapper
-    // implementation, which generates an invalid blockaddress when
-    // cloning a function.)
-    if (BB.hasAddressTaken()) {
-      Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
-                                              const_cast<BasicBlock *>(&BB));
-      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
-    }
-
-    // Note return instructions for the caller.
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
-      Returns.push_back(RI);
-  }
-
   if (Changes < CloneFunctionChangeType::DifferentModule &&
       DIFinder.subprogram_count() > 0) {
     // Turn on module-level changes, since we need to clone (some of) the
@@ -291,6 +263,34 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                                 TypeMapper, Materializer));
   }
 
+  // Loop over all of the basic blocks in the function, cloning them as
+  // appropriate.  Note that we save BE this way in order to handle cloning of
+  // recursive functions into themselves.
+  for (const BasicBlock &BB : *OldFunc) {
+
+    // Create a new basic block and copy instructions into it!
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+
+    // Add basic block mapping.
+    VMap[&BB] = CBB;
+
+    // It is only legal to clone a function if a block address within that
+    // function is never referenced outside of the function.  Given that, we
+    // want to map block addresses from the old function to block addresses in
+    // the clone. (This is different from the generic ValueMapper
+    // implementation, which generates an invalid blockaddress when
+    // cloning a function.)
+    if (BB.hasAddressTaken()) {
+      Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+                                              const_cast<BasicBlock *>(&BB));
+      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+    }
+
+    // Note return instructions for the caller.
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+      Returns.push_back(RI);
+  }
+
   // Loop over all of the instructions in the new function, fixing up operand
   // references as we go. This uses VMap to do all the hard work.
   for (Function::iterator

>From fad2d5ecfc9380ecbc852c05aee30e2dc38c39fe Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 15:23:43 -0700
Subject: [PATCH 06/14] [NFC][Utils] Extract BuildDebugInfoMDMap from
 CloneFunctionInto

Summary:
Extract the logic that builds the metadata map used during metadata cloning into a separate function.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/IR/ValueMap.h              |  4 +-
 llvm/include/llvm/Transforms/Utils/Cloning.h |  7 ++
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 88 +++++++++++---------
 3 files changed, 59 insertions(+), 40 deletions(-)

diff --git a/llvm/include/llvm/IR/ValueMap.h b/llvm/include/llvm/IR/ValueMap.h
index d12d639aaa8886..fd4c08492e0124 100644
--- a/llvm/include/llvm/IR/ValueMap.h
+++ b/llvm/include/llvm/IR/ValueMap.h
@@ -79,6 +79,9 @@ struct ValueMapConfig {
   static mutex_type *getMutex(const ExtraDataT &/*Data*/) { return nullptr; }
 };
 
+/// This type stores Metadata. Used in ValueMap.
+using MDMapT = DenseMap<const Metadata *, TrackingMDRef>;
+
 /// See the file comment.
 template<typename KeyT, typename ValueT, typename Config =ValueMapConfig<KeyT>>
 class ValueMap {
@@ -86,7 +89,6 @@ class ValueMap {
 
   using ValueMapCVH = ValueMapCallbackVH<KeyT, ValueT, Config>;
   using MapT = DenseMap<ValueMapCVH, ValueT, DenseMapInfo<ValueMapCVH>>;
-  using MDMapT = DenseMap<const Metadata *, TrackingMDRef>;
   using ExtraData = typename Config::ExtraData;
 
   MapT Map;
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 4a869ea56dd4ad..55733c18f7dec0 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -209,6 +209,13 @@ DISubprogram *ProcessSubprogramAttachment(const Function &F,
                                           CloneFunctionChangeType Changes,
                                           DebugInfoFinder &DIFinder);
 
+/// Build a map of debug info to use during Metadata cloning.
+/// Returns true if cloning would need module level changes and false if there
+/// would only be local changes.
+bool BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
+                         DebugInfoFinder &DIFinder,
+                         DISubprogram *SPClonedWithinModule);
+
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
 class InlineFunctionInfo {
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 08f85bd54fd588..9f7a24fe301b57 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -153,6 +153,53 @@ DISubprogram *llvm::ProcessSubprogramAttachment(const Function &F,
   return SPClonedWithinModule;
 }
 
+bool llvm::BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
+                               DebugInfoFinder &DIFinder,
+                               DISubprogram *SPClonedWithinModule) {
+  bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+  if (Changes < CloneFunctionChangeType::DifferentModule &&
+      DIFinder.subprogram_count() > 0) {
+    // Turn on module-level changes, since we need to clone (some of) the
+    // debug info metadata.
+    //
+    // FIXME: Metadata effectively owned by a function should be made
+    // local, and only that local metadata should be cloned.
+    ModuleLevelChanges = true;
+
+    auto mapToSelfIfNew = [&MD](MDNode *N) {
+      // Avoid clobbering an existing mapping.
+      (void)MD.try_emplace(N, N);
+    };
+
+    // Avoid cloning types, compile units, and (other) subprograms.
+    SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
+    for (DISubprogram *ISP : DIFinder.subprograms()) {
+      if (ISP != SPClonedWithinModule) {
+        mapToSelfIfNew(ISP);
+        MappedToSelfSPs.insert(ISP);
+      }
+    }
+
+    // If a subprogram isn't going to be cloned skip its lexical blocks as well.
+    for (DIScope *S : DIFinder.scopes()) {
+      auto *LScope = dyn_cast<DILocalScope>(S);
+      if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+        mapToSelfIfNew(S);
+    }
+
+    for (DICompileUnit *CU : DIFinder.compile_units())
+      mapToSelfIfNew(CU);
+
+    for (DIType *Type : DIFinder.types())
+      mapToSelfIfNew(Type);
+  } else {
+    assert(!SPClonedWithinModule &&
+           "Subprogram should be in DIFinder->subprogram_count()...");
+  }
+
+  return ModuleLevelChanges;
+}
+
 // Clone OldFunc into NewFunc, transforming the old arguments into references to
 // VMap values.
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
@@ -212,45 +259,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   DISubprogram *SPClonedWithinModule =
       ProcessSubprogramAttachment(*OldFunc, Changes, DIFinder);
 
-  if (Changes < CloneFunctionChangeType::DifferentModule &&
-      DIFinder.subprogram_count() > 0) {
-    // Turn on module-level changes, since we need to clone (some of) the
-    // debug info metadata.
-    //
-    // FIXME: Metadata effectively owned by a function should be made
-    // local, and only that local metadata should be cloned.
-    ModuleLevelChanges = true;
-
-    auto mapToSelfIfNew = [&VMap](MDNode *N) {
-      // Avoid clobbering an existing mapping.
-      (void)VMap.MD().try_emplace(N, N);
-    };
-
-    // Avoid cloning types, compile units, and (other) subprograms.
-    SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
-    for (DISubprogram *ISP : DIFinder.subprograms()) {
-      if (ISP != SPClonedWithinModule) {
-        mapToSelfIfNew(ISP);
-        MappedToSelfSPs.insert(ISP);
-      }
-    }
-
-    // If a subprogram isn't going to be cloned skip its lexical blocks as well.
-    for (DIScope *S : DIFinder.scopes()) {
-      auto *LScope = dyn_cast<DILocalScope>(S);
-      if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
-        mapToSelfIfNew(S);
-    }
-
-    for (DICompileUnit *CU : DIFinder.compile_units())
-      mapToSelfIfNew(CU);
-
-    for (DIType *Type : DIFinder.types())
-      mapToSelfIfNew(Type);
-  } else {
-    assert(!SPClonedWithinModule &&
-           "Subprogram should be in DIFinder->subprogram_count()...");
-  }
+  ModuleLevelChanges =
+      BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
 
   const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
   // Duplicate the metadata that is attached to the cloned function.

>From 53d82851a06c5305274055ea502df7a8872de0e3 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 15:35:38 -0700
Subject: [PATCH 07/14] [NFC][Utils] Extract CloneFunctionMetadataInto from
 CloneFunctionInto

Summary:
One potentially contentious point in the new function's API is that it expects the caller to
populate the VMap rather than doing metadata cloning wholesale inside it.

We'll need it this way for a subsequent change.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/Transforms/Utils/Cloning.h | 12 +++++++++
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 28 +++++++++++++-------
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 55733c18f7dec0..e9f33f7f1311f8 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -180,6 +180,18 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
                                  ValueMapTypeRemapper *TypeMapper = nullptr,
                                  ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's metadata into NewFunc.
+///
+/// The caller is expected to populate \p VMap beforehand and set an appropriate
+/// \p RemapFlag.
+///
+/// NOTE: This function doesn't clone !llvm.dbg.cu when cloning into a different
+/// module. Use CloneFunctionInto for that behavior.
+void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
+                               ValueToValueMapTy &VMap, RemapFlags RemapFlag,
+                               ValueMapTypeRemapper *TypeMapper = nullptr,
+                               ValueMaterializer *Materializer = nullptr);
+
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                const Instruction *StartingInst,
                                ValueToValueMapTy &VMap, bool ModuleLevelChanges,
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 9f7a24fe301b57..766722f302ef21 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -200,6 +200,22 @@ bool llvm::BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
   return ModuleLevelChanges;
 }
 
+void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
+                                     ValueToValueMapTy &VMap,
+                                     RemapFlags RemapFlag,
+                                     ValueMapTypeRemapper *TypeMapper,
+                                     ValueMaterializer *Materializer) {
+  // Duplicate the metadata that is attached to the cloned function.
+  // Subprograms/CUs/types that were already mapped to themselves won't be
+  // duplicated.
+  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+  OldFunc->getAllMetadata(MDs);
+  for (auto MD : MDs) {
+    NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
+                                                TypeMapper, Materializer));
+  }
+}
+
 // Clone OldFunc into NewFunc, transforming the old arguments into references to
 // VMap values.
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
@@ -263,15 +279,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
       BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
 
   const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
-  // Duplicate the metadata that is attached to the cloned function.
-  // Subprograms/CUs/types that were already mapped to themselves won't be
-  // duplicated.
-  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
-  OldFunc->getAllMetadata(MDs);
-  for (auto MD : MDs) {
-    NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
-                                                TypeMapper, Materializer));
-  }
+
+  CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
+                            Materializer);
 
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of

>From b2682005cb30f83809808ec06d54839424c3477f Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Thu, 12 Sep 2024 15:50:25 -0700
Subject: [PATCH 08/14] [NFC][Utils] Extract CloneFunctionBodyInto from
 CloneFunctionInto

Summary:
This and previously extracted functions will be used in a later diff.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/Transforms/Utils/Cloning.h |  9 ++
 llvm/lib/Transforms/Utils/CloneFunction.cpp  | 96 +++++++++++---------
 2 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index e9f33f7f1311f8..151b875d005a2f 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -192,6 +192,15 @@ void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
                                ValueMapTypeRemapper *TypeMapper = nullptr,
                                ValueMaterializer *Materializer = nullptr);
 
+/// Clone OldFunc's body NewFunct.
+void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
+                           ValueToValueMapTy &VMap, RemapFlags RemapFlag,
+                           SmallVectorImpl<ReturnInst *> &Returns,
+                           const char *NameSuffix = "",
+                           ClonedCodeInfo *CodeInfo = nullptr,
+                           ValueMapTypeRemapper *TypeMapper = nullptr,
+                           ValueMaterializer *Materializer = nullptr);
+
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                const Instruction *StartingInst,
                                ValueToValueMapTy &VMap, bool ModuleLevelChanges,
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 766722f302ef21..24e5e2f3aa66df 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -216,6 +216,59 @@ void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
   }
 }
 
+void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
+                                 ValueToValueMapTy &VMap, RemapFlags RemapFlag,
+                                 SmallVectorImpl<ReturnInst *> &Returns,
+                                 const char *NameSuffix,
+                                 ClonedCodeInfo *CodeInfo,
+                                 ValueMapTypeRemapper *TypeMapper,
+                                 ValueMaterializer *Materializer) {
+  if (OldFunc->isDeclaration())
+    return;
+
+  // Loop over all of the basic blocks in the function, cloning them as
+  // appropriate.  Note that we save BE this way in order to handle cloning of
+  // recursive functions into themselves.
+  for (const BasicBlock &BB : *OldFunc) {
+
+    // Create a new basic block and copy instructions into it!
+    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+
+    // Add basic block mapping.
+    VMap[&BB] = CBB;
+
+    // It is only legal to clone a function if a block address within that
+    // function is never referenced outside of the function.  Given that, we
+    // want to map block addresses from the old function to block addresses in
+    // the clone. (This is different from the generic ValueMapper
+    // implementation, which generates an invalid blockaddress when
+    // cloning a function.)
+    if (BB.hasAddressTaken()) {
+      Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
+                                              const_cast<BasicBlock *>(&BB));
+      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+    }
+
+    // Note return instructions for the caller.
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+      Returns.push_back(RI);
+  }
+
+  // Loop over all of the instructions in the new function, fixing up operand
+  // references as we go. This uses VMap to do all the hard work.
+  for (Function::iterator
+           BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
+           BE = NewFunc->end();
+       BB != BE; ++BB)
+    // Loop over all instructions, fixing each one as we find it, and any
+    // attached debug-info records.
+    for (Instruction &II : *BB) {
+      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+      RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
+                          RemapFlag, TypeMapper, Materializer);
+    }
+}
+
 // Clone OldFunc into NewFunc, transforming the old arguments into references to
 // VMap values.
 void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
@@ -283,47 +336,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
                             Materializer);
 
-  // Loop over all of the basic blocks in the function, cloning them as
-  // appropriate.  Note that we save BE this way in order to handle cloning of
-  // recursive functions into themselves.
-  for (const BasicBlock &BB : *OldFunc) {
-
-    // Create a new basic block and copy instructions into it!
-    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
-
-    // Add basic block mapping.
-    VMap[&BB] = CBB;
-
-    // It is only legal to clone a function if a block address within that
-    // function is never referenced outside of the function.  Given that, we
-    // want to map block addresses from the old function to block addresses in
-    // the clone. (This is different from the generic ValueMapper
-    // implementation, which generates an invalid blockaddress when
-    // cloning a function.)
-    if (BB.hasAddressTaken()) {
-      Constant *OldBBAddr = BlockAddress::get(const_cast<Function *>(OldFunc),
-                                              const_cast<BasicBlock *>(&BB));
-      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
-    }
-
-    // Note return instructions for the caller.
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
-      Returns.push_back(RI);
-  }
-
-  // Loop over all of the instructions in the new function, fixing up operand
-  // references as we go. This uses VMap to do all the hard work.
-  for (Function::iterator
-           BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
-           BE = NewFunc->end();
-       BB != BE; ++BB)
-    // Loop over all instructions, fixing each one as we find it, and any
-    // attached debug-info records.
-    for (Instruction &II : *BB) {
-      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
-      RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
-                          RemapFlag, TypeMapper, Materializer);
-    }
+  CloneFunctionBodyInto(NewFunc, OldFunc, VMap, RemapFlag, Returns, NameSuffix,
+                        CodeInfo, TypeMapper, Materializer);
 
   // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
   // same module, the compile unit will already be listed (or not). When

>From a6cba599a55cd7e89a7eaf438541601a7a037142 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sat, 14 Sep 2024 16:02:51 -0700
Subject: [PATCH 09/14] [Utils] Eliminate DISubprogram set from
 BuildDebugInfoMDMap

Summary:
Previously, we'd add all SPs distinct from the cloned one into a set. Then when cloning a local
scope we'd check if it's from one of those 'distinct' SPs by checking if it's in the set.

We don't need to do that. We can just check against the cloned SP directly and drop the set.

Test Plan:
ninja check-llvm-unit
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 24e5e2f3aa66df..971d869bd05bfc 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -172,18 +172,15 @@ bool llvm::BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
     };
 
     // Avoid cloning types, compile units, and (other) subprograms.
-    SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
     for (DISubprogram *ISP : DIFinder.subprograms()) {
-      if (ISP != SPClonedWithinModule) {
+      if (ISP != SPClonedWithinModule)
         mapToSelfIfNew(ISP);
-        MappedToSelfSPs.insert(ISP);
-      }
     }
 
     // If a subprogram isn't going to be cloned skip its lexical blocks as well.
     for (DIScope *S : DIFinder.scopes()) {
       auto *LScope = dyn_cast<DILocalScope>(S);
-      if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+      if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
         mapToSelfIfNew(S);
     }
 

>From 0eefa6ac2e6f019b1e94b46fddb8c585e82baa70 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 02:59:24 -0700
Subject: [PATCH 10/14] [NFC] Remove adhoc definition of MDMapT in IRMover

Summary:
The typedef was there probably because the type alias in ValueMap was private.

Test Plan:
ninja check-llvm-unit
---
 llvm/include/llvm/Linker/IRMover.h | 4 +---
 llvm/lib/Linker/IRMover.cpp        | 3 ---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Linker/IRMover.h b/llvm/include/llvm/Linker/IRMover.h
index 1e3c5394ffa2af..89e9cbe0be18e1 100644
--- a/llvm/include/llvm/Linker/IRMover.h
+++ b/llvm/include/llvm/Linker/IRMover.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FunctionExtras.h"
+#include "llvm/IR/ValueMap.h"
 #include <functional>
 
 namespace llvm {
@@ -41,9 +42,6 @@ class IRMover {
     static bool isEqual(const StructType *LHS, const StructType *RHS);
   };
 
-  /// Type of the Metadata map in \a ValueToValueMapTy.
-  typedef DenseMap<const Metadata *, TrackingMDRef> MDMapT;
-
 public:
   class IdentifiedStructTypeSet {
     // The set of opaque types is the composite module.
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 3a6c2678cd157f..26e06a89fbf6d0 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -380,9 +380,6 @@ class LocalValueMaterializer final : public ValueMaterializer {
   Value *materialize(Value *V) override;
 };
 
-/// Type of the Metadata map in \a ValueToValueMapTy.
-typedef DenseMap<const Metadata *, TrackingMDRef> MDMapT;
-
 /// This is responsible for keeping track of the state used for moving data
 /// from SrcM to DstM.
 class IRLinker {

>From d7f598cc0f86e62838bd02fe5217d6eb58152956 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 04:39:20 -0700
Subject: [PATCH 11/14] [Utils] Identity map global debug info on first use in
 CloneFunction*

Summary:
To avoid cloning 'global' debug info, the CloneFunction implementation used to eagerly identity map a known
subset of global debug info into ValueMap's MD map. In larger modules with a meaningful volume of
debug info this gets very expensive.

By passing such global metadata via an IdentityMD set for the ValueMapper to map on first use, we
get several benefits:

1. Mapping metadata is not cheap, particularly because of tracking. When cloning a Function we
identity map lots of global module-level metadata to avoid cloning it, while only a fraction of it
is actually used by the function. Mapping on first use is a lot faster for modules with a meaningful
amount of debug info.

2. Eagerly identity mapping metadata makes it harder to cache module-level data (e.g. a set of
metadata nodes in a \a DICompileUnit). With this patch we can cache certain module-level metadata
calculations to speed things up further.

Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up
CoroSplitPass, which is one of the heavier users of cloning:

|                 | Baseline | IdentityMD set |
|-----------------+----------+----------------|
| CoroSplitPass   | 306ms    | 221ms          |
| CoroCloner      | 101ms    | 72ms           |
|-----------------+----------+----------------|
| Speed up        | 1x       | 1.4x           |

Test Plan:
ninja check-llvm-unit
ninja check-llvm
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  | 18 ++---
 .../llvm/Transforms/Utils/ValueMapper.h       | 67 ++++++++++++++-----
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 58 ++++++++--------
 llvm/lib/Transforms/Utils/ValueMapper.cpp     | 19 ++++--
 4 files changed, 103 insertions(+), 59 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 151b875d005a2f..5b5fcbe9ea581a 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -190,7 +190,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
 void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
                                ValueToValueMapTy &VMap, RemapFlags RemapFlag,
                                ValueMapTypeRemapper *TypeMapper = nullptr,
-                               ValueMaterializer *Materializer = nullptr);
+                               ValueMaterializer *Materializer = nullptr,
+                               const MetadataSetTy *IdentityMD = nullptr);
 
 /// Clone OldFunc's body NewFunct.
 void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
@@ -199,7 +200,8 @@ void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
                            const char *NameSuffix = "",
                            ClonedCodeInfo *CodeInfo = nullptr,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                const Instruction *StartingInst,
@@ -230,12 +232,12 @@ DISubprogram *ProcessSubprogramAttachment(const Function &F,
                                           CloneFunctionChangeType Changes,
                                           DebugInfoFinder &DIFinder);
 
-/// Build a map of debug info to use during Metadata cloning.
-/// Returns true if cloning would need module level changes and false if there
-/// would only be local changes.
-bool BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
-                         DebugInfoFinder &DIFinder,
-                         DISubprogram *SPClonedWithinModule);
+/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
+/// needs to be identity mapped during Metadata cloning.
+void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
+                                CloneFunctionChangeType Changes,
+                                DebugInfoFinder &DIFinder,
+                                DISubprogram *SPClonedWithinModule);
 
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 743cfeb7ef3f02..b8d612f11d519f 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -15,6 +15,7 @@
 #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/simple_ilist.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
@@ -35,6 +36,7 @@ class Value;
 
 using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
 using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
+using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
@@ -136,6 +138,18 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
 /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
 /// pass into the schedule*() functions.
 ///
+/// NOTE: \c IdentityMD is used by CloneFunction* to directly specify metadata
+/// that should be identity mapped (and hence not cloned). The metadata will be
+/// identity mapped in \c VM on first use. There are several reasons for doing
+/// it this way rather than eagerly identity mapping metadata nodes in \c VM:
+/// 1. Mapping metadata is not cheap, particularly because of tracking.
+/// 2. When cloning a Function we identity map lots of global module-level
+///    metadata to avoid cloning it, while only a fraction of it is actually
+///    used by the function. Mapping on first use is a lot faster for modules
+///    with meaningful amount of debug info.
+/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
+///    data (e.g. a set of metadata nodes in a \a DICompileUnit).
+///
 /// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
 /// ValueToValueMapTy.  We should template \a ValueMapper (and its
 /// implementation classes), and explicitly instantiate on two concrete
@@ -152,7 +166,8 @@ class ValueMapper {
 public:
   ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
               ValueMapTypeRemapper *TypeMapper = nullptr,
-              ValueMaterializer *Materializer = nullptr);
+              ValueMaterializer *Materializer = nullptr,
+              const MetadataSetTy *IdentityMD = nullptr);
   ValueMapper(ValueMapper &&) = delete;
   ValueMapper(const ValueMapper &) = delete;
   ValueMapper &operator=(ValueMapper &&) = delete;
@@ -218,8 +233,10 @@ class ValueMapper {
 inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
                        RemapFlags Flags = RF_None,
                        ValueMapTypeRemapper *TypeMapper = nullptr,
-                       ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapValue(*V);
+                       ValueMaterializer *Materializer = nullptr,
+                       const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapValue(*V);
 }
 
 /// Lookup or compute a mapping for a piece of metadata.
@@ -231,7 +248,9 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
 ///     \c MD.
 ///  3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
 ///     re-wrap its return (returning nullptr on nullptr).
-///  4. Else, \c MD is an \a MDNode.  These are remapped, along with their
+///  4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
+///     and return it.
+///  5. Else, \c MD is an \a MDNode.  These are remapped, along with their
 ///     transitive operands.  Distinct nodes are duplicated or moved depending
 ///     on \a RF_MoveDistinctNodes.  Uniqued nodes are remapped like constants.
 ///
@@ -240,16 +259,20 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
 inline Metadata *MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
                              RemapFlags Flags = RF_None,
                              ValueMapTypeRemapper *TypeMapper = nullptr,
-                             ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMetadata(*MD);
+                             ValueMaterializer *Materializer = nullptr,
+                             const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapMetadata(*MD);
 }
 
 /// Version of MapMetadata with type safety for MDNode.
 inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
                            RemapFlags Flags = RF_None,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMDNode(*MD);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapMDNode(*MD);
 }
 
 /// Convert the instruction operands from referencing the current values into
@@ -263,8 +286,10 @@ inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
 inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
                              RemapFlags Flags = RF_None,
                              ValueMapTypeRemapper *TypeMapper = nullptr,
-                             ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapInstruction(*I);
+                             ValueMaterializer *Materializer = nullptr,
+                             const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .remapInstruction(*I);
 }
 
 /// Remap the Values used in the DbgRecord \a DR using the value map \a
@@ -272,8 +297,10 @@ inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
 inline void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM,
                            RemapFlags Flags = RF_None,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapDbgRecord(M, *DR);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .remapDbgRecord(M, *DR);
 }
 
 /// Remap the Values used in the DbgRecords \a Range using the value map \a
@@ -283,8 +310,9 @@ inline void RemapDbgRecordRange(Module *M,
                                 ValueToValueMapTy &VM,
                                 RemapFlags Flags = RF_None,
                                 ValueMapTypeRemapper *TypeMapper = nullptr,
-                                ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer)
+                                ValueMaterializer *Materializer = nullptr,
+                                const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
       .remapDbgRecordRange(M, Range);
 }
 
@@ -297,16 +325,19 @@ inline void RemapDbgRecordRange(Module *M,
 inline void RemapFunction(Function &F, ValueToValueMapTy &VM,
                           RemapFlags Flags = RF_None,
                           ValueMapTypeRemapper *TypeMapper = nullptr,
-                          ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapFunction(F);
+                          ValueMaterializer *Materializer = nullptr,
+                          const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD).remapFunction(F);
 }
 
 /// Version of MapValue with type safety for Constant.
 inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
                           RemapFlags Flags = RF_None,
                           ValueMapTypeRemapper *TypeMapper = nullptr,
-                          ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapConstant(*V);
+                          ValueMaterializer *Materializer = nullptr,
+                          const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapConstant(*V);
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 971d869bd05bfc..0183ed3abcdb0f 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -153,63 +153,57 @@ DISubprogram *llvm::ProcessSubprogramAttachment(const Function &F,
   return SPClonedWithinModule;
 }
 
-bool llvm::BuildDebugInfoMDMap(MDMapT &MD, CloneFunctionChangeType Changes,
-                               DebugInfoFinder &DIFinder,
-                               DISubprogram *SPClonedWithinModule) {
-  bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+void llvm::FindDebugInfoToIdentityMap(MetadataSetTy &MD,
+                                      CloneFunctionChangeType Changes,
+                                      DebugInfoFinder &DIFinder,
+                                      DISubprogram *SPClonedWithinModule) {
   if (Changes < CloneFunctionChangeType::DifferentModule &&
       DIFinder.subprogram_count() > 0) {
-    // Turn on module-level changes, since we need to clone (some of) the
-    // debug info metadata.
+    // Even if Changes are local only, we turn on module-level changes, since we
+    // need to clone (some of) the debug info metadata.
     //
     // FIXME: Metadata effectively owned by a function should be made
     // local, and only that local metadata should be cloned.
-    ModuleLevelChanges = true;
-
-    auto mapToSelfIfNew = [&MD](MDNode *N) {
-      // Avoid clobbering an existing mapping.
-      (void)MD.try_emplace(N, N);
-    };
 
     // Avoid cloning types, compile units, and (other) subprograms.
     for (DISubprogram *ISP : DIFinder.subprograms()) {
       if (ISP != SPClonedWithinModule)
-        mapToSelfIfNew(ISP);
+        MD.insert(ISP);
     }
 
     // If a subprogram isn't going to be cloned skip its lexical blocks as well.
     for (DIScope *S : DIFinder.scopes()) {
       auto *LScope = dyn_cast<DILocalScope>(S);
       if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-        mapToSelfIfNew(S);
+        MD.insert(S);
     }
 
     for (DICompileUnit *CU : DIFinder.compile_units())
-      mapToSelfIfNew(CU);
+      MD.insert(CU);
 
     for (DIType *Type : DIFinder.types())
-      mapToSelfIfNew(Type);
+      MD.insert(Type);
   } else {
     assert(!SPClonedWithinModule &&
            "Subprogram should be in DIFinder->subprogram_count()...");
   }
-
-  return ModuleLevelChanges;
 }
 
 void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
                                      ValueToValueMapTy &VMap,
                                      RemapFlags RemapFlag,
                                      ValueMapTypeRemapper *TypeMapper,
-                                     ValueMaterializer *Materializer) {
+                                     ValueMaterializer *Materializer,
+                                     const MetadataSetTy *IdentityMD) {
   // Duplicate the metadata that is attached to the cloned function.
   // Subprograms/CUs/types that were already mapped to themselves won't be
   // duplicated.
   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
   OldFunc->getAllMetadata(MDs);
   for (auto MD : MDs) {
-    NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
-                                                TypeMapper, Materializer));
+    NewFunc->addMetadata(MD.first,
+                         *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
+                                      Materializer, IdentityMD));
   }
 }
 
@@ -219,7 +213,8 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
                                  const char *NameSuffix,
                                  ClonedCodeInfo *CodeInfo,
                                  ValueMapTypeRemapper *TypeMapper,
-                                 ValueMaterializer *Materializer) {
+                                 ValueMaterializer *Materializer,
+                                 const MetadataSetTy *IdentityMD) {
   if (OldFunc->isDeclaration())
     return;
 
@@ -260,9 +255,10 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
     // Loop over all instructions, fixing each one as we find it, and any
     // attached debug-info records.
     for (Instruction &II : *BB) {
-      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer,
+                       IdentityMD);
       RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
-                          RemapFlag, TypeMapper, Materializer);
+                          RemapFlag, TypeMapper, Materializer, IdentityMD);
     }
 }
 
@@ -325,16 +321,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   DISubprogram *SPClonedWithinModule =
       ProcessSubprogramAttachment(*OldFunc, Changes, DIFinder);
 
-  ModuleLevelChanges =
-      BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
+  MetadataSetTy IdentityMD;
+  FindDebugInfoToIdentityMap(IdentityMD, Changes, DIFinder,
+                             SPClonedWithinModule);
 
-  const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
+  // Current implementation always upgrades from local changes to module level
+  // changes due to the way metadata cloning is done. See
+  // FindDebugInfoToIdentityMap for more details.
+  const auto RemapFlag = RF_None;
 
   CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
-                            Materializer);
+                            Materializer, &IdentityMD);
 
   CloneFunctionBodyInto(NewFunc, OldFunc, VMap, RemapFlag, Returns, NameSuffix,
-                        CodeInfo, TypeMapper, Materializer);
+                        CodeInfo, TypeMapper, Materializer, &IdentityMD);
 
   // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
   // same module, the compile unit will already be listed (or not). When
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 56e0eca7cea56c..6d302f80e018bd 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -120,12 +120,14 @@ class Mapper {
   SmallVector<WorklistEntry, 4> Worklist;
   SmallVector<DelayedBasicBlock, 1> DelayedBBs;
   SmallVector<Constant *, 16> AppendingInits;
+  const MetadataSetTy *IdentityMD;
 
 public:
   Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
-         ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
+         ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer,
+         const MetadataSetTy *IdentityMD)
       : Flags(Flags), TypeMapper(TypeMapper),
-        MCs(1, MappingContext(VM, Materializer)) {}
+        MCs(1, MappingContext(VM, Materializer)), IdentityMD(IdentityMD) {}
 
   /// ValueMapper should explicitly call \a flush() before destruction.
   ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
@@ -900,6 +902,14 @@ std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
     return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
   }
 
+  // Map metadata from IdentityMD on first use. We need to add these nodes to
+  // the mapping as otherwise metadata nodes numbering gets messed up. This is
+  // still economical because the amount of data in IdentityMD may be a lot
+  // larger than what will actually get used.
+  if (IdentityMD && IdentityMD->contains(MD)) {
+    return getVM().MD()[MD] = TrackingMDRef(const_cast<Metadata *>(MD));
+  }
+
   assert(isa<MDNode>(MD) && "Expected a metadata node");
 
   return std::nullopt;
@@ -1199,8 +1209,9 @@ class FlushingMapper {
 
 ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
                          ValueMapTypeRemapper *TypeMapper,
-                         ValueMaterializer *Materializer)
-    : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
+                         ValueMaterializer *Materializer,
+                         const MetadataSetTy *IdentityMD)
+    : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer, IdentityMD)) {}
 
 ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
 

>From e14a46f5224f0d974ad7dc5237cd4b9f82bb991b Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 06:19:49 -0700
Subject: [PATCH 12/14] [Coro] Prebuild a global debug info set and share it
 between all coroutine clones

Summary:
CoroCloner, by calling into CloneFunctionInto, does a lot of repeated work priming DIFinder and building
a list of global debug info metadata. For programs compiled with full debug info this gets very
expensive.

This diff builds the data once and shares it between all clones.

Anecdata for a sample cpp source file compiled with full debug info:

|                 | Baseline | IdentityMD set | Prebuilt GlobalDI (cur.) |
|-----------------+----------+----------------+--------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms                     |
| CoroCloner      | 101ms    | 72ms           | 0.5ms                    |
| CollectGlobalDI | -        | -              | 63ms                     |
|-----------------+----------+----------------+--------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x                     |

Note that CollectGlobalDI happens once *per coroutine* rather than per clone.

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample internal source file, checked time trace output for scope timings.
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 79 +++++++++++++++-----
 1 file changed, 61 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 387b1e6942f186..4117ae6dd474fa 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -81,6 +82,23 @@ using namespace llvm;
 
 namespace {
 
+/// Populate \p GlobalDebugInfo with the (known) subset of global debug info
+/// metadata that the function \p F may reference.
+///
+/// Cloning can consult this set to avoid duplicating debug info that \p F does
+/// not own; a single set is shared by every clone made of \p F.
+void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
+  TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
+  constexpr auto Changes = CloneFunctionChangeType::LocalChangesOnly;
+
+  // Returns the subprogram (if any) that would be cloned within the module.
+  DebugInfoFinder Finder;
+  DISubprogram *ClonedSP = ProcessSubprogramAttachment(F, Changes, Finder);
+
+  // Record what cloning must leave alone (see FindDebugInfoToIdentityMap).
+  FindDebugInfoToIdentityMap(GlobalDebugInfo, Changes, Finder, ClonedSP);
+}
+
 /// A little helper class for building
 class CoroCloner {
 public:
@@ -117,21 +135,26 @@ class CoroCloner {
 
   TargetTransformInfo &TTI;
 
+  const MetadataSetTy &GlobalDebugInfo;
+
   /// Create a cloner for a switch lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             Kind FKind, TargetTransformInfo &TTI)
+             Kind FKind, TargetTransformInfo &TTI,
+             const MetadataSetTy &GlobalDebugInfo)
       : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
-        Builder(OrigF.getContext()), TTI(TTI) {
+        Builder(OrigF.getContext()), TTI(TTI),
+        GlobalDebugInfo(GlobalDebugInfo) {
     assert(Shape.ABI == coro::ABI::Switch);
   }
 
   /// Create a cloner for a continuation lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
              Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
-             TargetTransformInfo &TTI)
+             TargetTransformInfo &TTI, const MetadataSetTy &GlobalDebugInfo)
       : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
         FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
-        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) {
+        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI),
+        GlobalDebugInfo(GlobalDebugInfo) {
     assert(Shape.ABI == coro::ABI::Retcon ||
            Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
     assert(NewF && "need existing function for continuation");
@@ -142,10 +165,11 @@ class CoroCloner {
   /// Create a clone for a switch lowering.
   static Function *createClone(Function &OrigF, const Twine &Suffix,
                                coro::Shape &Shape, Kind FKind,
-                               TargetTransformInfo &TTI) {
+                               TargetTransformInfo &TTI,
+                               const MetadataSetTy &GlobalDebugInfo) {
     TimeTraceScope FunctionScope("CoroCloner");
 
-    CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI);
+    CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI, GlobalDebugInfo);
     Cloner.create();
     return Cloner.getFunction();
   }
@@ -154,10 +178,12 @@ class CoroCloner {
   static Function *createClone(Function &OrigF, const Twine &Suffix,
                                coro::Shape &Shape, Function *NewF,
                                AnyCoroSuspendInst *ActiveSuspend,
-                               TargetTransformInfo &TTI) {
+                               TargetTransformInfo &TTI,
+                               const MetadataSetTy &GlobalDebugInfo) {
     TimeTraceScope FunctionScope("CoroCloner");
 
-    CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI);
+    CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI,
+                      GlobalDebugInfo);
     Cloner.create();
     return Cloner.getFunction();
   }
@@ -1013,8 +1039,11 @@ void CoroCloner::create() {
   auto savedLinkage = NewF->getLinkage();
   NewF->setLinkage(llvm::GlobalValue::ExternalLinkage);
 
-  CloneFunctionInto(NewF, &OrigF, VMap,
-                    CloneFunctionChangeType::LocalChangesOnly, Returns);
+  CloneFunctionAttributesInto(NewF, &OrigF, VMap, false);
+  CloneFunctionMetadataInto(NewF, &OrigF, VMap, RF_None, nullptr, nullptr,
+                            &GlobalDebugInfo);
+  CloneFunctionBodyInto(NewF, &OrigF, VMap, RF_None, Returns, "", nullptr,
+                        nullptr, nullptr, &GlobalDebugInfo);
 
   auto &Context = NewF->getContext();
 
@@ -1488,16 +1517,22 @@ struct SwitchCoroutineSplitter {
                     TargetTransformInfo &TTI) {
     assert(Shape.ABI == coro::ABI::Switch);
 
+    MetadataSetTy GlobalDebugInfo;
+    collectGlobalDebugInfo(F, GlobalDebugInfo);
+
     // Create a resume clone by cloning the body of the original function,
     // setting new entry block and replacing coro.suspend an appropriate value
     // to force resume or cleanup pass for every suspend point.
     createResumeEntryBlock(F, Shape);
-    auto *ResumeClone = CoroCloner::createClone(
-        F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
-    auto *DestroyClone = CoroCloner::createClone(
-        F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
+    auto *ResumeClone = CoroCloner::createClone(F, ".resume", Shape,
+                                                CoroCloner::Kind::SwitchResume,
+                                                TTI, GlobalDebugInfo);
+    auto *DestroyClone = CoroCloner::createClone(F, ".destroy", Shape,
+                                                 CoroCloner::Kind::SwitchUnwind,
+                                                 TTI, GlobalDebugInfo);
     auto *CleanupClone = CoroCloner::createClone(
-        F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
+        F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI,
+        GlobalDebugInfo);
 
     postSplitCleanup(*ResumeClone);
     postSplitCleanup(*DestroyClone);
@@ -1882,12 +1917,16 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
   }
 
   assert(Clones.size() == Shape.CoroSuspends.size());
+
+  MetadataSetTy GlobalDebugInfo;
+  collectGlobalDebugInfo(F, GlobalDebugInfo);
+
   for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
     auto *Suspend = Shape.CoroSuspends[Idx];
     auto *Clone = Clones[Idx];
 
     CoroCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, Suspend,
-                            TTI);
+                            TTI, GlobalDebugInfo);
   }
 }
 
@@ -2012,12 +2051,16 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
   }
 
   assert(Clones.size() == Shape.CoroSuspends.size());
+
+  MetadataSetTy GlobalDebugInfo;
+  collectGlobalDebugInfo(F, GlobalDebugInfo);
+
   for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
     auto Suspend = Shape.CoroSuspends[i];
     auto Clone = Clones[i];
 
-    CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend,
-                            TTI);
+    CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI,
+                            GlobalDebugInfo);
   }
 }
 

>From d1479b9884641799cef12f3020906d225790d38c Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 10:51:38 -0700
Subject: [PATCH 13/14] [Analysis] Add DebugInfoCache analysis

Summary:
The analysis simply primes and caches DebugInfoFinders for each DICompileUnit in a module. This
allows (future) callers like CoroSplitPass to compute global debug info metadata (required for
coroutine function cloning) much faster. Specifically, pay the price of DICompileUnit processing
only once per compile unit, rather than once per coroutine.

Test Plan:
Added a smoke test for the new analysis
ninja check-llvm-unit
---
 llvm/include/llvm/Analysis/DebugInfoCache.h   |  43 ++++
 llvm/include/llvm/IR/DebugInfo.h              |   4 +-
 llvm/lib/Analysis/CMakeLists.txt              |   1 +
 llvm/lib/Analysis/DebugInfoCache.cpp          |  47 ++++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/unittests/Analysis/CMakeLists.txt        |   1 +
 .../unittests/Analysis/DebugInfoCacheTest.cpp | 211 ++++++++++++++++++
 8 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 llvm/include/llvm/Analysis/DebugInfoCache.h
 create mode 100644 llvm/lib/Analysis/DebugInfoCache.cpp
 create mode 100644 llvm/unittests/Analysis/DebugInfoCacheTest.cpp

diff --git a/llvm/include/llvm/Analysis/DebugInfoCache.h b/llvm/include/llvm/Analysis/DebugInfoCache.h
new file mode 100644
index 00000000000000..9de7357829a770
--- /dev/null
+++ b/llvm/include/llvm/Analysis/DebugInfoCache.h
@@ -0,0 +1,43 @@
+//===- llvm/Analysis/DebugInfoCache.h - debug info cache ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an analysis that builds a cache of debug info for each
+// DICompileUnit in a module.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEBUGINFOCACHE_H
+#define LLVM_ANALYSIS_DEBUGINFOCACHE_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class DebugInfoCache {
+public:
+  using DIFinderCache = SmallDenseMap<const DICompileUnit *, DebugInfoFinder>;
+  DIFinderCache Result; ///< One DebugInfoFinder per compile unit of the module.
+  /// Fills Result from \p M; defined out of line in DebugInfoCache.cpp.
+  DebugInfoCache(const Module &M);
+  /// Returns true unless the checker for this analysis reports it preserved.
+  bool invalidate(Module &, const PreservedAnalyses &,
+                  ModuleAnalysisManager::Invalidator &);
+};
+
+class DebugInfoCacheAnalysis
+    : public AnalysisInfoMixin<DebugInfoCacheAnalysis> {
+  friend AnalysisInfoMixin<DebugInfoCacheAnalysis>;
+  static AnalysisKey Key; // defined in DebugInfoCache.cpp
+
+public:
+  using Result = DebugInfoCache;
+  Result run(Module &M, ModuleAnalysisManager &); // returns DebugInfoCache(M)
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index 73f45c3769be44..11907fbb7f20b3 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -120,11 +120,13 @@ class DebugInfoFinder {
   /// Process subprogram.
   void processSubprogram(DISubprogram *SP);
 
+  /// Process a compile unit.
+  void processCompileUnit(DICompileUnit *CU);
+
   /// Clear all lists.
   void reset();
 
 private:
-  void processCompileUnit(DICompileUnit *CU);
   void processScope(DIScope *Scope);
   void processType(DIType *DT);
   bool addCompileUnit(DICompileUnit *CU);
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 393803fad89383..432476a0fa69bf 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_component_library(LLVMAnalysis
   DDGPrinter.cpp
   ConstraintSystem.cpp
   Delinearization.cpp
+  DebugInfoCache.cpp
   DemandedBits.cpp
   DependenceAnalysis.cpp
   DependenceGraphBuilder.cpp
diff --git a/llvm/lib/Analysis/DebugInfoCache.cpp b/llvm/lib/Analysis/DebugInfoCache.cpp
new file mode 100644
index 00000000000000..eac8748de468a5
--- /dev/null
+++ b/llvm/lib/Analysis/DebugInfoCache.cpp
@@ -0,0 +1,47 @@
+//===- llvm/Analysis/DebugInfoCache.cpp - debug info cache ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an analysis that builds a cache of debug info for each
+// DICompileUnit in a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfoCache.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+namespace {
+DebugInfoFinder processCompileUnit(DICompileUnit *CU) {
+  // A fresh finder per call: nothing is carried over between compile units.
+  DebugInfoFinder Finder;
+  Finder.processCompileUnit(CU);
+  return Finder;
+}
+} // namespace
+
+DebugInfoCache::DebugInfoCache(const Module &M) {
+  // Map every compile unit the module lists to a finder primed from that unit
+  // alone; the compile unit pointer itself is the key.
+  for (auto *Unit : M.debug_compile_units())
+    Result[Unit] = processCompileUnit(Unit);
+}
+
+bool DebugInfoCache::invalidate(Module &M, const PreservedAnalyses &PA,
+                                ModuleAnalysisManager::Invalidator &) {
+  // Only an explicit invalidation drops the cache: the result is treated as
+  // stateless, so it stays preserved in every other case.
+  // NOTE(review): cached finders hold metadata pointers - confirm this is OK.
+  return !PA.getChecker<DebugInfoCacheAnalysis>().preservedWhenStateless();
+}
+
+AnalysisKey DebugInfoCacheAnalysis::Key;
+
+DebugInfoCache DebugInfoCacheAnalysis::run(Module &M, ModuleAnalysisManager &) {
+  return DebugInfoCache(M); // the constructor does all of the priming
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c34f9148cce58b..e80f747f6b2753 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Analysis/DDGPrinter.h"
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
 #include "llvm/Analysis/DXILResource.h"
+#include "llvm/Analysis/DebugInfoCache.h"
 #include "llvm/Analysis/Delinearization.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 3dc7f185f330c5..7556a318abc292 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -21,6 +21,7 @@
 MODULE_ANALYSIS("callgraph", CallGraphAnalysis())
 MODULE_ANALYSIS("collector-metadata", CollectorMetadataAnalysis())
 MODULE_ANALYSIS("ctx-prof-analysis", CtxProfAnalysis())
+MODULE_ANALYSIS("debug-info-cache", DebugInfoCacheAnalysis())
 MODULE_ANALYSIS("dxil-metadata", DXILMetadataAnalysis())
 MODULE_ANALYSIS("dxil-resource", DXILResourceAnalysis())
 MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
index a3f4c10fcb9ad2..6412e907477c53 100644
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -25,6 +25,7 @@ set(ANALYSIS_TEST_SOURCES
   ConstraintSystemTest.cpp
   CtxProfAnalysisTest.cpp
   DDGTest.cpp
+  DebugInfoCacheTest.cpp
   DomTreeUpdaterTest.cpp
   DXILResourceTest.cpp
   GraphWriterTest.cpp
diff --git a/llvm/unittests/Analysis/DebugInfoCacheTest.cpp b/llvm/unittests/Analysis/DebugInfoCacheTest.cpp
new file mode 100644
index 00000000000000..be9cc9e3e6c4ad
--- /dev/null
+++ b/llvm/unittests/Analysis/DebugInfoCacheTest.cpp
@@ -0,0 +1,211 @@
+//===- DebugInfoCacheTest.cpp - DebugInfoCache unit tests ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfoCache.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+// Forward declare the assembly
+extern StringRef MultiCUModule;
+
+const DICompileUnit *findCU(const Module &M, StringRef FileName) {
+  // Linear scan over the module's compile units; first file-name match wins.
+  const DICompileUnit *Found = nullptr;
+  for (const DICompileUnit *Unit : M.debug_compile_units())
+    if (!Found && Unit->getFilename() == FileName)
+      Found = Unit;
+  return Found; // still nullptr when nothing matched
+}
+
+class DebugInfoCacheTest : public testing::Test {
+protected:
+  LLVMContext C;
+  // Null on parse error; a module that fails verification fails the test.
+  std::unique_ptr<Module> makeModule(StringRef Assembly) {
+    SMDiagnostic Err;
+    auto M = parseAssemblyString(Assembly, Err, C);
+    if (!M) {
+      Err.print("DebugInfoCacheTest", errs());
+    } else {
+      EXPECT_FALSE(verifyModule(*M, &errs())); // true means "broken"
+    }
+    return M;
+  }
+};
+
+TEST_F(DebugInfoCacheTest, TestEmpty) {
+  auto M = makeModule(""); // empty assembly, hence no compile units
+  DebugInfoCache DIC{*M};
+  EXPECT_EQ(DIC.Result.size(), 0u);
+}
+
+TEST_F(DebugInfoCacheTest, TestMultiCU) {
+  auto M = makeModule(MultiCUModule);
+  DebugInfoCache DIC{*M};
+  EXPECT_EQ(DIC.Result.size(), 2u);
+  // Lookups use ASSERT_*: a failed one must not reach the dereferences below.
+  auto *File1CU = findCU(*M, "file1.cpp");
+  ASSERT_NE(File1CU, nullptr);
+
+  auto File1DIFinder = DIC.Result.find(File1CU);
+  ASSERT_NE(File1DIFinder, DIC.Result.end());
+
+  EXPECT_EQ(File1DIFinder->getSecond().compile_unit_count(), 1u);
+  EXPECT_EQ(File1DIFinder->getSecond().type_count(), 6u);
+  EXPECT_EQ(File1DIFinder->getSecond().subprogram_count(), 0u);
+  EXPECT_EQ(File1DIFinder->getSecond().scope_count(), 1u);
+
+  auto *File2CU = findCU(*M, "file2.cpp");
+  ASSERT_NE(File2CU, nullptr);
+
+  auto File2DIFinder = DIC.Result.find(File2CU);
+  ASSERT_NE(File2DIFinder, DIC.Result.end());
+
+  EXPECT_EQ(File2DIFinder->getSecond().compile_unit_count(), 1u);
+  EXPECT_EQ(File2DIFinder->getSecond().type_count(), 2u);
+  EXPECT_EQ(File2DIFinder->getSecond().subprogram_count(), 0u);
+  EXPECT_EQ(File2DIFinder->getSecond().scope_count(), 2u);
+}
+
+/* Generated roughly by
+file1.cpp:
+struct file1_extern_type1;
+struct file1_extern_type2;
+
+namespace file1 {
+typedef struct file1_type1 { int x; float y; } file1_type1;
+file1_type1 global{0, 1.};
+} // file1
+
+extern struct file1_extern_type1 *file1_extern_func1(struct
+file1_extern_type2*);
+
+file1::file1_type1 file1_func1(file1::file1_type1 x) { return x; }
+--------
+file2.cpp:
+struct file2_extern_type1;
+struct file2_extern_type2;
+
+namespace file2 {
+typedef struct file2_type1 { float x; float y; } file2_type1;
+enum class file2_type2 { opt1, opt2 };
+
+namespace inner {
+file2_type2 inner_global{file2_type2::opt2};
+} // inner
+} // file2
+
+extern struct file2_extern_type1 *file2_extern_func1(struct
+file2_extern_type2*);
+
+file2::file2_type1 file2_func1(file2::file2_type1 x, file2::file2_type2 y) {
+return x; }
+--------
+$ clang -S -emit-llvm file*.cpp
+$ llvm-link -S -o single.ll file*.ll
+*/
+StringRef MultiCUModule = R"""(
+%"struct.file1::file1_type1" = type { i32, float }
+%"struct.file2::file2_type1" = type { float, float }
+
+@_ZN5file16globalE = dso_local global %"struct.file1::file1_type1" { i32 0, float 1.000000e+00 }, align 4, !dbg !0
+@_ZN5file25inner12inner_globalE = dso_local global i32 1, align 4, !dbg !11
+
+define dso_local i64 @_Z11file1_func1N5file111file1_type1E(i64 %0) !dbg !33 {
+  %2 = alloca %"struct.file1::file1_type1", align 4
+  %3 = alloca %"struct.file1::file1_type1", align 4
+  store i64 %0, ptr %3, align 4
+    #dbg_declare(ptr %3, !37, !DIExpression(), !38)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %2, ptr align 4 %3, i64 8, i1 false), !dbg !39
+  %4 = load i64, ptr %2, align 4, !dbg !40
+  ret i64 %4, !dbg !40
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+define dso_local <2 x float> @_Z11file2_func1N5file211file2_type1ENS_11file2_type2E(<2 x float> %0, i32 noundef %1) !dbg !41 {
+  %3 = alloca %"struct.file2::file2_type1", align 4
+  %4 = alloca %"struct.file2::file2_type1", align 4
+  %5 = alloca i32, align 4
+  store <2 x float> %0, ptr %4, align 4
+    #dbg_declare(ptr %4, !49, !DIExpression(), !50)
+  store i32 %1, ptr %5, align 4
+    #dbg_declare(ptr %5, !51, !DIExpression(), !52)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %3, ptr align 4 %4, i64 8, i1 false), !dbg !53
+  %6 = load <2 x float>, ptr %3, align 4, !dbg !54
+  ret <2 x float> %6, !dbg !54
+}
+
+!llvm.dbg.cu = !{!20, !22}
+!llvm.ident = !{!25, !25}
+!llvm.module.flags = !{!26, !27, !28, !29, !30, !31, !32}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "global", linkageName: "_ZN5file16globalE", scope: !2, file: !3, line: 6, type: !4, isLocal: false, isDefinition: true)
+!2 = !DINamespace(name: "file1", scope: null)
+!3 = !DIFile(filename: "file1.cpp", directory: "")
+!4 = !DIDerivedType(tag: DW_TAG_typedef, name: "file1_type1", scope: !2, file: !3, line: 5, baseType: !5)
+!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "file1_type1", scope: !2, file: !3, line: 5, size: 64, flags: DIFlagTypePassByValue, elements: !6, identifier: "_ZTSN5file111file1_type1E")
+!6 = !{!7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !5, file: !3, line: 5, baseType: !8, size: 32)
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !5, file: !3, line: 5, baseType: !10, size: 32, offset: 32)
+!10 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+!11 = !DIGlobalVariableExpression(var: !12, expr: !DIExpression())
+!12 = distinct !DIGlobalVariable(name: "inner_global", linkageName: "_ZN5file25inner12inner_globalE", scope: !13, file: !15, line: 9, type: !16, isLocal: false, isDefinition: true)
+!13 = !DINamespace(name: "inner", scope: !14)
+!14 = !DINamespace(name: "file2", scope: null)
+!15 = !DIFile(filename: "file2.cpp", directory: "")
+!16 = distinct !DICompositeType(tag: DW_TAG_enumeration_type, name: "file2_type2", scope: !14, file: !15, line: 6, baseType: !8, size: 32, flags: DIFlagEnumClass, elements: !17, identifier: "_ZTSN5file211file2_type2E")
+!17 = !{!18, !19}
+!18 = !DIEnumerator(name: "opt1", value: 0)
+!19 = !DIEnumerator(name: "opt2", value: 1)
+!20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !21, splitDebugInlining: false, nameTableKind: None)
+!21 = !{!0}
+!22 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !15, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !23, globals: !24, splitDebugInlining: false, nameTableKind: None)
+!23 = !{!16}
+!24 = !{!11}
+!25 = !{!"clang"}
+!26 = !{i32 7, !"Dwarf Version", i32 5}
+!27 = !{i32 2, !"Debug Info Version", i32 3}
+!28 = !{i32 1, !"wchar_size", i32 4}
+!29 = !{i32 8, !"PIC Level", i32 2}
+!30 = !{i32 7, !"PIE Level", i32 2}
+!31 = !{i32 7, !"uwtable", i32 2}
+!32 = !{i32 7, !"frame-pointer", i32 2}
+!33 = distinct !DISubprogram(name: "file1_func1", linkageName: "_Z11file1_func1N5file111file1_type1E", scope: !3, file: !3, line: 11, type: !34, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !20, retainedNodes: !36)
+!34 = !DISubroutineType(types: !35)
+!35 = !{!4, !4}
+!36 = !{}
+!37 = !DILocalVariable(name: "x", arg: 1, scope: !33, file: !3, line: 11, type: !4)
+!38 = !DILocation(line: 11, column: 51, scope: !33)
+!39 = !DILocation(line: 11, column: 63, scope: !33)
+!40 = !DILocation(line: 11, column: 56, scope: !33)
+!41 = distinct !DISubprogram(name: "file2_func1", linkageName: "_Z11file2_func1N5file211file2_type1ENS_11file2_type2E", scope: !15, file: !15, line: 15, type: !42, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !22, retainedNodes: !36)
+!42 = !DISubroutineType(types: !43)
+!43 = !{!44, !44, !16}
+!44 = !DIDerivedType(tag: DW_TAG_typedef, name: "file2_type1", scope: !14, file: !15, line: 5, baseType: !45)
+!45 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "file2_type1", scope: !14, file: !15, line: 5, size: 64, flags: DIFlagTypePassByValue, elements: !46, identifier: "_ZTSN5file211file2_type1E")
+!46 = !{!47, !48}
+!47 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !45, file: !15, line: 5, baseType: !10, size: 32)
+!48 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !45, file: !15, line: 5, baseType: !10, size: 32, offset: 32)
+!49 = !DILocalVariable(name: "x", arg: 1, scope: !41, file: !15, line: 15, type: !44)
+!50 = !DILocation(line: 15, column: 51, scope: !41)
+!51 = !DILocalVariable(name: "y", arg: 2, scope: !41, file: !15, line: 15, type: !16)
+!52 = !DILocation(line: 15, column: 73, scope: !41)
+!53 = !DILocation(line: 15, column: 85, scope: !41)
+!54 = !DILocation(line: 15, column: 78, scope: !41)
+)""";
+} // namespace
+} // namespace llvm

>From 4c76540db1990a49b6126de83c7ccd12e28e8cb0 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 11:00:00 -0700
Subject: [PATCH 14/14] [Coro] Use DebugInfoCache to speed up cloning in
 CoroSplitPass

Summary:
We can speed up collection of global debug info by starting from a DebugInfoFinder that
DebugInfoCache has already primed on the function's compile unit.

The pass could likely be another 2x+ faster if we avoid rebuilding the set of global debug
info. This needs further massaging of CloneFunction and ValueMapper, though, and can be done
incrementally on top of this.

The table below compares the performance of CoroSplitPass at various points in this stack. This is
anecdata from a sample cpp file compiled with full debug info:
|                 | Baseline | IdentityMD set | Prebuilt GlobalDI | Cached CU DIFinder (cur.) |
|-----------------+----------+----------------+-------------------+---------------------------|
| CoroSplitPass   | 306ms    | 221ms          | 68ms              | 17ms                      |
| CoroCloner      | 101ms    | 72ms           | 0.5ms             | 0.5ms                     |
| CollectGlobalDI | -        | -              | 63ms              | 13ms                      |
|-----------------+----------+----------------+-------------------+---------------------------|
| Speed up        | 1x       | 1.4x           | 4.5x              | 18x                       |

Test Plan:
ninja check-llvm-unit
ninja check-llvm

Compiled a sample cpp file with time trace to get the avg. duration of the pass and inner scopes.
---
 llvm/lib/Analysis/CGSCCPassManager.cpp        |  7 +++
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 55 +++++++++++++++----
 llvm/test/Other/new-pass-manager.ll           |  1 +
 llvm/test/Other/new-pm-defaults.ll            |  1 +
 llvm/test/Other/new-pm-lto-defaults.ll        |  1 +
 llvm/test/Other/new-pm-pgo-preinline.ll       |  1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |  1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |  1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |  1 +
 .../Other/new-pm-thinlto-prelink-defaults.ll  |  1 +
 .../new-pm-thinlto-prelink-pgo-defaults.ll    |  1 +
 ...w-pm-thinlto-prelink-samplepgo-defaults.ll |  1 +
 .../Analysis/CGSCCPassManagerTest.cpp         |  4 +-
 13 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index c32739a5655419..f86d863d0598ff 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/DebugInfoCache.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/InstIterator.h"
@@ -141,6 +142,11 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
   // Get the call graph for this module.
   LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
 
+  // Prime DebugInfoCache.
+  // TODO: Currently, the only user is CoroSplitPass. Consider running
+  // conditionally.
+  AM.getResult<DebugInfoCacheAnalysis>(M);
+
   // Get Function analysis manager from its proxy.
   FunctionAnalysisManager &FAM =
       AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager();
@@ -352,6 +358,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
   // analysis proxies by handling them above and in any nested pass managers.
   PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
   PA.preserve<LazyCallGraphAnalysis>();
+  PA.preserve<DebugInfoCacheAnalysis>();
   PA.preserve<CGSCCAnalysisManagerModuleProxy>();
   PA.preserve<FunctionAnalysisManagerModuleProxy>();
   return PA;
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 4117ae6dd474fa..3a95733bbe8882 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfoCache.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -82,15 +83,39 @@ using namespace llvm;
 
 namespace {
 
+const DebugInfoFinder *cachedDIFinder(Function &F,
+                                      const DebugInfoCache *DICache) {
+  if (!DICache)
+    return nullptr;
+
+  auto *SP = F.getSubprogram();
+  auto *CU = SP ? SP->getUnit() : nullptr;
+  if (!CU)
+    return nullptr;
+
+  auto Found = DICache->Result.find(CU);
+  if (Found == DICache->Result.end())
+    return nullptr;
+
+  return &Found->getSecond();
+}
+
 /// Collect (a known) subset of global debug info metadata potentially used by
 /// the function \p F.
 ///
 /// This metadata set can be used to avoid cloning debug info not owned by \p F
 /// and is shared among all potential clones \p F.
-void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo) {
+void collectGlobalDebugInfo(Function &F, MetadataSetTy &GlobalDebugInfo,
+                            const DebugInfoCache *DICache) {
   TimeTraceScope FunctionScope("CollectGlobalDebugInfo");
 
   DebugInfoFinder DIFinder;
+
+  // Copy DIFinder from cache which is primed on F's compile unit when available
+  auto *PrimedDIFinder = cachedDIFinder(F, DICache);
+  if (PrimedDIFinder)
+    DIFinder = *PrimedDIFinder;
+
   DISubprogram *SPClonedWithinModule = ProcessSubprogramAttachment(
       F, CloneFunctionChangeType::LocalChangesOnly, DIFinder);
 
@@ -1514,11 +1539,11 @@ namespace {
 struct SwitchCoroutineSplitter {
   static void split(Function &F, coro::Shape &Shape,
                     SmallVectorImpl<Function *> &Clones,
-                    TargetTransformInfo &TTI) {
+                    TargetTransformInfo &TTI, const DebugInfoCache *DICache) {
     assert(Shape.ABI == coro::ABI::Switch);
 
     MetadataSetTy GlobalDebugInfo;
-    collectGlobalDebugInfo(F, GlobalDebugInfo);
+    collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
 
     // Create a resume clone by cloning the body of the original function,
     // setting new entry block and replacing coro.suspend an appropriate value
@@ -1832,7 +1857,8 @@ CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
 
 static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
                                 SmallVectorImpl<Function *> &Clones,
-                                TargetTransformInfo &TTI) {
+                                TargetTransformInfo &TTI,
+                                const DebugInfoCache *DICache) {
   assert(Shape.ABI == coro::ABI::Async);
   assert(Clones.empty());
   // Reset various things that the optimizer might have decided it
@@ -1919,7 +1945,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
   assert(Clones.size() == Shape.CoroSuspends.size());
 
   MetadataSetTy GlobalDebugInfo;
-  collectGlobalDebugInfo(F, GlobalDebugInfo);
+  collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
 
   for (size_t Idx = 0, End = Shape.CoroSuspends.size(); Idx != End; ++Idx) {
     auto *Suspend = Shape.CoroSuspends[Idx];
@@ -1932,7 +1958,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
 
 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
                                  SmallVectorImpl<Function *> &Clones,
-                                 TargetTransformInfo &TTI) {
+                                 TargetTransformInfo &TTI,
+                                 const DebugInfoCache *DICache) {
   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
   assert(Clones.empty());
 
@@ -2053,7 +2080,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
   assert(Clones.size() == Shape.CoroSuspends.size());
 
   MetadataSetTy GlobalDebugInfo;
-  collectGlobalDebugInfo(F, GlobalDebugInfo);
+  collectGlobalDebugInfo(F, GlobalDebugInfo, DICache);
 
   for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) {
     auto Suspend = Shape.CoroSuspends[i];
@@ -2108,7 +2135,8 @@ static bool hasSafeElideCaller(Function &F) {
 static coro::Shape
 splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
                TargetTransformInfo &TTI, bool OptimizeFrame,
-               std::function<bool(Instruction &)> MaterializableCallback) {
+               std::function<bool(Instruction &)> MaterializableCallback,
+               const DebugInfoCache *DICache) {
   PrettyStackTraceFunction prettyStackTrace(F);
 
   // The suspend-crossing algorithm in buildCoroutineFrame get tripped
@@ -2138,14 +2166,14 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
   } else {
     switch (Shape.ABI) {
     case coro::ABI::Switch:
-      SwitchCoroutineSplitter::split(F, Shape, Clones, TTI);
+      SwitchCoroutineSplitter::split(F, Shape, Clones, TTI, DICache);
       break;
     case coro::ABI::Async:
-      splitAsyncCoroutine(F, Shape, Clones, TTI);
+      splitAsyncCoroutine(F, Shape, Clones, TTI, DICache);
       break;
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
-      splitRetconCoroutine(F, Shape, Clones, TTI);
+      splitRetconCoroutine(F, Shape, Clones, TTI, DICache);
       break;
     }
   }
@@ -2282,6 +2310,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
   auto &FAM =
       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
 
+  const auto &MAMProxy = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG);
+  const auto *DICache = MAMProxy.getCachedResult<DebugInfoCacheAnalysis>(M);
+
   // Check for uses of llvm.coro.prepare.retcon/async.
   SmallVector<Function *, 2> PrepareFns;
   addPrepareFunction(M, PrepareFns, "llvm.coro.prepare.retcon");
@@ -2307,7 +2338,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
     SmallVector<Function *, 4> Clones;
     coro::Shape Shape =
         splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
-                       OptimizeFrame, MaterializableCallback);
+                       OptimizeFrame, MaterializableCallback, DICache);
     CurrentSCC = &updateCallGraphAfterCoroutineSplit(
         *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM);
 
diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll
index f0fe708806f1b6..53fd6fe2a317ec 100644
--- a/llvm/test/Other/new-pass-manager.ll
+++ b/llvm/test/Other/new-pass-manager.ll
@@ -23,6 +23,7 @@
 ; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*(FunctionAnalysisManager|AnalysisManager<.*Function.*>).*}},{{.*}}Module>
 ; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-CGSCC-PASS-NEXT: Running analysis: TargetLibraryAnalysis
+; CHECK-CGSCC-PASS-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-CGSCC-PASS-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-CGSCC-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
 ; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 55dbdb1b8366d6..9e82afad4b85cf 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -138,6 +138,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 5543472df685b0..9bc9ce310c5506 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -47,6 +47,7 @@
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
diff --git a/llvm/test/Other/new-pm-pgo-preinline.ll b/llvm/test/Other/new-pm-pgo-preinline.ll
index f07a3728ba3d48..97813bb2433642 100644
--- a/llvm/test/Other/new-pm-pgo-preinline.ll
+++ b/llvm/test/Other/new-pm-pgo-preinline.ll
@@ -5,6 +5,7 @@
 ; CHECK-Osz-NEXT: Running analysis: InlineAdvisorAnalysis
 ; CHECK-Osz-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-Osz-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-Osz-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-Osz-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
 ; CHECK-Osz-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-Osz-NEXT: Running pass: InlinerPass on (foo)
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index fcf84dc5e11051..7a6d2854154f49 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -73,6 +73,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 4d5b5e733a87c2..b96110225dffe9 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -61,6 +61,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index 62b81ac7cad03f..671aaaa29d4264 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -70,6 +70,7 @@
 ; CHECK-O-NEXT: Invalidating analysis: AAManager
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index ab04f80abc5722..14e7c123a1eb2e 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -105,6 +105,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index c5fc4d57539c5f..8a58ee2b9fc872 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -61,6 +61,7 @@
 ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: InlinerPass on (foo)
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 096110f775b04f..f137a9df26fa47 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -75,6 +75,7 @@
 ; CHECK-O-NEXT: Invalidating analysis: AAManager
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
+; CHECK-O-NEXT: Running analysis: DebugInfoCacheAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
 ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
diff --git a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
index 5c71bc8063d6c9..7212107d992638 100644
--- a/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
+++ b/llvm/unittests/Analysis/CGSCCPassManagerTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/DebugInfoCache.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
@@ -16,8 +17,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -255,6 +256,7 @@ class CGSCCPassManagerTest : public ::testing::Test {
             "}\n")) {
     FAM.registerPass([&] { return TargetLibraryAnalysis(); });
     MAM.registerPass([&] { return LazyCallGraphAnalysis(); });
+    MAM.registerPass([&] { return DebugInfoCacheAnalysis(); });
     MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
 
     // Register required pass instrumentation analysis.



More information about the llvm-commits mailing list