[llvm-branch-commits] [clang] [llvm] [Utils] Identity map module-level debug info on first use in CloneFunction* (PR #118627)

Artem Pianykh via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 16 14:31:29 PST 2024


https://github.com/artempyanykh updated https://github.com/llvm/llvm-project/pull/118627

>From 8c163237573df097a99b65a83280757d1b39062c Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Mon, 16 Dec 2024 13:25:21 -0800
Subject: [PATCH 1/3] [OpenACC/NFC] Make 'trailing objects' use private
 inheritence.

I noticed this while working on something else, these are supposed to be
privately inherited.
---
 clang/include/clang/AST/OpenACCClause.h | 57 ++++++++++++++++---------
 clang/include/clang/AST/StmtOpenACC.h   | 23 ++++++----
 2 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 7a1b17cc4e44e3..51db58d484a25f 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -191,8 +191,9 @@ using DeviceTypeArgument = std::pair<IdentifierInfo *, SourceLocation>;
 /// an identifier. The 'asterisk' means 'the rest'.
 class OpenACCDeviceTypeClause final
     : public OpenACCClauseWithParams,
-      public llvm::TrailingObjects<OpenACCDeviceTypeClause,
+      private llvm::TrailingObjects<OpenACCDeviceTypeClause,
                                    DeviceTypeArgument> {
+  friend TrailingObjects;
   // Data stored in trailing objects as IdentifierInfo* /SourceLocation pairs. A
   // nullptr IdentifierInfo* represents an asterisk.
   unsigned NumArchs;
@@ -377,7 +378,8 @@ class OpenACCClauseWithExprs : public OpenACCClauseWithParams {
 // Represents the 'devnum' and expressions lists for the 'wait' clause.
 class OpenACCWaitClause final
     : public OpenACCClauseWithExprs,
-      public llvm::TrailingObjects<OpenACCWaitClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCWaitClause, Expr *> {
+  friend TrailingObjects;
   SourceLocation QueuesLoc;
   OpenACCWaitClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                     Expr *DevNumExpr, SourceLocation QueuesLoc,
@@ -419,7 +421,8 @@ class OpenACCWaitClause final
 
 class OpenACCNumGangsClause final
     : public OpenACCClauseWithExprs,
-      public llvm::TrailingObjects<OpenACCNumGangsClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCNumGangsClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCNumGangsClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                         ArrayRef<Expr *> IntExprs, SourceLocation EndLoc)
@@ -449,7 +452,8 @@ class OpenACCNumGangsClause final
 
 class OpenACCTileClause final
     : public OpenACCClauseWithExprs,
-      public llvm::TrailingObjects<OpenACCTileClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCTileClause, Expr *> {
+  friend TrailingObjects;
   OpenACCTileClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                     ArrayRef<Expr *> SizeExprs, SourceLocation EndLoc)
       : OpenACCClauseWithExprs(OpenACCClauseKind::Tile, BeginLoc, LParenLoc,
@@ -503,7 +507,8 @@ class OpenACCClauseWithSingleIntExpr : public OpenACCClauseWithExprs {
 
 class OpenACCGangClause final
     : public OpenACCClauseWithExprs,
-      public llvm::TrailingObjects<OpenACCGangClause, Expr *, OpenACCGangKind> {
+      private llvm::TrailingObjects<OpenACCGangClause, Expr *, OpenACCGangKind> {
+  friend TrailingObjects;
 protected:
   OpenACCGangClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                     ArrayRef<OpenACCGangKind> GangKinds,
@@ -658,7 +663,8 @@ class OpenACCClauseWithVarList : public OpenACCClauseWithExprs {
 
 class OpenACCPrivateClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCPrivateClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCPrivateClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCPrivateClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                        ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -680,7 +686,8 @@ class OpenACCPrivateClause final
 
 class OpenACCFirstPrivateClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCFirstPrivateClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCFirstPrivateClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCFirstPrivateClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                             ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -702,7 +709,8 @@ class OpenACCFirstPrivateClause final
 
 class OpenACCDevicePtrClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCDevicePtrClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCDevicePtrClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCDevicePtrClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                          ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -724,7 +732,8 @@ class OpenACCDevicePtrClause final
 
 class OpenACCAttachClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCAttachClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCAttachClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCAttachClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                       ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -746,7 +755,8 @@ class OpenACCAttachClause final
 
 class OpenACCDetachClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCDetachClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCDetachClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCDetachClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                       ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -768,7 +778,8 @@ class OpenACCDetachClause final
 
 class OpenACCDeleteClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCDeleteClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCDeleteClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCDeleteClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                       ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -790,7 +801,8 @@ class OpenACCDeleteClause final
 
 class OpenACCUseDeviceClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCUseDeviceClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCUseDeviceClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCUseDeviceClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                          ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -812,7 +824,8 @@ class OpenACCUseDeviceClause final
 
 class OpenACCNoCreateClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCNoCreateClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCNoCreateClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCNoCreateClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                         ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -834,7 +847,8 @@ class OpenACCNoCreateClause final
 
 class OpenACCPresentClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCPresentClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCPresentClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCPresentClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
                        ArrayRef<Expr *> VarList, SourceLocation EndLoc)
@@ -856,7 +870,8 @@ class OpenACCPresentClause final
 
 class OpenACCCopyClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCCopyClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCCopyClause, Expr *> {
+  friend TrailingObjects;
 
   OpenACCCopyClause(OpenACCClauseKind Spelling, SourceLocation BeginLoc,
                     SourceLocation LParenLoc, ArrayRef<Expr *> VarList,
@@ -885,7 +900,8 @@ class OpenACCCopyClause final
 
 class OpenACCCopyInClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCCopyInClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCCopyInClause, Expr *> {
+  friend TrailingObjects;
   bool IsReadOnly;
 
   OpenACCCopyInClause(OpenACCClauseKind Spelling, SourceLocation BeginLoc,
@@ -917,7 +933,8 @@ class OpenACCCopyInClause final
 
 class OpenACCCopyOutClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCCopyOutClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCCopyOutClause, Expr *> {
+  friend TrailingObjects;
   bool IsZero;
 
   OpenACCCopyOutClause(OpenACCClauseKind Spelling, SourceLocation BeginLoc,
@@ -949,7 +966,8 @@ class OpenACCCopyOutClause final
 
 class OpenACCCreateClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCCreateClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCCreateClause, Expr *> {
+  friend TrailingObjects;
   bool IsZero;
 
   OpenACCCreateClause(OpenACCClauseKind Spelling, SourceLocation BeginLoc,
@@ -981,7 +999,8 @@ class OpenACCCreateClause final
 
 class OpenACCReductionClause final
     : public OpenACCClauseWithVarList,
-      public llvm::TrailingObjects<OpenACCReductionClause, Expr *> {
+      private llvm::TrailingObjects<OpenACCReductionClause, Expr *> {
+  friend TrailingObjects;
   OpenACCReductionOperator Op;
 
   OpenACCReductionClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h
index e256a4b7c69eb5..a1903cd9e3a23a 100644
--- a/clang/include/clang/AST/StmtOpenACC.h
+++ b/clang/include/clang/AST/StmtOpenACC.h
@@ -127,11 +127,12 @@ class OpenACCAssociatedStmtConstruct : public OpenACCConstructStmt {
 /// the 'Kind'.
 class OpenACCComputeConstruct final
     : public OpenACCAssociatedStmtConstruct,
-      public llvm::TrailingObjects<OpenACCComputeConstruct,
-                                   const OpenACCClause *> {
+      private llvm::TrailingObjects<OpenACCComputeConstruct,
+                                    const OpenACCClause *> {
   friend class ASTStmtWriter;
   friend class ASTStmtReader;
   friend class ASTContext;
+  friend TrailingObjects;
   OpenACCComputeConstruct(unsigned NumClauses)
       : OpenACCAssociatedStmtConstruct(
             OpenACCComputeConstructClass, OpenACCDirectiveKind::Invalid,
@@ -189,7 +190,7 @@ class OpenACCComputeConstruct final
 /// Construct.
 class OpenACCLoopConstruct final
     : public OpenACCAssociatedStmtConstruct,
-      public llvm::TrailingObjects<OpenACCLoopConstruct,
+      private llvm::TrailingObjects<OpenACCLoopConstruct,
                                    const OpenACCClause *> {
   // The compute/combined construct kind this loop is associated with, or
   // invalid if this is an orphaned loop construct.
@@ -202,6 +203,7 @@ class OpenACCLoopConstruct final
   friend class OpenACCAssociatedStmtConstruct;
   friend class OpenACCCombinedConstruct;
   friend class OpenACCComputeConstruct;
+  friend TrailingObjects;
 
   OpenACCLoopConstruct(unsigned NumClauses);
 
@@ -245,8 +247,9 @@ class OpenACCLoopConstruct final
 // shared with both loop and compute constructs.
 class OpenACCCombinedConstruct final
     : public OpenACCAssociatedStmtConstruct,
-      public llvm::TrailingObjects<OpenACCCombinedConstruct,
+      private llvm::TrailingObjects<OpenACCCombinedConstruct,
                                    const OpenACCClause *> {
+  friend TrailingObjects;
   OpenACCCombinedConstruct(unsigned NumClauses)
       : OpenACCAssociatedStmtConstruct(
             OpenACCCombinedConstructClass, OpenACCDirectiveKind::Invalid,
@@ -297,8 +300,9 @@ class OpenACCCombinedConstruct final
 // and clauses, but is otherwise pretty simple.
 class OpenACCDataConstruct final
     : public OpenACCAssociatedStmtConstruct,
-      public llvm::TrailingObjects<OpenACCDataConstruct,
+      private llvm::TrailingObjects<OpenACCDataConstruct,
                                    const OpenACCClause *> {
+  friend TrailingObjects;
   OpenACCDataConstruct(unsigned NumClauses)
       : OpenACCAssociatedStmtConstruct(
             OpenACCDataConstructClass, OpenACCDirectiveKind::Data,
@@ -345,8 +349,9 @@ class OpenACCDataConstruct final
 // This class represents a 'enter data' construct, which JUST has clauses.
 class OpenACCEnterDataConstruct final
     : public OpenACCConstructStmt,
-      public llvm::TrailingObjects<OpenACCEnterDataConstruct,
+      private llvm::TrailingObjects<OpenACCEnterDataConstruct,
                                    const OpenACCClause *> {
+  friend TrailingObjects;
   OpenACCEnterDataConstruct(unsigned NumClauses)
       : OpenACCConstructStmt(OpenACCEnterDataConstructClass,
                              OpenACCDirectiveKind::EnterData, SourceLocation{},
@@ -382,8 +387,9 @@ class OpenACCEnterDataConstruct final
 // This class represents a 'exit data' construct, which JUST has clauses.
 class OpenACCExitDataConstruct final
     : public OpenACCConstructStmt,
-      public llvm::TrailingObjects<OpenACCExitDataConstruct,
+      private llvm::TrailingObjects<OpenACCExitDataConstruct,
                                    const OpenACCClause *> {
+  friend TrailingObjects;
   OpenACCExitDataConstruct(unsigned NumClauses)
       : OpenACCConstructStmt(OpenACCExitDataConstructClass,
                              OpenACCDirectiveKind::ExitData, SourceLocation{},
@@ -420,8 +426,9 @@ class OpenACCExitDataConstruct final
 // statement and clauses, but is otherwise pretty simple.
 class OpenACCHostDataConstruct final
     : public OpenACCAssociatedStmtConstruct,
-      public llvm::TrailingObjects<OpenACCHostDataConstruct,
+      private llvm::TrailingObjects<OpenACCHostDataConstruct,
                                    const OpenACCClause *> {
+  friend TrailingObjects;
   OpenACCHostDataConstruct(unsigned NumClauses)
       : OpenACCAssociatedStmtConstruct(
             OpenACCHostDataConstructClass, OpenACCDirectiveKind::HostData,

>From 8f32caf1b94eebcb35bf3e9d9f52fc94032f03c7 Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sat, 14 Sep 2024 16:02:51 -0700
Subject: [PATCH 2/3] [NFC][Utils] Eliminate DISubprogram set from
 BuildDebugInfoMDMap

Summary:
Previously, we'd add all SPs distinct from the cloned one into a set.
Then when cloning a local scope we'd check if it's from one of those
'distinct' SPs by checking if it's in the set. We don't need to do that.
We can just check against the cloned SP directly and drop the set.

Test Plan:
ninja check-llvm-unit check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118625, branch: users/artempyanykh/fast-coro-upstream/6
---
 llvm/lib/Transforms/Utils/CloneFunction.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index d47633a05fce96..8863dff4482a1a 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -172,18 +172,15 @@ bool llvm::BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
     };
 
     // Avoid cloning types, compile units, and (other) subprograms.
-    SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
     for (DISubprogram *ISP : DIFinder.subprograms()) {
-      if (ISP != SPClonedWithinModule) {
+      if (ISP != SPClonedWithinModule)
         mapToSelfIfNew(ISP);
-        MappedToSelfSPs.insert(ISP);
-      }
     }
 
     // If a subprogram isn't going to be cloned skip its lexical blocks as well.
     for (DIScope *S : DIFinder.scopes()) {
       auto *LScope = dyn_cast<DILocalScope>(S);
-      if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+      if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
         mapToSelfIfNew(S);
     }
 

>From d9e79ccd00d73201fcbff0039b99d0c87615e70d Mon Sep 17 00:00:00 2001
From: Artem Pianykh <arr at fb.com>
Date: Sun, 15 Sep 2024 04:39:20 -0700
Subject: [PATCH 3/3] [Utils] Identity map module-level debug info on first use
 in CloneFunction*

Summary:
To avoid cloning module-level debug info (owned by the module rather
than the function), CloneFunction implementation used to eagerly
identity map such debug info into ValueMap's MD map. In larger modules
with meaningful volume of debug info this gets very expensive.

By passing such debug info metadata via an IdentityMD set for the
ValueMapper to map on first use, we get several benefits:

1. Mapping metadata is not cheap, particularly because of tracking. When
   cloning a Function we identity map lots of global module-level
   metadata to avoid cloning it, while only a fraction of it is actually
   used by the function. Mapping on first use is a lot faster for
   modules with meaningful amount of debug info.

2. Eagerly identity mapping metadata makes it harder to cache
   module-level data (e.g. a set of metadata nodes in a \a DICompileUnit).
   With this patch we can cache certain module-level metadata
   calculations to speed things up further.

Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up
CoroSplitPass which is one of the heavier users of cloning:

|                 | Baseline | IdentityMD set |
|-----------------|----------|----------------|
| CoroSplitPass   | 306ms    | 221ms          |
| CoroCloner      | 101ms    | 72ms           |
| Speed up        | 1x       | 1.4x           |

Test Plan:
ninja check-llvm-unit
ninja check-llvm

stack-info: PR: https://github.com/llvm/llvm-project/pull/118627, branch: users/artempyanykh/fast-coro-upstream/8
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  | 19 +++---
 .../llvm/Transforms/Utils/ValueMapper.h       | 67 ++++++++++++++-----
 llvm/lib/Transforms/Utils/CloneFunction.cpp   | 60 ++++++++---------
 llvm/lib/Transforms/Utils/ValueMapper.cpp     | 19 ++++--
 4 files changed, 104 insertions(+), 61 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 2fcb64206387ed..72d3eebabc5c9a 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -193,7 +193,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
 void CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
                                ValueToValueMapTy &VMap, RemapFlags RemapFlag,
                                ValueMapTypeRemapper *TypeMapper = nullptr,
-                               ValueMaterializer *Materializer = nullptr);
+                               ValueMaterializer *Materializer = nullptr,
+                               const MetadataSetTy *IdentityMD = nullptr);
 
 /// Clone OldFunc's body into NewFunc.
 void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
@@ -202,7 +203,8 @@ void CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
                            const char *NameSuffix = "",
                            ClonedCodeInfo *CodeInfo = nullptr,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr);
 
 void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                const Instruction *StartingInst,
@@ -242,13 +244,12 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
                                          CloneFunctionChangeType Changes,
                                          DebugInfoFinder &DIFinder);
 
-/// Build a map of debug info to use during Metadata cloning.
-/// Returns true if cloning would need module level changes and false if there
-/// would only be local changes.
-bool BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
-                         CloneFunctionChangeType Changes,
-                         DebugInfoFinder &DIFinder,
-                         DISubprogram *SPClonedWithinModule);
+/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
+/// needs to be identity mapped during Metadata cloning.
+void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
+                                CloneFunctionChangeType Changes,
+                                DebugInfoFinder &DIFinder,
+                                DISubprogram *SPClonedWithinModule);
 
 /// This class captures the data input to the InlineFunction call, and records
 /// the auxiliary results produced by it.
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 743cfeb7ef3f02..b8d612f11d519f 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -15,6 +15,7 @@
 #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/simple_ilist.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
@@ -35,6 +36,7 @@ class Value;
 
 using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
 using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
+using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
 
 /// This is a class that can be implemented by clients to remap types when
 /// cloning constants and instructions.
@@ -136,6 +138,18 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
 /// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
 /// pass into the schedule*() functions.
 ///
+/// NOTE: \c IdentityMD is used by CloneFunction* to directly specify metadata
+/// that should be identity mapped (and hence not cloned). The metadata will be
+/// identity mapped in \c VM on first use. There are several reasons for doing
+/// it this way rather than eagerly identity mapping metadata nodes in \c VM:
+/// 1. Mapping metadata is not cheap, particularly because of tracking.
+/// 2. When cloning a Function we identity map lots of global module-level
+///    metadata to avoid cloning it, while only a fraction of it is actually
+///    used by the function. Mapping on first use is a lot faster for modules
+///    with meaningful amount of debug info.
+/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
+///    data (e.g. a set of metadata nodes in a \a DICompileUnit).
+///
 /// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
 /// ValueToValueMapTy.  We should template \a ValueMapper (and its
 /// implementation classes), and explicitly instantiate on two concrete
@@ -152,7 +166,8 @@ class ValueMapper {
 public:
   ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
               ValueMapTypeRemapper *TypeMapper = nullptr,
-              ValueMaterializer *Materializer = nullptr);
+              ValueMaterializer *Materializer = nullptr,
+              const MetadataSetTy *IdentityMD = nullptr);
   ValueMapper(ValueMapper &&) = delete;
   ValueMapper(const ValueMapper &) = delete;
   ValueMapper &operator=(ValueMapper &&) = delete;
@@ -218,8 +233,10 @@ class ValueMapper {
 inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
                        RemapFlags Flags = RF_None,
                        ValueMapTypeRemapper *TypeMapper = nullptr,
-                       ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapValue(*V);
+                       ValueMaterializer *Materializer = nullptr,
+                       const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapValue(*V);
 }
 
 /// Lookup or compute a mapping for a piece of metadata.
@@ -231,7 +248,9 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
 ///     \c MD.
 ///  3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
 ///     re-wrap its return (returning nullptr on nullptr).
-///  4. Else, \c MD is an \a MDNode.  These are remapped, along with their
+///  4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
+///     and return it.
+///  5. Else, \c MD is an \a MDNode.  These are remapped, along with their
 ///     transitive operands.  Distinct nodes are duplicated or moved depending
 ///     on \a RF_MoveDistinctNodes.  Uniqued nodes are remapped like constants.
 ///
@@ -240,16 +259,20 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
 inline Metadata *MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
                              RemapFlags Flags = RF_None,
                              ValueMapTypeRemapper *TypeMapper = nullptr,
-                             ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMetadata(*MD);
+                             ValueMaterializer *Materializer = nullptr,
+                             const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapMetadata(*MD);
 }
 
 /// Version of MapMetadata with type safety for MDNode.
 inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
                            RemapFlags Flags = RF_None,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMDNode(*MD);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapMDNode(*MD);
 }
 
 /// Convert the instruction operands from referencing the current values into
@@ -263,8 +286,10 @@ inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
 inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
                              RemapFlags Flags = RF_None,
                              ValueMapTypeRemapper *TypeMapper = nullptr,
-                             ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapInstruction(*I);
+                             ValueMaterializer *Materializer = nullptr,
+                             const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .remapInstruction(*I);
 }
 
 /// Remap the Values used in the DbgRecord \a DR using the value map \a
@@ -272,8 +297,10 @@ inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
 inline void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM,
                            RemapFlags Flags = RF_None,
                            ValueMapTypeRemapper *TypeMapper = nullptr,
-                           ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapDbgRecord(M, *DR);
+                           ValueMaterializer *Materializer = nullptr,
+                           const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .remapDbgRecord(M, *DR);
 }
 
 /// Remap the Values used in the DbgRecords \a Range using the value map \a
@@ -283,8 +310,9 @@ inline void RemapDbgRecordRange(Module *M,
                                 ValueToValueMapTy &VM,
                                 RemapFlags Flags = RF_None,
                                 ValueMapTypeRemapper *TypeMapper = nullptr,
-                                ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer)
+                                ValueMaterializer *Materializer = nullptr,
+                                const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
       .remapDbgRecordRange(M, Range);
 }
 
@@ -297,16 +325,19 @@ inline void RemapDbgRecordRange(Module *M,
 inline void RemapFunction(Function &F, ValueToValueMapTy &VM,
                           RemapFlags Flags = RF_None,
                           ValueMapTypeRemapper *TypeMapper = nullptr,
-                          ValueMaterializer *Materializer = nullptr) {
-  ValueMapper(VM, Flags, TypeMapper, Materializer).remapFunction(F);
+                          ValueMaterializer *Materializer = nullptr,
+                          const MetadataSetTy *IdentityMD = nullptr) {
+  ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD).remapFunction(F);
 }
 
 /// Version of MapValue with type safety for Constant.
 inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
                           RemapFlags Flags = RF_None,
                           ValueMapTypeRemapper *TypeMapper = nullptr,
-                          ValueMaterializer *Materializer = nullptr) {
-  return ValueMapper(VM, Flags, TypeMapper, Materializer).mapConstant(*V);
+                          ValueMaterializer *Materializer = nullptr,
+                          const MetadataSetTy *IdentityMD = nullptr) {
+  return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
+      .mapConstant(*V);
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8863dff4482a1a..a962d017c73998 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -152,61 +152,54 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
   return SPClonedWithinModule;
 }
 
-bool llvm::BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
-                               CloneFunctionChangeType Changes,
-                               DebugInfoFinder &DIFinder,
-                               DISubprogram *SPClonedWithinModule) {
-  bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
+void llvm::FindDebugInfoToIdentityMap(MetadataSetTy &MD,
+                                      CloneFunctionChangeType Changes,
+                                      DebugInfoFinder &DIFinder,
+                                      DISubprogram *SPClonedWithinModule) {
   if (Changes < CloneFunctionChangeType::DifferentModule &&
       DIFinder.subprogram_count() > 0) {
-    // Turn on module-level changes, since we need to clone (some of) the
-    // debug info metadata.
+    // Even if Changes are local only, we turn on module-level changes, since we
+    // need to clone (some of) the debug info metadata.
     //
     // FIXME: Metadata effectively owned by a function should be made
     // local, and only that local metadata should be cloned.
-    ModuleLevelChanges = true;
-
-    auto mapToSelfIfNew = [&MD](MDNode *N) {
-      // Avoid clobbering an existing mapping.
-      (void)MD.try_emplace(N, N);
-    };
 
     // Avoid cloning types, compile units, and (other) subprograms.
     for (DISubprogram *ISP : DIFinder.subprograms()) {
       if (ISP != SPClonedWithinModule)
-        mapToSelfIfNew(ISP);
+        MD.insert(ISP);
     }
 
     // If a subprogram isn't going to be cloned skip its lexical blocks as well.
     for (DIScope *S : DIFinder.scopes()) {
       auto *LScope = dyn_cast<DILocalScope>(S);
       if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
-        mapToSelfIfNew(S);
+        MD.insert(S);
     }
 
     for (DICompileUnit *CU : DIFinder.compile_units())
-      mapToSelfIfNew(CU);
+      MD.insert(CU);
 
     for (DIType *Type : DIFinder.types())
-      mapToSelfIfNew(Type);
+      MD.insert(Type);
   } else {
     assert(!SPClonedWithinModule &&
            "Subprogram should be in DIFinder->subprogram_count()...");
   }
-
-  return ModuleLevelChanges;
 }
 
 void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
                                      ValueToValueMapTy &VMap,
                                      RemapFlags RemapFlag,
                                      ValueMapTypeRemapper *TypeMapper,
-                                     ValueMaterializer *Materializer) {
+                                     ValueMaterializer *Materializer,
+                                     const MetadataSetTy *IdentityMD) {
   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
   OldFunc.getAllMetadata(MDs);
   for (auto MD : MDs) {
-    NewFunc.addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
-                                               TypeMapper, Materializer));
+    NewFunc.addMetadata(MD.first,
+                        *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
+                                     Materializer, IdentityMD));
   }
 }
 
@@ -216,7 +209,8 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
                                  const char *NameSuffix,
                                  ClonedCodeInfo *CodeInfo,
                                  ValueMapTypeRemapper *TypeMapper,
-                                 ValueMaterializer *Materializer) {
+                                 ValueMaterializer *Materializer,
+                                 const MetadataSetTy *IdentityMD) {
   if (OldFunc.isDeclaration())
     return;
 
@@ -258,9 +252,10 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
     // Loop over all instructions, fixing each one as we find it, and any
     // attached debug-info records.
     for (Instruction &II : *BB) {
-      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
+      RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer,
+                       IdentityMD);
       RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
-                          RemapFlag, TypeMapper, Materializer);
+                          RemapFlag, TypeMapper, Materializer, IdentityMD);
     }
 }
 
@@ -322,16 +317,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   DISubprogram *SPClonedWithinModule =
       CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
 
-  ModuleLevelChanges =
-      BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
+  MetadataSetTy IdentityMD;
+  FindDebugInfoToIdentityMap(IdentityMD, Changes, DIFinder,
+                             SPClonedWithinModule);
 
-  const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
+  // Current implementation always upgrades from local changes to module level
+  // changes due to the way metadata cloning is done. See
+  // BuildDebugInfoToIdentityMap for more details.
+  const auto RemapFlag = RF_None;
 
   CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper,
-                            Materializer);
+                            Materializer, &IdentityMD);
 
   CloneFunctionBodyInto(*NewFunc, *OldFunc, VMap, RemapFlag, Returns,
-                        NameSuffix, CodeInfo, TypeMapper, Materializer);
+                        NameSuffix, CodeInfo, TypeMapper, Materializer,
+                        &IdentityMD);
 
   // Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
   // same module, the compile unit will already be listed (or not). When
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 3faea48466ba9d..f6ab5bd284597a 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -120,12 +120,14 @@ class Mapper {
   SmallVector<WorklistEntry, 4> Worklist;
   SmallVector<DelayedBasicBlock, 1> DelayedBBs;
   SmallVector<Constant *, 16> AppendingInits;
+  const MetadataSetTy *IdentityMD;
 
 public:
   Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
-         ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
+         ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer,
+         const MetadataSetTy *IdentityMD)
       : Flags(Flags), TypeMapper(TypeMapper),
-        MCs(1, MappingContext(VM, Materializer)) {}
+        MCs(1, MappingContext(VM, Materializer)), IdentityMD(IdentityMD) {}
 
   /// ValueMapper should explicitly call \a flush() before destruction.
   ~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
@@ -899,6 +901,14 @@ std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
     return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
   }
 
+  // Map metadata from IdentityMD on first use. We need to add these nodes to
+  // the mapping as otherwise metadata nodes numbering gets messed up. This is
+  // still economical because the amount of data in IdentityMD may be a lot
+  // larger than what will actually get used.
+  if (IdentityMD && IdentityMD->contains(MD)) {
+    return getVM().MD()[MD] = TrackingMDRef(const_cast<Metadata *>(MD));
+  }
+
   assert(isa<MDNode>(MD) && "Expected a metadata node");
 
   return std::nullopt;
@@ -1198,8 +1208,9 @@ class FlushingMapper {
 
 ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
                          ValueMapTypeRemapper *TypeMapper,
-                         ValueMaterializer *Materializer)
-    : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
+                         ValueMaterializer *Materializer,
+                         const MetadataSetTy *IdentityMD)
+    : pImpl(new Mapper(VM, Flags, TypeMapper, Materializer, IdentityMD)) {}
 
 ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
 



More information about the llvm-branch-commits mailing list