[llvm] 62b27f8 - [CodeExtractor] Correctly propagate scope information post extraction

Felipe de Azevedo Piovezan via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 6 11:26:48 PST 2022


Author: Felipe de Azevedo Piovezan
Date: 2022-12-06T14:26:06-05:00
New Revision: 62b27f893ecceade799df80b343d00851db250ba

URL: https://github.com/llvm/llvm-project/commit/62b27f893ecceade799df80b343d00851db250ba
DIFF: https://github.com/llvm/llvm-project/commit/62b27f893ecceade799df80b343d00851db250ba.diff

LOG: [CodeExtractor] Correctly propagate scope information post extraction

When a new function "NewF" is created with instructions extracted from
another function "OldF", the CodeExtractor only preserves debug
line/column of the extracted instructions. However:

1. Any inlinedAt nodes are dropped.
2. The scope chain is replaced with a single node, the Subprogram of NewF.

Both of these are incorrect: most of the debug metadata from the
original instructions should be preserved. We only need to update the
Subprogram found at the scope of the last node of the inline chain; this
Subprogram used to be OldF but now should be NewF.

Differential Revision: https://reviews.llvm.org/D139217

Added: 
    

Modified: 
    llvm/include/llvm/IR/DebugInfoMetadata.h
    llvm/include/llvm/IR/DebugLoc.h
    llvm/lib/IR/DebugLoc.cpp
    llvm/lib/Transforms/Utils/CodeExtractor.cpp
    llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index c85c21ddd9f15..8b25395ff653c 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2121,6 +2121,11 @@ class DILexicalBlockBase : public DILocalScope {
 
   Metadata *getRawScope() const { return getOperand(1); }
 
+  void replaceScope(DIScope *Scope) {
+    assert(!isUniqued()); // Callers must pass a node that is not uniqued.
+    setOperand(1, Scope); // Operand 1 is the slot getRawScope() returns.
+  }
+
   static bool classof(const Metadata *MD) {
     return MD->getMetadataID() == DILexicalBlockKind ||
            MD->getMetadataID() == DILexicalBlockFileKind;

diff  --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h
index 4c48f048d60c2..c22d3e9b10d27 100644
--- a/llvm/include/llvm/IR/DebugLoc.h
+++ b/llvm/include/llvm/IR/DebugLoc.h
@@ -86,6 +86,13 @@ namespace llvm {
     /// Gets the inlined-at scope for a DebugLoc.
     MDNode *getInlinedAtScope() const;
 
+    /// Rebuild the entire inlined-at chain of DL so that the subprogram at the
+    /// end of the chain becomes NewSP. Cache maps already-rewritten nodes to results.
+    static DebugLoc
+    replaceInlinedAtSubprogram(const DebugLoc &DL, DISubprogram &NewSP,
+                               LLVMContext &Ctx,
+                               DenseMap<const MDNode *, MDNode *> &Cache);
+
     /// Find the debug info location for the start of the function.
     ///
     /// Walk up the scope chain of given debug loc and find line number info

diff  --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index 34c9d026b19aa..62230b51e9146 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -67,6 +67,77 @@ void DebugLoc::setImplicitCode(bool ImplicitCode) {
   }
 }
 
+/// Walks the scope chain from RootScope up to (not including) its Subprogram and
+/// rebuilds it on top of NewSP. Returns the replacement for RootScope (NewSP if none).
+static DIScope *
+cloneScopeForSubprogram(DILocalScope &RootScope, DISubprogram &NewSP,
+                        LLVMContext &Ctx, // Not referenced in this body.
+                        DenseMap<const MDNode *, MDNode *> &Cache) {
+  SmallVector<DIScope *> ScopeChain; // Scopes still to clone, innermost first.
+  DIScope *CachedResult = nullptr; // Set if a scope on the way up was already rewritten.
+
+  for (DIScope *Scope = &RootScope; !isa<DISubprogram>(Scope);
+       Scope = Scope->getScope()) {
+    if (auto It = Cache.find(Scope); It != Cache.end()) {
+      CachedResult = cast<DIScope>(It->second);
+      break; // Everything from here outwards has been rebuilt before.
+    }
+    ScopeChain.push_back(Scope);
+  }
+
+  // Recreate the scope chain, outermost scope first, starting at the new
+  // subprogram (or at a cached result when the walk above stopped early).
+  DIScope *UpdatedScope = CachedResult ? CachedResult : &NewSP;
+  for (DIScope *ScopeToUpdate : reverse(ScopeChain)) {
+    TempMDNode ClonedScope = ScopeToUpdate->clone();
+    cast<DILexicalBlockBase>(*ClonedScope).replaceScope(UpdatedScope); // Assumes each collected scope is a DILexicalBlockBase.
+    UpdatedScope =
+        cast<DIScope>(MDNode::replaceWithUniqued(std::move(ClonedScope)));
+    Cache[ScopeToUpdate] = UpdatedScope; // Record original scope -> rewritten scope.
+  }
+  return UpdatedScope;
+}
+
+DebugLoc DebugLoc::replaceInlinedAtSubprogram( // Re-roots RootLoc's inline chain at NewSP.
+    const DebugLoc &RootLoc, DISubprogram &NewSP, LLVMContext &Ctx,
+    DenseMap<const MDNode *, MDNode *> &Cache) { // Cache: original node -> rewritten node.
+  SmallVector<DILocation *> LocChain; // Locations still to rebuild, innermost first.
+  DILocation *CachedResult = nullptr; // Set if part of the chain was already rewritten.
+
+  // Collect the inline chain from RootLoc outwards via getInlinedAt(), stopping
+  // early if we find a location that has already been processed.
+  for (DILocation *Loc = RootLoc; Loc; Loc = Loc->getInlinedAt()) {
+    if (auto It = Cache.find(Loc); It != Cache.end()) {
+      CachedResult = cast<DILocation>(It->second);
+      break;
+    }
+    LocChain.push_back(Loc);
+  }
+
+  DILocation *UpdatedLoc = CachedResult;
+  if (!UpdatedLoc) {
+    // If no cache hits, then back() is the end of the inline chain, that is,
+    // the DILocation whose scope ends in the Subprogram to be replaced.
+    DILocation *LocToUpdate = LocChain.pop_back_val(); // NOTE(review): LocChain is empty if RootLoc is null; the caller must check.
+    DIScope *NewScope =
+        cloneScopeForSubprogram(*LocToUpdate->getScope(), NewSP, Ctx, Cache);
+    UpdatedLoc = DILocation::get(Ctx, LocToUpdate->getLine(),
+                                 LocToUpdate->getColumn(), NewScope); // No inlinedAt: this is the end of the chain.
+    Cache[LocToUpdate] = UpdatedLoc;
+  }
+
+  // Recreate the rest of the location chain, outermost first, on top of
+  // UpdatedLoc. Only line, column, scope and inlinedAt are passed along.
+  for (const DILocation *LocToUpdate : reverse(LocChain)) {
+    UpdatedLoc =
+        DILocation::get(Ctx, LocToUpdate->getLine(), LocToUpdate->getColumn(),
+                        LocToUpdate->getScope(), UpdatedLoc); // Scope is kept as-is; only inlinedAt changes.
+    Cache[LocToUpdate] = UpdatedLoc;
+  }
+
+  return UpdatedLoc;
+}
+
 DebugLoc DebugLoc::appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt,
                                    LLVMContext &Ctx,
                                    DenseMap<const MDNode *, MDNode *> &Cache) {

diff  --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9a38f9279be14..2435b15666da6 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1606,9 +1606,11 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
 
   // Fix up the scope information attached to the line locations in the new
   // function.
+  DenseMap<const MDNode *, MDNode *> Cache;
   for (Instruction &I : instructions(NewFunc)) {
     if (const DebugLoc &DL = I.getDebugLoc())
-      I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
+      I.setDebugLoc(
+          DebugLoc::replaceInlinedAtSubprogram(DL, *NewSP, Ctx, Cache));
 
     // Loop info metadata may contain line locations. Fix them up.
     auto updateLoopInfoLoc = [&Ctx, NewSP](Metadata *MD) -> Metadata * {

diff  --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
index d28f46a3c9f18..8632a722868a3 100644
--- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
+++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
@@ -28,15 +28,25 @@ target triple = "x86_64-apple-macosx10.14.0"
 ; - Expressions inside of dbg.value intrinsics are preserved
 ; CHECK-NEXT: llvm.dbg.value(metadata i32 [[ADD1]], metadata [[VAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_plus, DW_OP_stack_value)
 
+; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE2:![0-9]+]]
+; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE3:![0-9]+]]
+
 ; - The DISubprogram for @foo.cold.1 has an empty DISubroutineType
 ; CHECK: [[FILE:![0-9]+]] = !DIFile(filename: "<stdin>"
 ; CHECK: [[EMPTY_MD:![0-9]+]] = !{}
 ; CHECK: [[EMPTY_TYPE:![0-9]+]] = !DISubroutineType(types: [[EMPTY_MD]])
+; CHECK: [[INLINE_ME_SCOPE:![0-9]+]] = distinct !DISubprogram(name: "inline_me"
 ; CHECK: [[NEWSCOPE:![0-9]+]] = distinct !DISubprogram(name: "foo.cold.1", linkageName: "foo.cold.1", scope: null, file: [[FILE]], type: [[EMPTY_TYPE]], spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized
 
 ; - Line locations in @foo.cold.1 point to the new scope for @foo.cold.1
 ; CHECK: [[LINE1]] = !DILocation(line: 1, column: 1, scope: [[NEWSCOPE]])
 
+; CHECK: [[LINE2]] =          !DILocation(line: 2, column: 2, scope: [[INLINE_ME_SCOPE]]
+; CHECK-SAME:                            inlinedAt: [[LINE3]]
+; CHECK: [[LINE3]] =          !DILocation(line: 3, column: 3, scope: [[INLINED_SCOPE1:![0-9]*]]
+; CHECK: [[INLINED_SCOPE1]] = !DILexicalBlock(scope: [[INLINED_SCOPE2:![0-9]*]], file: [[FILE]], line: 4, column: 4)
+; CHECK: [[INLINED_SCOPE2]] = !DILexicalBlock(scope: [[NEWSCOPE]], file: [[FILE]], line: 5, column: 5)
+
 define void @foo(i32 %arg1) !dbg !6 {
 entry:
   %var = add i32 0, 0, !dbg !11
@@ -52,6 +62,8 @@ if.end:                                           ; preds = %entry
   call void @sink(i32 %add1), !dbg !11
   call void @llvm.dbg.value(metadata i32 %add1, metadata !9, metadata !DIExpression()), !dbg !11
   call void @llvm.dbg.value(metadata i32 %add1, metadata !9, metadata !DIExpression(DW_OP_constu, 1, DW_OP_plus, DW_OP_stack_value)), !dbg !11
+  call void @sink(i32 %add1), !dbg !13 ; inlined from @inline_me
+  call void @sink(i32 %add1), !dbg !14 ; not inlined, but inside some scope of foo
   ret void
 }
 
@@ -59,6 +71,10 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 declare void @sink(i32) cold
 
+define void @inline_me() !dbg !12{ ; Carries DISubprogram !12, the scope of inlined location !13.
+  ret void
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.debugify = !{!3, !4}
 !llvm.module.flags = !{!5}
@@ -75,3 +91,8 @@ declare void @sink(i32) cold
 !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
 !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
 !11 = !DILocation(line: 1, column: 1, scope: !6)
+!12 = distinct !DISubprogram(name: "inline_me", linkageName: "inline_me", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
+!13 = !DILocation(line: 2, column: 2, scope: !12, inlinedAt: !14)
+!14 = !DILocation(line: 3, column: 3, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 4)
+!16 = distinct !DILexicalBlock(scope: !6, file: !1, line: 5, column: 5)


        


More information about the llvm-commits mailing list