[llvm] [CodeExtractor] Improve debug info for input values. (PR #136016)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 13:12:13 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Abid Qadeer (abidh)

<details>
<summary>Changes</summary>

If we use `CodeExtractor` to extract the block1 into a new function,

```
define void @<!-- -->foo() !dbg !2 {
entry:
  %1 = alloca i32, i64 1, align 4
  %2 = alloca i32, i64 1, align 4
  #dbg_declare(ptr %1, !8, !DIExpression(), !1)
  br label %block1

block1:
  store i32 1, ptr %1, align 4
  store i32 2, ptr %2, align 4
  #dbg_declare(ptr %2, !10, !DIExpression(), !1)
  ret void
}
```

it will look like the extracted function shown below (with some irrelevent details removed).

```
define internal void @<!-- -->extracted(ptr %arg0, ptr %arg1) { 
newFuncRoot:
  br label %block1

block1:
  store i32 1, ptr %arg0, align 4
  store i32 2, ptr %arg1, align 4
  ret void
}
```

You will notice that it has replaced the usage of values that were in the parent function (%1 and %2) with the arguments to the new function. But it did not do the same thing with `#dbg_declare` which was simply dropped because its location pointed to a value outside of the new function. Similarly arg0 is without any debug record, although the value that it replaced had one and we could materialize one for it based on that.

This is not just a theoretical limitations. `CodeExtractor` is used to create functions that implement many of the `OpenMP` constructs in `OMPIRBuilder`. As a result of these limitations, the debug information is missing from the created functions.

This PR tries to address this problem. It iterates over the input to the extracted function and looks at their debug uses. If they were present in the new function, it updates their location. Otherwise it materialize a similar usage in the new function.

Most of these changes are localized in `fixupDebugInfoPostExtraction`. Only other change is to propagate function inputs and the replacement values to it.

---
Full diff: https://github.com/llvm/llvm-project/pull/136016.diff


6 Files Affected:

- (modified) llvm/include/llvm/Transforms/Utils/CodeExtractor.h (+2-1) 
- (modified) llvm/lib/Transforms/Utils/CodeExtractor.cpp (+60-15) 
- (modified) llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll (+1) 
- (added) llvm/test/Transforms/CodeExtractor/input-value-debug.ll (+53) 
- (modified) llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll (-4) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+85) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 60c5def3472b6..d23229750ca68 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -290,7 +290,8 @@ class CodeExtractorAnalysisCache {
     void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs,
                           const ValueSet &StructValues, Function *newFunction,
                           StructType *StructArgTy, BasicBlock *header,
-                          const ValueSet &SinkingCands);
+                          const ValueSet &SinkingCands,
+                          SmallVector<Value *> &NewValues);
 
     /// Generates a Basic Block that calls the extracted function.
     CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs,
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 2b055020022be..13083b62a4701 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1242,11 +1242,18 @@ static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
   }
 }
 
-/// Fix up the debug info in the old and new functions by pointing line
-/// locations and debug intrinsics to the new subprogram scope, and by deleting
-/// intrinsics which point to values outside of the new function.
-static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
-                                         CallInst &TheCall) {
+/// Fix up the debug info in the old and new functions. Following changes are
+/// done.
+/// 1. If a debug record points to a value that has been replaced, update the
+/// record to use the new value.
+/// 2. If an Input value that has been replaced was used as a location of a
+/// debug record in the Parent function, then materealize a similar record in
+/// the new function.
+/// 3. Point line locations and debug intrinsics to the new subprogram scope
+/// 4. Remove intrinsics which point to values outside of the new function.
+static void fixupDebugInfoPostExtraction(
+    Function &OldFunc, Function &NewFunc, CallInst &TheCall,
+    const SetVector<Value *> &Inputs, const SmallVector<Value *> &NewValues) {
   DISubprogram *OldSP = OldFunc.getSubprogram();
   LLVMContext &Ctx = OldFunc.getContext();
 
@@ -1273,14 +1280,50 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
       /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
   NewFunc.setSubprogram(NewSP);
 
+  auto UpdateOrInsertDebugRecord = [&](auto *DR, Value *OldLoc, Value *NewLoc,
+                                       DIExpression *Expr, bool Declare) {
+    if (DR->getParent()->getParent() == &NewFunc)
+      DR->replaceVariableLocationOp(OldLoc, NewLoc);
+    else {
+      if (Declare)
+        DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+                          &NewFunc.getEntryBlock());
+      else
+        DIB.insertDbgValueIntrinsic(
+            NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+            NewFunc.getEntryBlock().getTerminator()->getIterator());
+    }
+  };
+  for (auto [Input, NewVal] : zip_equal(Inputs, NewValues)) {
+    SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+    SmallVector<DbgVariableRecord *, 1> DPUsers;
+    findDbgUsers(DbgUsers, Input, &DPUsers);
+    DIExpression *Expr = DIB.createExpression();
+
+    // Iterate the debud users of the Input values. If they are in the extracted
+    // function then update their location with the new value. If they are in
+    // the parent function then create a similar debug record.
+    for (auto *DVI : DbgUsers) {
+      UpdateOrInsertDebugRecord(DVI, Input, NewVal, Expr,
+                                isa<DbgDeclareInst>(DVI));
+    }
+    for (auto *DVR : DPUsers)
+      UpdateOrInsertDebugRecord(DVR, Input, NewVal, Expr,
+                                DVR->isDbgDeclare());
+  }
+
   auto IsInvalidLocation = [&NewFunc](Value *Location) {
-    // Location is invalid if it isn't a constant or an instruction, or is an
-    // instruction but isn't in the new function.
-    if (!Location ||
-        (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+    // Location is invalid if it isn't a constant, an instruction or an
+    // argument, or is an instruction/argument but isn't in the new function.
+    if (!Location || (!isa<Constant>(Location) && !isa<Argument>(Location) &&
+                      !isa<Instruction>(Location)))
       return true;
-    Instruction *LocationInst = dyn_cast<Instruction>(Location);
-    return LocationInst && LocationInst->getFunction() != &NewFunc;
+
+    if (Argument *Arg = dyn_cast<Argument>(Location))
+      return Arg->getParent() != &NewFunc;
+    else if (Instruction *LocationInst = dyn_cast<Instruction>(Location))
+      return LocationInst->getFunction() != &NewFunc;
+    return false;
   };
 
   // Debug intrinsics in the new function need to be updated in one of two
@@ -1509,9 +1552,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
       inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse,
       StructValues, StructTy);
   newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
+  SmallVector<Value *> NewValues;
 
   emitFunctionBody(inputs, outputs, StructValues, newFunction, StructTy, header,
-                   SinkingCands);
+                   SinkingCands, NewValues);
 
   std::vector<Value *> Reloads;
   CallInst *TheCall = emitReplacerCall(
@@ -1521,7 +1565,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
   insertReplacerCall(oldFunction, header, TheCall->getParent(), outputs,
                      Reloads, ExitWeights);
 
-  fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
+  fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
+                               NewValues);
 
   LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
     newFunction->dump();
@@ -1586,7 +1631,8 @@ Type *CodeExtractor::getSwitchType() {
 void CodeExtractor::emitFunctionBody(
     const ValueSet &inputs, const ValueSet &outputs,
     const ValueSet &StructValues, Function *newFunction,
-    StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) {
+    StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands,
+    SmallVector<Value *> &NewValues) {
   Function *oldFunction = header->getParent();
   LLVMContext &Context = oldFunction->getContext();
 
@@ -1618,7 +1664,6 @@ void CodeExtractor::emitFunctionBody(
 
   // Rewrite all users of the inputs in the extracted region to use the
   // arguments (or appropriate addressing into struct) instead.
-  SmallVector<Value *> NewValues;
   for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
     Value *RewriteVal;
     if (StructValues.contains(inputs[i])) {
diff --git a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
index ff1ed786cefc4..b932a7dc0bf9f 100644
--- a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
+++ b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
@@ -19,6 +19,7 @@
 
 ; CHECK-LABEL: define internal void @test.loop1(ptr %v1)
 ; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_value
 ; CHECK-NEXT:   br
 
 define void @test() {
diff --git a/llvm/test/Transforms/CodeExtractor/input-value-debug.ll b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
new file mode 100644
index 0000000000000..d9723d61b4d63
--- /dev/null
+++ b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
@@ -0,0 +1,53 @@
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) !dbg !2 {
+entry:
+   %1 = alloca i32, i64 1, align 4
+   %2 = alloca i32, i64 1, align 4
+   store i32 %a, ptr %1, align 4
+   #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+   #dbg_value(i32 %b, !9, !DIExpression(), !1)
+  br i1 undef, label %if.then, label %if.end
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+   store i32 10, ptr %1, align 4
+   %3 = add i32 %b, 1
+   store i32 1, ptr %2, align 4
+   call void @sink(i32 %3)
+   #dbg_declare(ptr %2, !10, !DIExpression(), !1)
+   ret void
+}
+
+declare void @sink(i32) cold
+
+!llvm.dbg.cu = !{!6}
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 11, column: 7, scope: !2)
+!2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+!3 = !DIFile(filename: "test.c", directory: "")
+!4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+!5 = !{}
+!6 = distinct !DICompileUnit(language: DW_LANG_C, file: !3)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+!9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+!10 = !DILocalVariable(name: "c", scope: !2, file: !3, type: !7)
+
+; CHECK: define {{.*}}@foo.cold.1(ptr %[[ARG0:.*]], i32 %[[ARG1:.*]], ptr %[[ARG2:.*]]){{.*}} !dbg ![[FN:.*]] {
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_declare(ptr %[[ARG0]], ![[V1:[0-9]+]], {{.*}})
+; CHECK-NEXT: #dbg_value(i32 %[[ARG1]], ![[V2:[0-9]+]], {{.*}})
+; CHECK-NEXT: br
+; CHECK: if.end:
+; CHECK:     #dbg_declare(ptr %[[ARG2]], ![[V3:[0-9]+]], {{.*}})
+; CHECK: }
+
+; CHECK: ![[V1]] = !DILocalVariable(name: "a", scope: ![[FN]]{{.*}})
+; CHECK: ![[V2]] = !DILocalVariable(name: "b", scope: ![[FN]]{{.*}})
+; CHECK: ![[V3]] = !DILocalVariable(name: "c", scope: ![[FN]]{{.*}})
diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
index e8c1b464ab0c6..3f69f0c200dad 100644
--- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
+++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
@@ -8,10 +8,6 @@ target triple = "x86_64-apple-macosx10.14.0"
 
 ; CHECK-LABEL: define {{.*}}@foo.cold.1
 
-; - The llvm.dbg.value intrinsic pointing to an argument in @foo (%arg1) is
-;   dropped
-; CHECK-NOT: #dbg_value
-
 ; - Instructions without locations in the original function have no
 ;   location in the new function
 ; CHECK:      [[ADD1:%.*]] = add i32 %{{.*}}, 1{{$}}
diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
index cfe07a2f6c461..c54e93b59e04b 100644
--- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
@@ -731,4 +732,88 @@ TEST(CodeExtractor, OpenMPAggregateArgs) {
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
+
+TEST(CodeExtractor, ArgsDebugInfo) {
+  LLVMContext Ctx;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M(parseAssemblyString(R"ir(
+
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  define void @foo(i32 %a, i32 %b) !dbg !2 {
+    %1 = alloca i32, i64 1, align 4, !dbg !1
+    store i32 %a, ptr %1, align 4, !dbg !1
+    #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+    #dbg_value(i32 %b, !9, !DIExpression(), !1)
+    br label %entry
+
+  entry:
+    br label %extract
+
+  extract:
+    store i32 10, ptr %1, align 4, !dbg !1
+    %2 = add i32 %b, 1, !dbg !1
+    br label %exit
+
+  exit:
+    ret void
+  }
+  !llvm.dbg.cu = !{!6}
+  !llvm.module.flags = !{!0}
+  !0 = !{i32 2, !"Debug Info Version", i32 3}
+  !1 = !DILocation(line: 11, column: 7, scope: !2)
+  !2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+  !3 = !DIFile(filename: "test.f90", directory: "")
+  !4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+  !5 = !{null}
+  !6 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3)
+  !7 = !DIBasicType(name: "integer", size: 32, encoding: DW_ATE_signed)
+  !8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+  !9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+
+  )ir",
+                                                Err, Ctx));
+
+  Function *Func = M->getFunction("foo");
+  SmallVector<BasicBlock *, 1> Blocks{getBlockByName(Func, "extract")};
+
+  auto TestExtracted = [&](bool AggregateArgs) {
+    CodeExtractor CE(Blocks, /* DominatorTree */ nullptr, AggregateArgs);
+    EXPECT_TRUE(CE.isEligible());
+    CodeExtractorAnalysisCache CEAC(*Func);
+    SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+    BasicBlock *CommonExit = nullptr;
+    CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+    CE.findInputsOutputs(Inputs, Outputs, SinkingCands);
+    Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs);
+    Outlined->dump();
+    EXPECT_TRUE(Outlined);
+    BasicBlock &EB = Outlined->getEntryBlock();
+    Instruction *Term = EB.getTerminator();
+    EXPECT_TRUE(Term);
+    EXPECT_TRUE(Term->hasDbgRecords());
+    for (DbgVariableRecord &DVR : filterDbgVars(Term->getDbgRecordRange())) {
+      DILocalVariable *Var = DVR.getVariable();
+      EXPECT_TRUE(Var);
+      if (DVR.isDbgDeclare())
+        EXPECT_TRUE(Var->getName() == "a");
+      else
+        EXPECT_TRUE(Var->getName() == "b");
+      for (Value *Loc : DVR.location_ops()) {
+        if (Instruction *I = dyn_cast<Instruction>(Loc))
+          EXPECT_TRUE(I->getParent() == &EB);
+        else if (Argument *A = dyn_cast<Argument>(Loc))
+          EXPECT_TRUE(A->getParent() == Outlined);
+      }
+    }
+    EXPECT_FALSE(verifyFunction(*Outlined));
+  };
+  // Test with both true and false for AggregateArgs.
+  TestExtracted(true);
+  TestExtracted(false);
+  EXPECT_FALSE(verifyFunction(*Func));
+
+}
+
 } // end anonymous namespace

``````````

</details>


https://github.com/llvm/llvm-project/pull/136016


More information about the llvm-commits mailing list