[llvm] [CodeExtractor] Improve debug info for input values. (PR #136016)

Abid Qadeer via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 17 02:02:45 PDT 2025


https://github.com/abidh updated https://github.com/llvm/llvm-project/pull/136016

>From ba9271606045b2457368e572146914dfaa2945a8 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Thu, 10 Apr 2025 16:56:54 +0100
Subject: [PATCH 1/2] [CodeExtractor] Improve debug info for input values.

If we use CodeExtractor to extract the block1 into a new function, it
will look like the extracted function shown below (with some irrelevent
details removed).

define void @foo() !dbg !2 {
entry:
  %1 = alloca i32, i64 1, align 4
  %2 = alloca i32, i64 1, align 4
  #dbg_declare(ptr %1, !8, !DIExpression(), !1)
  br label %block1

block1:
  store i32 1, ptr %1, align 4
  store i32 2, ptr %2, align 4
  #dbg_declare(ptr %2, !10, !DIExpression(), !1)
  ret void
}

define internal void @extracted(ptr %arg0, ptr %arg1) {
newFuncRoot:
  br label %block1

block1:
  store i32 1, ptr %arg0, align 4
  store i32 2, ptr %arg1, align 4
  ret void
}

You will notice that it has replaced the usage of values that
were in the parent function (%1 and %2) with the arguments to the new
function. But it did not do the same thing with #dbg_declare
which was simply dropped because its location pointed to a value outside
of the new function. Similarly arg0 is without any debug record,
although the value that it replaced had one and we could materealize
one for it based on that.

This is not just a theoratical limitations. CodeExtractor is used to
create functions that implmenent many of the OpenMP constructs in
OMPIRBuilder. As a result of these limitations, the debug information is
missing from the created functions.

This PR tries to address this problem. It iterates over the input to the
extraced function and looks at their debug uses. If they were present in
the new function, it updates their location. Otherwise it materealize a
similar usage in the new function.

Most of these changes are localised in fixupDebugInfoPostExtraction.
Only other change is to propogate function inputs and the replacement
values to the it.
---
 .../llvm/Transforms/Utils/CodeExtractor.h     |  3 +-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 75 ++++++++++++----
 .../CodeExtractor/LoopExtractor_alloca.ll     |  1 +
 .../CodeExtractor/input-value-debug.ll        | 53 ++++++++++++
 .../HotColdSplit/transfer-debug-info.ll       |  4 -
 .../Transforms/Utils/CodeExtractorTest.cpp    | 85 +++++++++++++++++++
 6 files changed, 201 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeExtractor/input-value-debug.ll

diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 60c5def3472b6..d23229750ca68 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -290,7 +290,8 @@ class CodeExtractorAnalysisCache {
     void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs,
                           const ValueSet &StructValues, Function *newFunction,
                           StructType *StructArgTy, BasicBlock *header,
-                          const ValueSet &SinkingCands);
+                          const ValueSet &SinkingCands,
+                          SmallVector<Value *> &NewValues);
 
     /// Generates a Basic Block that calls the extracted function.
     CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs,
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 2b055020022be..13083b62a4701 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1242,11 +1242,18 @@ static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
   }
 }
 
-/// Fix up the debug info in the old and new functions by pointing line
-/// locations and debug intrinsics to the new subprogram scope, and by deleting
-/// intrinsics which point to values outside of the new function.
-static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
-                                         CallInst &TheCall) {
+/// Fix up the debug info in the old and new functions. Following changes are
+/// done.
+/// 1. If a debug record points to a value that has been replaced, update the
+/// record to use the new value.
+/// 2. If an Input value that has been replaced was used as a location of a
+/// debug record in the Parent function, then materealize a similar record in
+/// the new function.
+/// 3. Point line locations and debug intrinsics to the new subprogram scope
+/// 4. Remove intrinsics which point to values outside of the new function.
+static void fixupDebugInfoPostExtraction(
+    Function &OldFunc, Function &NewFunc, CallInst &TheCall,
+    const SetVector<Value *> &Inputs, const SmallVector<Value *> &NewValues) {
   DISubprogram *OldSP = OldFunc.getSubprogram();
   LLVMContext &Ctx = OldFunc.getContext();
 
@@ -1273,14 +1280,50 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
       /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
   NewFunc.setSubprogram(NewSP);
 
+  auto UpdateOrInsertDebugRecord = [&](auto *DR, Value *OldLoc, Value *NewLoc,
+                                       DIExpression *Expr, bool Declare) {
+    if (DR->getParent()->getParent() == &NewFunc)
+      DR->replaceVariableLocationOp(OldLoc, NewLoc);
+    else {
+      if (Declare)
+        DIB.insertDeclare(NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+                          &NewFunc.getEntryBlock());
+      else
+        DIB.insertDbgValueIntrinsic(
+            NewLoc, DR->getVariable(), Expr, DR->getDebugLoc(),
+            NewFunc.getEntryBlock().getTerminator()->getIterator());
+    }
+  };
+  for (auto [Input, NewVal] : zip_equal(Inputs, NewValues)) {
+    SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+    SmallVector<DbgVariableRecord *, 1> DPUsers;
+    findDbgUsers(DbgUsers, Input, &DPUsers);
+    DIExpression *Expr = DIB.createExpression();
+
+    // Iterate the debud users of the Input values. If they are in the extracted
+    // function then update their location with the new value. If they are in
+    // the parent function then create a similar debug record.
+    for (auto *DVI : DbgUsers) {
+      UpdateOrInsertDebugRecord(DVI, Input, NewVal, Expr,
+                                isa<DbgDeclareInst>(DVI));
+    }
+    for (auto *DVR : DPUsers)
+      UpdateOrInsertDebugRecord(DVR, Input, NewVal, Expr,
+                                DVR->isDbgDeclare());
+  }
+
   auto IsInvalidLocation = [&NewFunc](Value *Location) {
-    // Location is invalid if it isn't a constant or an instruction, or is an
-    // instruction but isn't in the new function.
-    if (!Location ||
-        (!isa<Constant>(Location) && !isa<Instruction>(Location)))
+    // Location is invalid if it isn't a constant, an instruction or an
+    // argument, or is an instruction/argument but isn't in the new function.
+    if (!Location || (!isa<Constant>(Location) && !isa<Argument>(Location) &&
+                      !isa<Instruction>(Location)))
       return true;
-    Instruction *LocationInst = dyn_cast<Instruction>(Location);
-    return LocationInst && LocationInst->getFunction() != &NewFunc;
+
+    if (Argument *Arg = dyn_cast<Argument>(Location))
+      return Arg->getParent() != &NewFunc;
+    else if (Instruction *LocationInst = dyn_cast<Instruction>(Location))
+      return LocationInst->getFunction() != &NewFunc;
+    return false;
   };
 
   // Debug intrinsics in the new function need to be updated in one of two
@@ -1509,9 +1552,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
       inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse,
       StructValues, StructTy);
   newFunction->IsNewDbgInfoFormat = oldFunction->IsNewDbgInfoFormat;
+  SmallVector<Value *> NewValues;
 
   emitFunctionBody(inputs, outputs, StructValues, newFunction, StructTy, header,
-                   SinkingCands);
+                   SinkingCands, NewValues);
 
   std::vector<Value *> Reloads;
   CallInst *TheCall = emitReplacerCall(
@@ -1521,7 +1565,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
   insertReplacerCall(oldFunction, header, TheCall->getParent(), outputs,
                      Reloads, ExitWeights);
 
-  fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
+  fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
+                               NewValues);
 
   LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
     newFunction->dump();
@@ -1586,7 +1631,8 @@ Type *CodeExtractor::getSwitchType() {
 void CodeExtractor::emitFunctionBody(
     const ValueSet &inputs, const ValueSet &outputs,
     const ValueSet &StructValues, Function *newFunction,
-    StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) {
+    StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands,
+    SmallVector<Value *> &NewValues) {
   Function *oldFunction = header->getParent();
   LLVMContext &Context = oldFunction->getContext();
 
@@ -1618,7 +1664,6 @@ void CodeExtractor::emitFunctionBody(
 
   // Rewrite all users of the inputs in the extracted region to use the
   // arguments (or appropriate addressing into struct) instead.
-  SmallVector<Value *> NewValues;
   for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
     Value *RewriteVal;
     if (StructValues.contains(inputs[i])) {
diff --git a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
index ff1ed786cefc4..b932a7dc0bf9f 100644
--- a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
+++ b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll
@@ -19,6 +19,7 @@
 
 ; CHECK-LABEL: define internal void @test.loop1(ptr %v1)
 ; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_value
 ; CHECK-NEXT:   br
 
 define void @test() {
diff --git a/llvm/test/Transforms/CodeExtractor/input-value-debug.ll b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
new file mode 100644
index 0000000000000..d9723d61b4d63
--- /dev/null
+++ b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
@@ -0,0 +1,53 @@
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %a, i32 %b) !dbg !2 {
+entry:
+   %1 = alloca i32, i64 1, align 4
+   %2 = alloca i32, i64 1, align 4
+   store i32 %a, ptr %1, align 4
+   #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+   #dbg_value(i32 %b, !9, !DIExpression(), !1)
+  br i1 undef, label %if.then, label %if.end
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+   store i32 10, ptr %1, align 4
+   %3 = add i32 %b, 1
+   store i32 1, ptr %2, align 4
+   call void @sink(i32 %3)
+   #dbg_declare(ptr %2, !10, !DIExpression(), !1)
+   ret void
+}
+
+declare void @sink(i32) cold
+
+!llvm.dbg.cu = !{!6}
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocation(line: 11, column: 7, scope: !2)
+!2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+!3 = !DIFile(filename: "test.c", directory: "")
+!4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+!5 = !{}
+!6 = distinct !DICompileUnit(language: DW_LANG_C, file: !3)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+!9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+!10 = !DILocalVariable(name: "c", scope: !2, file: !3, type: !7)
+
+; CHECK: define {{.*}}@foo.cold.1(ptr %[[ARG0:.*]], i32 %[[ARG1:.*]], ptr %[[ARG2:.*]]){{.*}} !dbg ![[FN:.*]] {
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: #dbg_declare(ptr %[[ARG0]], ![[V1:[0-9]+]], {{.*}})
+; CHECK-NEXT: #dbg_value(i32 %[[ARG1]], ![[V2:[0-9]+]], {{.*}})
+; CHECK-NEXT: br
+; CHECK: if.end:
+; CHECK:     #dbg_declare(ptr %[[ARG2]], ![[V3:[0-9]+]], {{.*}})
+; CHECK: }
+
+; CHECK: ![[V1]] = !DILocalVariable(name: "a", scope: ![[FN]]{{.*}})
+; CHECK: ![[V2]] = !DILocalVariable(name: "b", scope: ![[FN]]{{.*}})
+; CHECK: ![[V3]] = !DILocalVariable(name: "c", scope: ![[FN]]{{.*}})
diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
index e8c1b464ab0c6..3f69f0c200dad 100644
--- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
+++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll
@@ -8,10 +8,6 @@ target triple = "x86_64-apple-macosx10.14.0"
 
 ; CHECK-LABEL: define {{.*}}@foo.cold.1
 
-; - The llvm.dbg.value intrinsic pointing to an argument in @foo (%arg1) is
-;   dropped
-; CHECK-NOT: #dbg_value
-
 ; - Instructions without locations in the original function have no
 ;   location in the new function
 ; CHECK:      [[ADD1:%.*]] = add i32 %{{.*}}, 1{{$}}
diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
index cfe07a2f6c461..c54e93b59e04b 100644
--- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
@@ -731,4 +732,88 @@ TEST(CodeExtractor, OpenMPAggregateArgs) {
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
+
+TEST(CodeExtractor, ArgsDebugInfo) {
+  LLVMContext Ctx;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M(parseAssemblyString(R"ir(
+
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  define void @foo(i32 %a, i32 %b) !dbg !2 {
+    %1 = alloca i32, i64 1, align 4, !dbg !1
+    store i32 %a, ptr %1, align 4, !dbg !1
+    #dbg_declare(ptr %1, !8, !DIExpression(), !1)
+    #dbg_value(i32 %b, !9, !DIExpression(), !1)
+    br label %entry
+
+  entry:
+    br label %extract
+
+  extract:
+    store i32 10, ptr %1, align 4, !dbg !1
+    %2 = add i32 %b, 1, !dbg !1
+    br label %exit
+
+  exit:
+    ret void
+  }
+  !llvm.dbg.cu = !{!6}
+  !llvm.module.flags = !{!0}
+  !0 = !{i32 2, !"Debug Info Version", i32 3}
+  !1 = !DILocation(line: 11, column: 7, scope: !2)
+  !2 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+  !3 = !DIFile(filename: "test.f90", directory: "")
+  !4 = !DISubroutineType(cc: DW_CC_program, types: !5)
+  !5 = !{null}
+  !6 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3)
+  !7 = !DIBasicType(name: "integer", size: 32, encoding: DW_ATE_signed)
+  !8 = !DILocalVariable(name: "a", scope: !2, file: !3, type: !7)
+  !9 = !DILocalVariable(name: "b", scope: !2, file: !3, type: !7)
+
+  )ir",
+                                                Err, Ctx));
+
+  Function *Func = M->getFunction("foo");
+  SmallVector<BasicBlock *, 1> Blocks{getBlockByName(Func, "extract")};
+
+  auto TestExtracted = [&](bool AggregateArgs) {
+    CodeExtractor CE(Blocks, /* DominatorTree */ nullptr, AggregateArgs);
+    EXPECT_TRUE(CE.isEligible());
+    CodeExtractorAnalysisCache CEAC(*Func);
+    SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+    BasicBlock *CommonExit = nullptr;
+    CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+    CE.findInputsOutputs(Inputs, Outputs, SinkingCands);
+    Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs);
+    Outlined->dump();
+    EXPECT_TRUE(Outlined);
+    BasicBlock &EB = Outlined->getEntryBlock();
+    Instruction *Term = EB.getTerminator();
+    EXPECT_TRUE(Term);
+    EXPECT_TRUE(Term->hasDbgRecords());
+    for (DbgVariableRecord &DVR : filterDbgVars(Term->getDbgRecordRange())) {
+      DILocalVariable *Var = DVR.getVariable();
+      EXPECT_TRUE(Var);
+      if (DVR.isDbgDeclare())
+        EXPECT_TRUE(Var->getName() == "a");
+      else
+        EXPECT_TRUE(Var->getName() == "b");
+      for (Value *Loc : DVR.location_ops()) {
+        if (Instruction *I = dyn_cast<Instruction>(Loc))
+          EXPECT_TRUE(I->getParent() == &EB);
+        else if (Argument *A = dyn_cast<Argument>(Loc))
+          EXPECT_TRUE(A->getParent() == Outlined);
+      }
+    }
+    EXPECT_FALSE(verifyFunction(*Outlined));
+  };
+  // Test with both true and false for AggregateArgs.
+  TestExtracted(true);
+  TestExtracted(false);
+  EXPECT_FALSE(verifyFunction(*Func));
+
+}
+
 } // end anonymous namespace

>From f4ed681717efe732e8a5c6a0fbc1782ae5ff24b5 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Thu, 17 Apr 2025 10:01:51 +0100
Subject: [PATCH 2/2] Removed use of undef from the test.

---
 llvm/test/Transforms/CodeExtractor/input-value-debug.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/CodeExtractor/input-value-debug.ll b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
index d9723d61b4d63..87826bb0e3d07 100644
--- a/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
+++ b/llvm/test/Transforms/CodeExtractor/input-value-debug.ll
@@ -10,7 +10,8 @@ entry:
    store i32 %a, ptr %1, align 4
    #dbg_declare(ptr %1, !8, !DIExpression(), !1)
    #dbg_value(i32 %b, !9, !DIExpression(), !1)
-  br i1 undef, label %if.then, label %if.end
+   %tobool = icmp eq i32 %a, 0
+   br i1 %tobool, label %if.then, label %if.end
 if.then:                                          ; preds = %entry
   ret void
 



More information about the llvm-commits mailing list